[gcc r15-1089] enable adjustment of return_pc debug attrs

2024-06-06 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:3472b5749df53b91bcb00a3e82cc85ef1f3b17ce

commit r15-1089-g3472b5749df53b91bcb00a3e82cc85ef1f3b17ce
Author: Alexandre Oliva 
Date:   Wed May 29 02:52:07 2024 -0300

enable adjustment of return_pc debug attrs

This patch introduces infrastructure for targets to add an offset to
the label issued after the call_insn to set the call_return_pc
attribute.  This will be used on rs6000, which sometimes issues another
instruction after the call proper as part of a call insn.


for  gcc/ChangeLog

* target.def (call_offset_return_label): New hook.
* doc/tm.texi.in (TARGET_CALL_OFFSET_RETURN_LABEL): Add
placeholder.
* doc/tm.texi: Rebuild.
* dwarf2out.cc (struct call_arg_loc_node): Record call_insn
instead of call_arg_loc_note.
(add_AT_lbl_id): Add optional offset argument.
(gen_call_site_die): Compute and pass on a return pc offset.
(gen_subprogram_die): Move call_arg_loc_note computation...
(dwarf2out_var_location): ... from here.  Set call_insn.

Diff:
---
 gcc/doc/tm.texi|  7 +++
 gcc/doc/tm.texi.in |  2 ++
 gcc/dwarf2out.cc   | 26 +-
 gcc/target.def |  9 +
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cd50078227d..8a7aa70d605 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5557,6 +5557,13 @@ except the last are treated as named.
 You need not define this hook if it always returns @code{false}.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_CALL_OFFSET_RETURN_LABEL (rtx_insn 
*@var{call_insn})
+While generating call-site debug info for a CALL insn, or a SEQUENCE
+insn starting with a CALL, this target hook is invoked to compute the
+offset to be added to the debug label emitted after the call to obtain
+the return address that should be recorded as the return PC.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t 
@var{complete_args})
 This target hook is invoked while generating RTL for a function call,
 after the argument values have been computed, and after stack arguments
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 058bd56487a..9e0830758ae 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3887,6 +3887,8 @@ These machine description macros help implement varargs:
 
 @hook TARGET_STRICT_ARGUMENT_NAMING
 
+@hook TARGET_CALL_OFFSET_RETURN_LABEL
+
 @hook TARGET_START_CALL_ARGS
 
 @hook TARGET_CALL_ARGS
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 89efa5474d3..f90f7b1cfef 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -3593,7 +3593,7 @@ typedef struct var_loc_list_def var_loc_list;
 
 /* Call argument location list.  */
 struct GTY ((chain_next ("%h.next"))) call_arg_loc_node {
-  rtx GTY (()) call_arg_loc_note;
+  rtx_insn * GTY (()) call_insn;
   const char * GTY (()) label;
   tree GTY (()) block;
   bool tail_call_p;
@@ -3777,7 +3777,8 @@ static void remove_addr_table_entry (addr_table_entry *);
 static void add_AT_addr (dw_die_ref, enum dwarf_attribute, rtx, bool);
 static inline rtx AT_addr (dw_attr_node *);
 static void add_AT_symview (dw_die_ref, enum dwarf_attribute, const char *);
-static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *);
+static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *,
+  int = 0);
 static void add_AT_lineptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_macptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_range_list (dw_die_ref, enum dwarf_attribute,
@@ -5353,14 +5354,17 @@ add_AT_symview (dw_die_ref die, enum dwarf_attribute 
attr_kind,
 
 static inline void
 add_AT_lbl_id (dw_die_ref die, enum dwarf_attribute attr_kind,
-   const char *lbl_id)
+  const char *lbl_id, int offset)
 {
   dw_attr_node attr;
 
   attr.dw_attr = attr_kind;
   attr.dw_attr_val.val_class = dw_val_class_lbl_id;
   attr.dw_attr_val.val_entry = NULL;
-  attr.dw_attr_val.v.val_lbl_id = xstrdup (lbl_id);
+  if (!offset)
+attr.dw_attr_val.v.val_lbl_id = xstrdup (lbl_id);
+  else
+attr.dw_attr_val.v.val_lbl_id = xasprintf ("%s%+i", lbl_id, offset);
   if (dwarf_split_debug_info)
 attr.dw_attr_val.val_entry
 = add_addr_table_entry (attr.dw_attr_val.v.val_lbl_id,
@@ -23547,7 +23551,9 @@ gen_call_site_die (tree decl, dw_die_ref subr_die,
   if (stmt_die == NULL)
 stmt_die = subr_die;
   die = new_die (dwarf_TAG (DW_TAG_call_site), stmt_die, NULL_TREE);
-  add_AT_lbl_id (die, dwarf_AT (DW_AT_call_return_pc), ca_loc->label);
+  add_AT_lbl_id (die, dwarf_AT (DW_AT_call_return_pc),
+ca_loc->label,
+targetm.calls.call_offset_return_label (ca_loc->call_insn));
   if (ca_loc->tail_call_p)
 add_AT_flag (die, dwarf_AT (DW_AT_call_tail_call), 1);
   if (ca

[gcc(refs/users/aoliva/heads/testme)] [libstdc++] drop workaround for clang<=7

2024-06-06 Thread Alexandre Oliva via Libstdc++-cvs
https://gcc.gnu.org/g:c3e23e85d8a348cfaecf1a8885ad02318ade0ad6

commit c3e23e85d8a348cfaecf1a8885ad02318ade0ad6
Author: Alexandre Oliva 
Date:   Fri Jun 7 00:27:15 2024 -0300

[libstdc++] drop workaround for clang<=7

In response to a request in the review of the patch that introduced
_GLIBCXX_CLANG, this patch removes from std/variant an obsolete
workaround for clang 7 and earlier.


for  libstdc++-v3/ChangeLog

* include/std/variant: Drop obsolete workaround.

Diff:
---
 libstdc++-v3/include/std/variant | 5 -
 1 file changed, 5 deletions(-)

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 51aaa620851..13ea1dd3849 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1758,11 +1758,6 @@ namespace __detail::__variant
  }, __rhs);
   }
 
-#if defined(_GLIBCXX_CLANG) && __clang_major__ <= 7
-public:
-  using _Base::_M_u; // See https://bugs.llvm.org/show_bug.cgi?id=31852
-#endif
-
 private:
   template
friend constexpr decltype(auto)


[gcc(refs/users/aoliva/heads/testme)] [testsuite] [arm] test board cflags in multilib.exp

2024-06-06 Thread Alexandre Oliva via Gcc-cvs
https://gcc.gnu.org/g:81cb861f41a46bbc8632cb68878c3334b1c71cf8

commit 81cb861f41a46bbc8632cb68878c3334b1c71cf8
Author: Alexandre Oliva 
Date:   Fri Jun 7 00:27:11 2024 -0300

[testsuite] [arm] test board cflags in multilib.exp

multilib.exp tests for multilib-altering flags in a board's
multilib_flags and skips the test, but if such flags appear in the
board's cflags, with the same distorting effects on tested multilibs,
we fail to skip the test.

Extend the skipping logic to the board's cflags as well.


for  gcc/testsuite/ChangeLog

* gcc.target/arm/multilib.exp: Skip based on board cflags too.

Diff:
---
 gcc/testsuite/gcc.target/arm/multilib.exp | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp 
b/gcc/testsuite/gcc.target/arm/multilib.exp
index 4442d5d754b..12c93bc89d2 100644
--- a/gcc/testsuite/gcc.target/arm/multilib.exp
+++ b/gcc/testsuite/gcc.target/arm/multilib.exp
@@ -18,13 +18,15 @@ load_lib gcc-dg.exp
 
 dg-init
 
-if { [board_info [target_info name] exists multilib_flags] 
- && [regexp {(-marm|-mthumb|-march=.*|-mcpu=.*|-mfpu=.*|-mfloat=abi=.*)\y} 
[board_info [target_info name] multilib_flags]] } {
+foreach flagsvar {multilib_flags cflags} {
+  if { [board_info [target_info name] exists $flagsvar] 
+ && [regexp {(-marm|-mthumb|-march=.*|-mcpu=.*|-mfpu=.*|-mfloat=abi=.*)\y} 
[board_info [target_info name] $flagsvar]] } {

 # Multilib flags override anything we can apply to a test, so
 # skip if any of the above options are set there.
-verbose "skipping multilib tests due to multilib_flags setting" 1
+verbose "skipping multilib tests due to $flagsvar setting" 1
 return
+  }
 }
 
 # We don't want to run this test multiple times in a parallel make check.


[gcc/aoliva/heads/testme] (167 commits) [libstdc++] drop workaround for clang<=7

2024-06-06 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testme' was updated to point to:

 c3e23e85d8a... [libstdc++] drop workaround for clang<=7

It previously pointed to:

 1472684e8ba... [libstdc++] add _GLIBCXX_CLANG to workaround predefined __c

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  1472684... [libstdc++] add _GLIBCXX_CLANG to workaround predefined __c


Summary of changes (added commits):
---

  c3e23e8... [libstdc++] drop workaround for clang<=7
  81cb861... [testsuite] [arm] test board cflags in multilib.exp
  b24f295... Add additional option --param max-completely-peeled-insns=2 (*)
  93f44e1... RISC-V: Add testcases for scalar unsigned SAT_ADD form 5 (*)
  a171aac... RISC-V: Add testcases for scalar unsigned SAT_ADD form 4 (*)
  39dde92... RISC-V: Add testcases for scalar unsigned SAT_ADD form 3 (*)
  0261ed4... RISC-V: Add testcases for scalar unsigned SAT_ADD form 2 (*)
  a737c2b... RISC-V: Add testcases for scalar unsigned SAT_ADD form 1 (*)
  6af18e8... Daily bump. (*)
  e14afbe... Match: Support more form for scalar unsigned SAT_ADD (*)
  d5a3c6d... c: Fix up pointer types to may_alias structures [PR114493] (*)
  230d62a... aarch64: Add vector floating point extend pattern [PR113880 (*)
  30ce9df... modula2: Simplify REAL/LONGREAL/SHORTREAL node creation. (*)
  366d45c... testsuite/i386: Add vector sat_sub testcases [PR112600] (*)
  6e64718... Plugins: Add label-text.h to CPPLIB_H so it will be install (*)
  43530bc... aarch64: Add missing ACLE macro for NEON-SVE Bridge (*)
  2963c76... arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2. (*)
  c559353... arm: Add .type and .size to __gnu_cmse_nonsecure_call [PR11 (*)
  423522a... libgomp.texi (nvptx): Add missing preposition (*)
  afe85f8... AArch64: correct constraint on Upl early clobber alternativ (*)
  3a4775d... nvptx, libgfortran: Switch out of "minimal" mode (*)
  0d25989... nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' (*)
  a29c585... nvptx, libgcc: Stub unwinding implementation (*)
  5bbe535... nvptx offloading: Global constructor, destructor support, v (*)
  b4e68dd... nvptx: Make 'nvptx_uniform_warp_check' fit for non-full-war (*)
  395ac04... Clean up after newlib "nvptx: In offloading execution, map  (*)
  2d11de3... Vect: Support IFN SAT_SUB for unsigned vector int (*)
  346f33e... lto: Remove random_seed from section name. (*)
  ca43678... lto: Skip flag OPT_fltrans_output_list_. (*)
  037fc4d... RISC-V: Regenerate opt urls. (*)
  0b6cea8... [APX CCMP] Support ccmp for float compare (*)
  23db873... [APX CCMP] Adjust strategy for selecting ccmp candidates (*)
  c989e59... [APX CCMP] Support APX CCMP (*)
  f46d54a... [APX] Adjust target-support check [PR 115341] (*)
  4653b68... Allow single-lane SLP in-order reductions (*)
  2ee41ef... Add double reduction support for SLP vectorization (*)
  202a9c8... Allow single-lane COND_REDUCTION vectorization (*)
  28edeb1... Relax COND_EXPR reduction vectorization SLP restriction (*)
  6a6bab4... libgomp: Mark Loop transformation constructs as implemented (*)
  edd90d6... MIPS: Need COSTS_N_INSNS in mips_insn_cost (*)
  fcfce55... Refine testcase for power10. (*)
  67be156... [libstdc++] add _GLIBCXX_CLANG to workaround predefined __c (*)
  961dd0d... Adjust rtx_cost for MEM to enable more simplification (*)
  7876cde... Simplify (AND (ASHIFTRT A imm) mask) to (LSHIFTRT A imm) fo (*)
  10cb333... Daily bump. (*)
  66fa2f1... contrib: Fix spelling and capitalization in header-tools (*)
  ac6fb0f... contrib: header-tools scripts updated to python3 (*)
  03e1a72... check_GNU_style: Use raw strings. (*)
  68b0742... RISC-V: Introduce -mvector-strict-align. (*)
  3eb9f6e... AArch64: enable new predicate tuning for Neoverse cores. (*)
  2de3bbd... AArch64: add new alternative with early clobber to patterns (*)
  35f17c6... AArch64: add new tuning param and attribute for enabling co (*)
  fd48988... AArch64: convert several predicate patterns to new compact  (*)
  804c0f3... openmp: OpenMP loop transformation support (*)
  d7cbcfe... AArch64: Fix cpu features initialization [PR115342] (*)
  acdc9df... testsuite: Improve check-function-bodies (*)
  58ecd2e... darwin: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  37a4800... fortran: Replace uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE (*)
  b36461f... d: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  6fa25aa... ada: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  abe6d39... Internal-fn: Support new IFN SAT_SUB for unsigned scalar in (*)
  9931426... doc: Streamline recommendation of GNU awk (*)
  d5ccc21... Add 'c-c++-common/initpri1{,-lto,-split}-static.c' as inter (*)
  02e43e5... Add 'c-c++-common/initpri1-split.c': 'c-c++-common/initpri1 (*)
  08c7e87... Add C++ testing for 'gcc.dg/initpri1-lto.c': 'c-c++-common/ (*)
  a7d7577... Consolidate similar C/C++ test cases for 'constructor', 'de (*)
  38dd741... Clarify that 'gcc.dg/initpri3.c' is a

[gcc/aoliva/heads/testbase] (165 commits) Add additional option --param max-completely-peeled-insns=2

2024-06-06 Thread Alexandre Oliva via Gcc-cvs
The branch 'aoliva/heads/testbase' was updated to point to:

 b24f2954dbc... Add additional option --param max-completely-peeled-insns=2

It previously pointed to:

 ac5c6c90a7f... [testsuite] [powerpc] adjust -m32 counts for fold-vec-extra

Diff:

Summary of changes (added commits):
---

  b24f295... Add additional option --param max-completely-peeled-insns=2 (*)
  93f44e1... RISC-V: Add testcases for scalar unsigned SAT_ADD form 5 (*)
  a171aac... RISC-V: Add testcases for scalar unsigned SAT_ADD form 4 (*)
  39dde92... RISC-V: Add testcases for scalar unsigned SAT_ADD form 3 (*)
  0261ed4... RISC-V: Add testcases for scalar unsigned SAT_ADD form 2 (*)
  a737c2b... RISC-V: Add testcases for scalar unsigned SAT_ADD form 1 (*)
  6af18e8... Daily bump. (*)
  e14afbe... Match: Support more form for scalar unsigned SAT_ADD (*)
  d5a3c6d... c: Fix up pointer types to may_alias structures [PR114493] (*)
  230d62a... aarch64: Add vector floating point extend pattern [PR113880 (*)
  30ce9df... modula2: Simplify REAL/LONGREAL/SHORTREAL node creation. (*)
  366d45c... testsuite/i386: Add vector sat_sub testcases [PR112600] (*)
  6e64718... Plugins: Add label-text.h to CPPLIB_H so it will be install (*)
  43530bc... aarch64: Add missing ACLE macro for NEON-SVE Bridge (*)
  2963c76... arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2. (*)
  c559353... arm: Add .type and .size to __gnu_cmse_nonsecure_call [PR11 (*)
  423522a... libgomp.texi (nvptx): Add missing preposition (*)
  afe85f8... AArch64: correct constraint on Upl early clobber alternativ (*)
  3a4775d... nvptx, libgfortran: Switch out of "minimal" mode (*)
  0d25989... nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' (*)
  a29c585... nvptx, libgcc: Stub unwinding implementation (*)
  5bbe535... nvptx offloading: Global constructor, destructor support, v (*)
  b4e68dd... nvptx: Make 'nvptx_uniform_warp_check' fit for non-full-war (*)
  395ac04... Clean up after newlib "nvptx: In offloading execution, map  (*)
  2d11de3... Vect: Support IFN SAT_SUB for unsigned vector int (*)
  346f33e... lto: Remove random_seed from section name. (*)
  ca43678... lto: Skip flag OPT_fltrans_output_list_. (*)
  037fc4d... RISC-V: Regenerate opt urls. (*)
  0b6cea8... [APX CCMP] Support ccmp for float compare (*)
  23db873... [APX CCMP] Adjust strategy for selecting ccmp candidates (*)
  c989e59... [APX CCMP] Support APX CCMP (*)
  f46d54a... [APX] Adjust target-support check [PR 115341] (*)
  4653b68... Allow single-lane SLP in-order reductions (*)
  2ee41ef... Add double reduction support for SLP vectorization (*)
  202a9c8... Allow single-lane COND_REDUCTION vectorization (*)
  28edeb1... Relax COND_EXPR reduction vectorization SLP restriction (*)
  6a6bab4... libgomp: Mark Loop transformation constructs as implemented (*)
  edd90d6... MIPS: Need COSTS_N_INSNS in mips_insn_cost (*)
  fcfce55... Refine testcase for power10. (*)
  67be156... [libstdc++] add _GLIBCXX_CLANG to workaround predefined __c (*)
  961dd0d... Adjust rtx_cost for MEM to enable more simplification (*)
  7876cde... Simplify (AND (ASHIFTRT A imm) mask) to (LSHIFTRT A imm) fo (*)
  10cb333... Daily bump. (*)
  66fa2f1... contrib: Fix spelling and capitalization in header-tools (*)
  ac6fb0f... contrib: header-tools scripts updated to python3 (*)
  03e1a72... check_GNU_style: Use raw strings. (*)
  68b0742... RISC-V: Introduce -mvector-strict-align. (*)
  3eb9f6e... AArch64: enable new predicate tuning for Neoverse cores. (*)
  2de3bbd... AArch64: add new alternative with early clobber to patterns (*)
  35f17c6... AArch64: add new tuning param and attribute for enabling co (*)
  fd48988... AArch64: convert several predicate patterns to new compact  (*)
  804c0f3... openmp: OpenMP loop transformation support (*)
  d7cbcfe... AArch64: Fix cpu features initialization [PR115342] (*)
  acdc9df... testsuite: Improve check-function-bodies (*)
  58ecd2e... darwin: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  37a4800... fortran: Replace uses of {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE (*)
  b36461f... d: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  6fa25aa... ada: Replace use of LONG_DOUBLE_TYPE_SIZE (*)
  abe6d39... Internal-fn: Support new IFN SAT_SUB for unsigned scalar in (*)
  9931426... doc: Streamline recommendation of GNU awk (*)
  d5ccc21... Add 'c-c++-common/initpri1{,-lto,-split}-static.c' as inter (*)
  02e43e5... Add 'c-c++-common/initpri1-split.c': 'c-c++-common/initpri1 (*)
  08c7e87... Add C++ testing for 'gcc.dg/initpri1-lto.c': 'c-c++-common/ (*)
  a7d7577... Consolidate similar C/C++ test cases for 'constructor', 'de (*)
  38dd741... Clarify that 'gcc.dg/initpri3.c' is a LTO variant of 'gcc.d (*)
  35e453d... libstdc++: Update gcc.gnu.org links in FAQ to https (*)
  b05288d... Don't simplify NAN/INF or out-of-range constant for FIX/UNS (*)
  4638e50... RISC-V: Add Zfbfmin extension (*)
  904fbe9... Daily bump. (*)
  126ccf8... c++: Add testcase for PR103338 (*)
  7f21aee... 

[gcc r15-1088] Add additional option --param max-completely-peeled-insns=200 for powerpc64*-*-*

2024-06-06 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:b24f2954dbc13d85e9fb62e05a88e9df21e4d4f4

commit r15-1088-gb24f2954dbc13d85e9fb62e05a88e9df21e4d4f4
Author: liuhongt 
Date:   Fri Jun 7 09:29:24 2024 +0800

Add additional option --param max-completely-peeled-insns=200 for 
powerpc64*-*-*

gcc/testsuite/ChangeLog:

* gcc.dg/vect/pr112325.c: Add additional option --param
max-completely-peeled-insns=200 for powerpc64*-*-*.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr112325.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/vect/pr112325.c 
b/gcc/testsuite/gcc.dg/vect/pr112325.c
index dea6cca3b86..143903beab2 100644
--- a/gcc/testsuite/gcc.dg/vect/pr112325.c
+++ b/gcc/testsuite/gcc.dg/vect/pr112325.c
@@ -3,6 +3,7 @@
 /* { dg-require-effective-target vect_int } */
 /* { dg-require-effective-target vect_shift } */
 /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-additional-options "--param max-completely-peeled-insns=200" { target 
powerpc64*-*-* } } */
 
 typedef unsigned short ggml_fp16_t;
 static float table_f32_f16[1 << 16];


[gcc r15-1086] RISC-V: Add testcases for scalar unsigned SAT_ADD form 4

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a171aac72408837ed0b20e3912a22c5b4891ace4

commit r15-1086-ga171aac72408837ed0b20e3912a22c5b4891ace4
Author: Pan Li 
Date:   Mon Jun 3 10:33:15 2024 +0800

RISC-V: Add testcases for scalar unsigned SAT_ADD form 4

Now that the middle-end supports form 4 of unsigned SAT_ADD and
the RISC-V backend implements the scalar .SAT_ADD, add more test
cases to cover form 4 of unsigned .SAT_ADD.

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Passed the full rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for form 4.
* gcc.target/riscv/sat_u_add-17.c: New test.
* gcc.target/riscv/sat_u_add-18.c: New test.
* gcc.target/riscv/sat_u_add-19.c: New test.
* gcc.target/riscv/sat_u_add-20.c: New test.
* gcc.target/riscv/sat_u_add-run-17.c: New test.
* gcc.target/riscv/sat_u_add-run-18.c: New test.
* gcc.target/riscv/sat_u_add-run-19.c: New test.
* gcc.target/riscv/sat_u_add-run-20.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h|  8 
 gcc/testsuite/gcc.target/riscv/sat_u_add-17.c | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-18.c | 21 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-19.c | 18 
 gcc/testsuite/gcc.target/riscv/sat_u_add-20.c | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-17.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-18.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-19.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-20.c | 25 +++
 9 files changed, 183 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index adb8be5886e..6ca158d57c4 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -34,6 +34,13 @@ sat_u_add_##T##_fmt_4 (T x, T y) \
   return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
 }
 
+#define DEF_SAT_U_ADD_FMT_5(T)\
+T __attribute__((noinline))   \
+sat_u_add_##T##_fmt_5 (T x, T y)  \
+{ \
+  T ret;  \
+  return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
+}
 
 #define DEF_VEC_SAT_U_ADD_FMT_1(T)   \
 void __attribute__((noinline))   \
@@ -52,6 +59,7 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 #define RUN_SAT_U_ADD_FMT_2(T, x, y) sat_u_add_##T##_fmt_2(x, y)
 #define RUN_SAT_U_ADD_FMT_3(T, x, y) sat_u_add_##T##_fmt_3(x, y)
 #define RUN_SAT_U_ADD_FMT_4(T, x, y) sat_u_add_##T##_fmt_4(x, y)
+#define RUN_SAT_U_ADD_FMT_5(T, x, y) sat_u_add_##T##_fmt_5(x, y)
 
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-17.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-17.c
new file mode 100644
index 000..7085ac835f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-17.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_5:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_5(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-18.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-18.c
new file mode 100644
index 000..355ff8ba4ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-18.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_5:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+

[gcc r15-1087] RISC-V: Add testcases for scalar unsigned SAT_ADD form 5

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:93f44e18cddb2b5eb3a00232d3be9a5bc8179f25

commit r15-1087-g93f44e18cddb2b5eb3a00232d3be9a5bc8179f25
Author: Pan Li 
Date:   Mon Jun 3 10:43:10 2024 +0800

RISC-V: Add testcases for scalar unsigned SAT_ADD form 5

Now that the middle-end supports form 5 of unsigned SAT_ADD and
the RISC-V backend implements the scalar .SAT_ADD, add more test
cases to cover form 5 of unsigned .SAT_ADD.

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Passed the full riscv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for form 5.
* gcc.target/riscv/sat_u_add-21.c: New test.
* gcc.target/riscv/sat_u_add-22.c: New test.
* gcc.target/riscv/sat_u_add-23.c: New test.
* gcc.target/riscv/sat_u_add-24.c: New test.
* gcc.target/riscv/sat_u_add-run-21.c: New test.
* gcc.target/riscv/sat_u_add-run-22.c: New test.
* gcc.target/riscv/sat_u_add-run-23.c: New test.
* gcc.target/riscv/sat_u_add-run-24.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h|  8 
 gcc/testsuite/gcc.target/riscv/sat_u_add-21.c | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-22.c | 21 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-23.c | 18 
 gcc/testsuite/gcc.target/riscv/sat_u_add-24.c | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-21.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-22.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-23.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-24.c | 25 +++
 9 files changed, 183 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 6ca158d57c4..976ef1c44c1 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -42,6 +42,13 @@ sat_u_add_##T##_fmt_5 (T x, T y) 
 \
   return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
 }
 
+#define DEF_SAT_U_ADD_FMT_6(T)  \
+T __attribute__((noinline)) \
+sat_u_add_##T##_fmt_6 (T x, T y)\
+{   \
+  return (T)(x + y) < x ? -1 : (x + y); \
+}
+
 #define DEF_VEC_SAT_U_ADD_FMT_1(T)   \
 void __attribute__((noinline))   \
 vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
@@ -60,6 +67,7 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 #define RUN_SAT_U_ADD_FMT_3(T, x, y) sat_u_add_##T##_fmt_3(x, y)
 #define RUN_SAT_U_ADD_FMT_4(T, x, y) sat_u_add_##T##_fmt_4(x, y)
 #define RUN_SAT_U_ADD_FMT_5(T, x, y) sat_u_add_##T##_fmt_5(x, y)
+#define RUN_SAT_U_ADD_FMT_6(T, x, y) sat_u_add_##T##_fmt_6(x, y)
 
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-21.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-21.c
new file mode 100644
index 000..f75e35a5fa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_6:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_6(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-22.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-22.c
new file mode 100644
index 000..ad957a061f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-22.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_6:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_6(uint16_t)
+
+/* {

[gcc r15-1085] RISC-V: Add testcases for scalar unsigned SAT_ADD form 3

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:39dde9200dd936339df7dd6c8f56e88866bcecc5

commit r15-1085-g39dde9200dd936339df7dd6c8f56e88866bcecc5
Author: Pan Li 
Date:   Mon Jun 3 10:24:47 2024 +0800

RISC-V: Add testcases for scalar unsigned SAT_ADD form 3

Now that the middle-end supports form 3 of unsigned SAT_ADD and
the RISC-V backend implements the scalar .SAT_ADD, add more test
cases to cover form 3 of unsigned .SAT_ADD.

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Passed the full rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for form 3.
* gcc.target/riscv/sat_u_add-13.c: New test.
* gcc.target/riscv/sat_u_add-14.c: New test.
* gcc.target/riscv/sat_u_add-15.c: New test.
* gcc.target/riscv/sat_u_add-16.c: New test.
* gcc.target/riscv/sat_u_add-run-13.c: New test.
* gcc.target/riscv/sat_u_add-run-14.c: New test.
* gcc.target/riscv/sat_u_add-run-15.c: New test.
* gcc.target/riscv/sat_u_add-run-16.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c | 21 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 18 
 gcc/testsuite/gcc.target/riscv/sat_u_add-16.c | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-13.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-14.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-15.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-16.c | 25 +++
 9 files changed, 185 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index d44fd63fd83..adb8be5886e 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -26,6 +26,15 @@ sat_u_add_##T##_fmt_3 (T x, T y)\
   return (T)(-overflow) | ret;  \
 }
 
+#define DEF_SAT_U_ADD_FMT_4(T)   \
+T __attribute__((noinline))  \
+sat_u_add_##T##_fmt_4 (T x, T y) \
+{\
+  T ret; \
+  return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
+}
+
+
 #define DEF_VEC_SAT_U_ADD_FMT_1(T)   \
 void __attribute__((noinline))   \
 vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
@@ -42,6 +51,7 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 #define RUN_SAT_U_ADD_FMT_1(T, x, y) sat_u_add_##T##_fmt_1(x, y)
 #define RUN_SAT_U_ADD_FMT_2(T, x, y) sat_u_add_##T##_fmt_2(x, y)
 #define RUN_SAT_U_ADD_FMT_3(T, x, y) sat_u_add_##T##_fmt_3(x, y)
+#define RUN_SAT_U_ADD_FMT_4(T, x, y) sat_u_add_##T##_fmt_4(x, y)
 
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-13.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-13.c
new file mode 100644
index 000..b2d93f29f48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_4:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_4(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-14.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-14.c
new file mode 100644
index 000..eafc578aafa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-14.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_4:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[a

[gcc r15-1084] RISC-V: Add testcases for scalar unsigned SAT_ADD form 2

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:0261ed4337f62c247b33145a81cd4fb5a69bc5a7

commit r15-1084-g0261ed4337f62c247b33145a81cd4fb5a69bc5a7
Author: Pan Li 
Date:   Mon Jun 3 09:35:49 2024 +0800

RISC-V: Add testcases for scalar unsigned SAT_ADD form 2

Now that the middle-end supports form 2 of unsigned SAT_ADD and
the RISC-V backend implements the scalar .SAT_ADD, add more test
cases to cover form 2 of unsigned .SAT_ADD.

Form 2:

  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Passed the full rv64gcv regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for form 2.
* gcc.target/riscv/sat_u_add-10.c: New test.
* gcc.target/riscv/sat_u_add-11.c: New test.
* gcc.target/riscv/sat_u_add-12.c: New test.
* gcc.target/riscv/sat_u_add-9.c: New test.
* gcc.target/riscv/sat_u_add-run-10.c: New test.
* gcc.target/riscv/sat_u_add-run-11.c: New test.
* gcc.target/riscv/sat_u_add-run-12.c: New test.
* gcc.target/riscv/sat_u_add-run-9.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 10 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-10.c | 21 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-11.c | 18 
 gcc/testsuite/gcc.target/riscv/sat_u_add-12.c | 17 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-9.c  | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-10.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-11.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-12.c | 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-9.c  | 25 +++
 9 files changed, 185 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 2abc83d7666..d44fd63fd83 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -17,6 +17,15 @@ sat_u_add_##T##_fmt_2 (T x, T y) \
   return (T)(x + y) >= x ? (x + y) : -1; \
 }
 
+#define DEF_SAT_U_ADD_FMT_3(T)  \
+T __attribute__((noinline)) \
+sat_u_add_##T##_fmt_3 (T x, T y)\
+{   \
+  T ret;\
+  T overflow = __builtin_add_overflow (x, y, &ret); \
+  return (T)(-overflow) | ret;  \
+}
+
 #define DEF_VEC_SAT_U_ADD_FMT_1(T)   \
 void __attribute__((noinline))   \
 vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
@@ -32,6 +41,7 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 
 #define RUN_SAT_U_ADD_FMT_1(T, x, y) sat_u_add_##T##_fmt_1(x, y)
 #define RUN_SAT_U_ADD_FMT_2(T, x, y) sat_u_add_##T##_fmt_2(x, y)
+#define RUN_SAT_U_ADD_FMT_3(T, x, y) sat_u_add_##T##_fmt_3(x, y)
 
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-10.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-10.c
new file mode 100644
index 000..3f627ef80b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-10.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_3:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_3(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
new file mode 100644
index 000..b6dc779b212
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-11.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint32_t_fmt_3:
+** addw\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,

[gcc r15-1083] RISC-V: Add testcases for scalar unsigned SAT_ADD form 1

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:a737c2bf5212822b8225f65efa643a968e5a7c78

commit r15-1083-ga737c2bf5212822b8225f65efa643a968e5a7c78
Author: Pan Li 
Date:   Wed May 29 14:15:45 2024 +0800

RISC-V: Add testcases for scalar unsigned SAT_ADD form 1

Now that the middle-end supports form 1 of unsigned SAT_ADD and
the RISC-V backend implements the scalar .SAT_ADD, add more test
cases to cover form 1 of unsigned .SAT_ADD.

Form 1:

  #define SAT_ADD_U_1(T)   \
  T sat_add_u_1_##T(T x, T y)  \
  {\
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Passed the full riscv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add helper macro for form 1.
* gcc.target/riscv/sat_u_add-5.c: New test.
* gcc.target/riscv/sat_u_add-6.c: New test.
* gcc.target/riscv/sat_u_add-7.c: New test.
* gcc.target/riscv/sat_u_add-8.c: New test.
* gcc.target/riscv/sat_u_add-run-5.c: New test.
* gcc.target/riscv/sat_u_add-run-6.c: New test.
* gcc.target/riscv/sat_u_add-run-7.c: New test.
* gcc.target/riscv/sat_u_add-run-8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_arith.h   |  8 
 gcc/testsuite/gcc.target/riscv/sat_u_add-5.c | 19 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-6.c | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_add-7.c | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-8.c | 17 
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-5.c | 25 
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-6.c | 25 
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-7.c | 25 
 gcc/testsuite/gcc.target/riscv/sat_u_add-run-8.c | 25 
 9 files changed, 183 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 2ef9fd825f3..2abc83d7666 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -10,6 +10,13 @@ sat_u_add_##T##_fmt_1 (T x, T y)   \
   return (x + y) | (-(T)((T)(x + y) < x)); \
 }
 
+#define DEF_SAT_U_ADD_FMT_2(T)   \
+T __attribute__((noinline))  \
+sat_u_add_##T##_fmt_2 (T x, T y) \
+{\
+  return (T)(x + y) >= x ? (x + y) : -1; \
+}
+
 #define DEF_VEC_SAT_U_ADD_FMT_1(T)   \
 void __attribute__((noinline))   \
 vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
@@ -24,6 +31,7 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 }
 
 #define RUN_SAT_U_ADD_FMT_1(T, x, y) sat_u_add_##T##_fmt_1(x, y)
+#define RUN_SAT_U_ADD_FMT_2(T, x, y) sat_u_add_##T##_fmt_2(x, y)
 
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-5.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-5.c
new file mode 100644
index 000..4c73c7f8a21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add-6.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_add-6.c
new file mode 100644
index 000..0d64f5631bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-6.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_2:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_2(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.targ

[gcc r15-1081] Match: Support more form for scalar unsigned SAT_ADD

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:e14afbe2d1c696cc4abda24ca10a5a43ee9c2818

commit r15-1081-ge14afbe2d1c696cc4abda24ca10a5a43ee9c2818
Author: Pan Li 
Date:   Thu Jun 6 09:19:53 2024 +0800

Match: Support more form for scalar unsigned SAT_ADD

Now that we support one gassign form of the unsigned .SAT_ADD, we
would like to support more forms, both branching and branchless.
There are 5 other forms of .SAT_ADD, listed below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret); \
return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
T ret; \
return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
return (T)(x + y) < x ? -1 : (x + y); \
  }

Take form 3 above as an example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The flag '^' acting on cond_expr will generate matching code similar to the below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
basic_block _b1 = gimple_bb (_a1);
if (gimple_phi_num_args (_a1) == 2)
  {
basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1))
? _pb_0_1 : _pb_1_1;
basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1))
  ? _pb_1_1 : _pb_0_1;
gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
  && EDGE_COUNT (_other_db_1->succs) == 1
  && EDGE_PRED (_other_db_1, 0)->src == _db_1)
  {
tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node,
   _cond_lhs_1, _cond_rhs_1);
bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & 
EDGE_TRUE_VALUE;
tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);


The below test suites are passed for this patch.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* doc/match-and-simplify.texi: Add doc for the matching flag '^'.
* genmatch.cc (cmp_operand): Add match_phi comparison.
(dt_node::gen_kids_1): Add cond_expr bool flag for phi match.
(dt_operand::gen_phi_on_cond): Add new func to gen phi matching
on cond_expr.
(parser::parse_expr): Add handling for the expr flag '^'.
* match.pd: Add more form for unsigned .SAT_ADD.
* tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add
new func impl to build call for phi gimple.
(match_unsigned_saturation_add): Add new func impl to match the
.SAT_ADD for phi gimple.
(math_opts_dom_walker::after_dom_children): Add phi matching
try for all gimpl

[gcc r14-10286] c: Fix up pointer types to may_alias structures [PR114493]

2024-06-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:56c73729c3eab08ca48f366bd435f98457743e45

commit r14-10286-g56c73729c3eab08ca48f366bd435f98457743e45
Author: Jakub Jelinek 
Date:   Thu Jun 6 22:12:11 2024 +0200

c: Fix up pointer types to may_alias structures [PR114493]

The following testcase ICEs in ipa-free-lang, because the
fld_incomplete_type_of
  gcc_assert (TYPE_CANONICAL (t2) != t2
              && TYPE_CANONICAL (t2) == TYPE_CANONICAL (TREE_TYPE (t)));
assertion doesn't hold.
This is because t is a struct S * type which was created while struct S
was still incomplete and without the may_alias attribute (and TYPE_CANONICAL
of a pointer type is a type created with can_alias_all = false argument),
while later on, on the struct definition, the may_alias attribute was used.
fld_incomplete_type_of then creates an incomplete distinct copy of the
structure (but with the original attributes) but pointers created for it
are because of the "may_alias" attribute TYPE_REF_CAN_ALIAS_ALL, including
their TYPE_CANONICAL, because while that is created with !can_alias_all
argument, we later set it because of the "may_alias" attribute on the
to_type.

This doesn't ICE with C++ since PR70512 fix because the C++ FE sets
TYPE_REF_CAN_ALIAS_ALL on all pointer types to the class type (and its
variants) when the may_alias is added.

The following patch does that in the C FE as well.

2024-06-06  Jakub Jelinek  

PR c/114493
* c-decl.cc (c_fixup_may_alias): New function.
(finish_struct): Call it if "may_alias" attribute is
specified.

* gcc.dg/pr114493-1.c: New test.
* gcc.dg/pr114493-2.c: New test.

(cherry picked from commit d5a3c6d43acb8b2211d9fb59d59482d74c010f01)

Diff:
---
 gcc/c/c-decl.cc   | 15 +++
 gcc/testsuite/gcc.dg/pr114493-1.c | 19 +++
 gcc/testsuite/gcc.dg/pr114493-2.c | 26 ++
 3 files changed, 60 insertions(+)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 52af8f32998..e63dab49589 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9393,6 +9393,17 @@ c_update_type_canonical (tree t)
 }
 }
 
+/* TYPE is a struct or union that we're applying may_alias to after the body is
+   parsed.  Fixup any POINTER_TO types.  */
+
+static void
+c_fixup_may_alias (tree type)
+{
+  for (tree t = TYPE_POINTER_TO (type); t; t = TYPE_NEXT_PTR_TO (t))
+for (tree v = TYPE_MAIN_VARIANT (t); v; v = TYPE_NEXT_VARIANT (v))
+  TYPE_REF_CAN_ALIAS_ALL (v) = true;
+}
+
 /* Fill in the fields of a RECORD_TYPE or UNION_TYPE node, T.
LOC is the location of the RECORD_TYPE or UNION_TYPE's definition.
FIELDLIST is a chain of FIELD_DECL nodes for the fields.
@@ -9737,6 +9748,10 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
 
   C_TYPE_BEING_DEFINED (t) = 0;
 
+  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (t)))
+for (x = TYPE_MAIN_VARIANT (t); x; x = TYPE_NEXT_VARIANT (x))
+  c_fixup_may_alias (x);
+
   /* Set type canonical based on equivalence class.  */
   if (flag_isoc23)
 {
diff --git a/gcc/testsuite/gcc.dg/pr114493-1.c 
b/gcc/testsuite/gcc.dg/pr114493-1.c
new file mode 100644
index 000..446f33eac3b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114493-1.c
@@ -0,0 +1,19 @@
+/* PR c/114493 */
+/* { dg-do compile { target lto } } */
+/* { dg-options "-O2 -flto" } */
+
+void foo (void);
+struct S;
+struct S bar (struct S **);
+struct S qux (const struct S **);
+
+struct __attribute__((__may_alias__)) S {
+  int s;
+};
+
+struct S
+baz (void)
+{
+  foo ();
+  return (struct S) {};
+}
diff --git a/gcc/testsuite/gcc.dg/pr114493-2.c 
b/gcc/testsuite/gcc.dg/pr114493-2.c
new file mode 100644
index 000..1b4a5792dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114493-2.c
@@ -0,0 +1,26 @@
+/* PR c/114493 */
+/* { dg-do compile { target lto } } */
+/* { dg-options "-O2 -flto -std=c23" } */
+
+void foo (void);
+struct S;
+struct S bar (struct S **);
+struct S qux (const struct S **);
+
+void
+corge (void)
+{
+  struct S { int s; } s;
+  s.s = 0;
+}
+
+struct __attribute__((__may_alias__)) S {
+  int s;
+};
+
+struct S
+baz (void)
+{
+  foo ();
+  return (struct S) {};
+}


[gcc r15-1080] c: Fix up pointer types to may_alias structures [PR114493]

2024-06-06 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:d5a3c6d43acb8b2211d9fb59d59482d74c010f01

commit r15-1080-gd5a3c6d43acb8b2211d9fb59d59482d74c010f01
Author: Jakub Jelinek 
Date:   Thu Jun 6 22:12:11 2024 +0200

c: Fix up pointer types to may_alias structures [PR114493]

The following testcase ICEs in ipa-free-lang, because the
fld_incomplete_type_of
  gcc_assert (TYPE_CANONICAL (t2) != t2
              && TYPE_CANONICAL (t2) == TYPE_CANONICAL (TREE_TYPE (t)));
assertion doesn't hold.
This is because t is a struct S * type which was created while struct S
was still incomplete and without the may_alias attribute (and TYPE_CANONICAL
of a pointer type is a type created with can_alias_all = false argument),
while later on, on the struct definition, the may_alias attribute was used.
fld_incomplete_type_of then creates an incomplete distinct copy of the
structure (but with the original attributes) but pointers created for it
are because of the "may_alias" attribute TYPE_REF_CAN_ALIAS_ALL, including
their TYPE_CANONICAL, because while that is created with !can_alias_all
argument, we later set it because of the "may_alias" attribute on the
to_type.

This doesn't ICE with C++ since PR70512 fix because the C++ FE sets
TYPE_REF_CAN_ALIAS_ALL on all pointer types to the class type (and its
variants) when the may_alias is added.

The following patch does that in the C FE as well.

2024-06-06  Jakub Jelinek  

PR c/114493
* c-decl.cc (c_fixup_may_alias): New function.
(finish_struct): Call it if "may_alias" attribute is
specified.

* gcc.dg/pr114493-1.c: New test.
* gcc.dg/pr114493-2.c: New test.

Diff:
---
 gcc/c/c-decl.cc   | 15 +++
 gcc/testsuite/gcc.dg/pr114493-1.c | 19 +++
 gcc/testsuite/gcc.dg/pr114493-2.c | 26 ++
 3 files changed, 60 insertions(+)

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 64924b87a91..6c09eb73128 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -9446,6 +9446,17 @@ verify_counted_by_attribute (tree struct_type, tree 
field_decl)
   return;
 }
 
+/* TYPE is a struct or union that we're applying may_alias to after the body is
+   parsed.  Fixup any POINTER_TO types.  */
+
+static void
+c_fixup_may_alias (tree type)
+{
+  for (tree t = TYPE_POINTER_TO (type); t; t = TYPE_NEXT_PTR_TO (t))
+for (tree v = TYPE_MAIN_VARIANT (t); v; v = TYPE_NEXT_VARIANT (v))
+  TYPE_REF_CAN_ALIAS_ALL (v) = true;
+}
+
 /* Fill in the fields of a RECORD_TYPE or UNION_TYPE node, T.
LOC is the location of the RECORD_TYPE or UNION_TYPE's definition.
FIELDLIST is a chain of FIELD_DECL nodes for the fields.
@@ -9791,6 +9802,10 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
tree attributes,
 
   C_TYPE_BEING_DEFINED (t) = 0;
 
+  if (lookup_attribute ("may_alias", TYPE_ATTRIBUTES (t)))
+for (x = TYPE_MAIN_VARIANT (t); x; x = TYPE_NEXT_VARIANT (x))
+  c_fixup_may_alias (x);
+
   /* Set type canonical based on equivalence class.  */
   if (flag_isoc23 && !C_TYPE_VARIABLE_SIZE (t))
 {
diff --git a/gcc/testsuite/gcc.dg/pr114493-1.c 
b/gcc/testsuite/gcc.dg/pr114493-1.c
new file mode 100644
index 000..446f33eac3b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114493-1.c
@@ -0,0 +1,19 @@
+/* PR c/114493 */
+/* { dg-do compile { target lto } } */
+/* { dg-options "-O2 -flto" } */
+
+void foo (void);
+struct S;
+struct S bar (struct S **);
+struct S qux (const struct S **);
+
+struct __attribute__((__may_alias__)) S {
+  int s;
+};
+
+struct S
+baz (void)
+{
+  foo ();
+  return (struct S) {};
+}
diff --git a/gcc/testsuite/gcc.dg/pr114493-2.c 
b/gcc/testsuite/gcc.dg/pr114493-2.c
new file mode 100644
index 000..1b4a5792dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114493-2.c
@@ -0,0 +1,26 @@
+/* PR c/114493 */
+/* { dg-do compile { target lto } } */
+/* { dg-options "-O2 -flto -std=c23" } */
+
+void foo (void);
+struct S;
+struct S bar (struct S **);
+struct S qux (const struct S **);
+
+void
+corge (void)
+{
+  struct S { int s; } s;
+  s.s = 0;
+}
+
+struct __attribute__((__may_alias__)) S {
+  int s;
+};
+
+struct S
+baz (void)
+{
+  foo ();
+  return (struct S) {};
+}


[gcc r15-1079] aarch64: Add vector floating point extend pattern [PR113880, PR113869]

2024-06-06 Thread Pengxuan Zheng via Gcc-cvs
https://gcc.gnu.org/g:230d62a2cdd16c1ec8fe87998ec01081503f010d

commit r15-1079-g230d62a2cdd16c1ec8fe87998ec01081503f010d
Author: Pengxuan Zheng 
Date:   Thu May 30 17:53:23 2024 -0700

aarch64: Add vector floating point extend pattern [PR113880, PR113869]

This patch adds a vector floating point extend pattern for V2SF->V2DF and
V4HF->V4SF conversions by renaming the existing
aarch64_float_extend_lo_<Vwide> pattern to the standard optab one, i.e.,
extend<mode><Vwide>2. This allows the vectorizer to vectorize certain
floating point widening operations for the aarch64 target.

PR target/113880
PR target/113869

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc (VAR1): Remap float_extend_lo_
builtin codes to standard optab ones.
* config/aarch64/aarch64-simd.md (aarch64_float_extend_lo_<Vwide>):
Rename to...
(extend<mode><Vwide>2): ... This.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/extend-vec.c: New test.

Signed-off-by: Pengxuan Zheng 

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc|  9 +
 gcc/config/aarch64/aarch64-simd.md|  2 +-
 gcc/testsuite/gcc.target/aarch64/extend-vec.c | 21 +
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index f8eeccb554d..25189888d17 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -534,6 +534,15 @@ BUILTIN_VDQ_BHSI (urhadd, uavg, _ceil, 0)
 BUILTIN_VDQ_BHSI (shadd, avg, _floor, 0)
 BUILTIN_VDQ_BHSI (uhadd, uavg, _floor, 0)
 
+/* The builtins below should be expanded through the standard optabs
+   CODE_FOR_extend<mode><Vwide>2. */
+#undef VAR1
+#define VAR1(F,T,N,M) \
+  constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T##N##M##2;
+
+VAR1 (float_extend_lo_, extend, v2sf, v2df)
+VAR1 (float_extend_lo_, extend, v4hf, v4sf)
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 868f4486218..c5e2c9f00d0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3132,7 +3132,7 @@
 DONE;
   }
 )
-(define_insn "aarch64_float_extend_lo_<Vwide>"
+(define_insn "extend<mode><Vwide>2"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE>
  (match_operand:VDF 1 "register_operand" "w")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/extend-vec.c 
b/gcc/testsuite/gcc.target/aarch64/extend-vec.c
new file mode 100644
index 000..f6241d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/extend-vec.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-times {fcvtl\tv[0-9]+.2d, v[0-9]+.2s} 1 } } */
+void
+f (float *__restrict a, double *__restrict b)
+{
+  b[0] = a[0];
+  b[1] = a[1];
+}
+
+/* { dg-final { scan-assembler-times {fcvtl\tv[0-9]+.4s, v[0-9]+.4h} 1 } } */
+void
+f1 (_Float16 *__restrict a, float *__restrict b)
+{
+
+  b[0] = a[0];
+  b[1] = a[1];
+  b[2] = a[2];
+  b[3] = a[3];
+}


[gcc r15-1078] modula2: Simplify REAL/LONGREAL/SHORTREAL node creation.

2024-06-06 Thread Gaius Mulley via Gcc-cvs
https://gcc.gnu.org/g:30ce9dfcc665b6088e5898cfa766b57556ebb90e

commit r15-1078-g30ce9dfcc665b6088e5898cfa766b57556ebb90e
Author: Gaius Mulley 
Date:   Thu Jun 6 19:27:56 2024 +0100

modula2: Simplify REAL/LONGREAL/SHORTREAL node creation.

This patch simplifies the real type build functions by using
the default float_type_node and double_type_node rather than creating
new nodes.  It also uses the default GCC long_double_type_node
or float128_type_node for longreal.

gcc/m2/ChangeLog:

* gm2-gcc/m2type.cc (build_m2_short_real_node): Rewrite
to use the default float_type_node.
(build_m2_real_node): Rewrite to use the default
double_type_node.
(build_m2_long_real_node): Rewrite to use the default
long_double_type_node or float128_type_node.

Co-Authored-By: Kewen.Lin  
Signed-off-by: Gaius Mulley 

Diff:
---
 gcc/m2/gm2-gcc/m2type.cc | 30 +++---
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/gcc/m2/gm2-gcc/m2type.cc b/gcc/m2/gm2-gcc/m2type.cc
index 571923c08ef..5773a5cbd19 100644
--- a/gcc/m2/gm2-gcc/m2type.cc
+++ b/gcc/m2/gm2-gcc/m2type.cc
@@ -1415,45 +1415,29 @@ build_m2_char_node (void)
 static tree
 build_m2_short_real_node (void)
 {
-  tree c;
-
-  /* Define `REAL'.  */
-
-  c = make_node (REAL_TYPE);
-  TYPE_PRECISION (c) = FLOAT_TYPE_SIZE;
-  layout_type (c);
-  return c;
+  /* Define `SHORTREAL'.  */
+  ASSERT_CONDITION (TYPE_PRECISION (float_type_node) == FLOAT_TYPE_SIZE);
+  return float_type_node;
 }
 
 static tree
 build_m2_real_node (void)
 {
-  tree c;
-
   /* Define `REAL'.  */
-
-  c = make_node (REAL_TYPE);
-  TYPE_PRECISION (c) = DOUBLE_TYPE_SIZE;
-  layout_type (c);
-  return c;
+  ASSERT_CONDITION (TYPE_PRECISION (double_type_node) == DOUBLE_TYPE_SIZE);  
+  return double_type_node;
 }
 
 static tree
 build_m2_long_real_node (void)
 {
   tree longreal;
-
+  
   /* Define `LONGREAL'.  */
-  if (M2Options_GetIBMLongDouble ())
-{
-  longreal = make_node (REAL_TYPE);
-  TYPE_PRECISION (longreal) = LONG_DOUBLE_TYPE_SIZE;
-}
-  else if (M2Options_GetIEEELongDouble ())
+  if (M2Options_GetIEEELongDouble ())
 longreal = float128_type_node;
   else
 longreal = long_double_type_node;
-  layout_type (longreal);
   return longreal;
 }


[gcc r15-1077] testsuite/i386: Add vector sat_sub testcases [PR112600]

2024-06-06 Thread Uros Bizjak via Gcc-cvs
https://gcc.gnu.org/g:366d45c8d4911dc7874d2e64cf2583c0133b8dd5

commit r15-1077-g366d45c8d4911dc7874d2e64cf2583c0133b8dd5
Author: Uros Bizjak 
Date:   Thu Jun 6 19:18:41 2024 +0200

testsuite/i386: Add vector sat_sub testcases [PR112600]

PR middle-end/112600

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr112600-2a.c: New test.
* gcc.target/i386/pr112600-2b.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr112600-2a.c | 15 +++
 gcc/testsuite/gcc.target/i386/pr112600-2b.c | 15 +++
 2 files changed, 30 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr112600-2a.c 
b/gcc/testsuite/gcc.target/i386/pr112600-2a.c
new file mode 100644
index 000..4df38e5a720
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-2a.c
@@ -0,0 +1,15 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef unsigned char T;
+
+void foo (T *out, T *x, T *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
+}
+
+/* { dg-final { scan-assembler "psubusb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-2b.c 
b/gcc/testsuite/gcc.target/i386/pr112600-2b.c
new file mode 100644
index 000..0f6345de704
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-2b.c
@@ -0,0 +1,15 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+typedef unsigned short T;
+
+void foo (T *out, T *x, T *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
+}
+
+/* { dg-final { scan-assembler "psubusw" } } */


[gcc r15-1076] Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

2024-06-06 Thread Andrew Pinski via Gcc-cvs
https://gcc.gnu.org/g:6e6471806d886bc052d3922d636d49aaf75d5d16

commit r15-1076-g6e6471806d886bc052d3922d636d49aaf75d5d16
Author: Andrew Pinski 
Date:   Thu May 30 07:59:00 2024 -0700

Plugins: Add label-text.h to CPPLIB_H so it will be installed [PR115288]

After r15-874-g9bda2c4c81b668, out of tree plugins won't compile
as the new libcpp header file label-text.h is not installed.

This adds the new header file to CPPLIB_H which is used for
the plugin headers to install.

Committed as obvious after a build and install, having checked that
the new header file is installed.

gcc/ChangeLog:

PR plugins/115288
* Makefile.in (CPPLIB_H): Add label-text.h.

Signed-off-by: Andrew Pinski 

Diff:
---
 gcc/Makefile.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index c983b0c102a..f5adb647d3f 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1038,6 +1038,7 @@ SYSTEM_H = system.h hwint.h 
$(srcdir)/../include/libiberty.h \
 PREDICT_H = predict.h predict.def
 CPPLIB_H = $(srcdir)/../libcpp/include/line-map.h \
$(srcdir)/../libcpp/include/rich-location.h \
+   $(srcdir)/../libcpp/include/label-text.h \
$(srcdir)/../libcpp/include/cpplib.h
 CODYLIB_H = $(srcdir)/../libcody/cody.hh
 INPUT_H = $(srcdir)/../libcpp/include/line-map.h input.h


[gcc r14-10285] aarch64: Add missing ACLE macro for NEON-SVE Bridge

2024-06-06 Thread Richard Ball via Gcc-cvs
https://gcc.gnu.org/g:35ed54f136fe63bd04d48ada6efb305457bbd824

commit r14-10285-g35ed54f136fe63bd04d48ada6efb305457bbd824
Author: Richard Ball 
Date:   Thu Jun 6 16:28:00 2024 +0100

aarch64: Add missing ACLE macro for NEON-SVE Bridge

__ARM_NEON_SVE_BRIDGE was missed in the original patch and is
added by this patch.

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
Add missing __ARM_NEON_SVE_BRIDGE.

(cherry picked from commit 43530bc40b1d0465911e493e56a6631202ce85b1)

Diff:
---
 gcc/config/aarch64/aarch64-c.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index fe1a20e4e54..d042e5fbd8c 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -75,6 +75,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
 
   builtin_define ("__ARM_STATE_ZA");
   builtin_define ("__ARM_STATE_ZT0");
+  builtin_define ("__ARM_NEON_SVE_BRIDGE");
 
   /* Define keyword attributes like __arm_streaming as macros that expand
  to the associated [[...]] attribute.  Use __extension__ in the attribute


[gcc r15-1075] aarch64: Add missing ACLE macro for NEON-SVE Bridge

2024-06-06 Thread Richard Ball via Gcc-cvs
https://gcc.gnu.org/g:43530bc40b1d0465911e493e56a6631202ce85b1

commit r15-1075-g43530bc40b1d0465911e493e56a6631202ce85b1
Author: Richard Ball 
Date:   Thu Jun 6 16:28:00 2024 +0100

aarch64: Add missing ACLE macro for NEON-SVE Bridge

__ARM_NEON_SVE_BRIDGE was missed in the original patch and is
added by this patch.

gcc/ChangeLog:

* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
Add missing __ARM_NEON_SVE_BRIDGE.

Diff:
---
 gcc/config/aarch64/aarch64-c.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index fe1a20e4e54..d042e5fbd8c 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -75,6 +75,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
 
   builtin_define ("__ARM_STATE_ZA");
   builtin_define ("__ARM_STATE_ZT0");
+  builtin_define ("__ARM_NEON_SVE_BRIDGE");
 
   /* Define keyword attributes like __arm_streaming as macros that expand
  to the associated [[...]] attribute.  Use __extension__ in the attribute


[gcc r15-1074] arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2.

2024-06-06 Thread Richard Ball via Gcc-cvs
https://gcc.gnu.org/g:2963c76e8e24d4ebaf2b1b4ac4d7ca44eb0a9025

commit r15-1074-g2963c76e8e24d4ebaf2b1b4ac4d7ca44eb0a9025
Author: Richard Ball 
Date:   Thu Jun 6 16:10:14 2024 +0100

arm: Fix CASE_VECTOR_SHORTEN_MODE for thumb2.

The CASE_VECTOR_SHORTEN_MODE query is missing some equals signs
which causes suboptimal codegen due to missed optimisation
opportunities. This patch also adds a test for thumb2
switch statements as none exist currently.

gcc/ChangeLog:
PR target/115353
* config/arm/arm.h (enum arm_auto_incmodes):
Correct CASE_VECTOR_SHORTEN_MODE query.

gcc/testsuite/ChangeLog:

* gcc.target/arm/thumb2-switchstatement.c: New test.

Diff:
---
 gcc/config/arm/arm.h   |   4 +-
 .../gcc.target/arm/thumb2-switchstatement.c| 144 +
 2 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 449e6935b32..0cd5d733952 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2111,8 +2111,8 @@ enum arm_auto_incmodes
   ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode)   \
   : SImode)
\
: (TARGET_THUMB2\
-  ? ((min > 0 && max < 0x200) ? QImode \
-  : (min > 0 && max <= 0x2) ? HImode   \
+  ? ((min >= 0 && max < 0x200) ? QImode\
+  : (min >= 0 && max < 0x2) ? HImode   \
   : SImode)
\
: ((min >= 0 && max < 1024) \
   ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode)   \
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c 
b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c
new file mode 100644
index 000..8badf318e62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-switchstatement.c
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-mthumb --param case-values-threshold=1 -fno-reorder-blocks 
-fno-tree-dce -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define NOP "nop;"
+#define NOP2 NOP NOP
+#define NOP4 NOP2 NOP2
+#define NOP8 NOP4 NOP4
+#define NOP16 NOP8 NOP8
+#define NOP32 NOP16 NOP16
+#define NOP64 NOP32 NOP32
+#define NOP128 NOP64 NOP64
+#define NOP256 NOP128 NOP128
+#define NOP512 NOP256 NOP256
+#define NOP1024 NOP512 NOP512
+#define NOP2048 NOP1024 NOP1024
+#define NOP4096 NOP2048 NOP2048
+#define NOP8192 NOP4096 NOP4096
+#define NOP16384 NOP8192 NOP8192
+#define NOP32768 NOP16384 NOP16384
+#define NOP65536 NOP32768 NOP32768
+#define NOP131072 NOP65536 NOP65536
+
+enum z
+{
+  a = 1,
+  b,
+  c,
+  d,
+  e,
+  f = 7,
+};
+
+inline void QIFunction (const char* flag)
+{
+  asm volatile (NOP32);
+  return;
+}
+
+inline void HIFunction (const char* flag)
+{
+  asm volatile (NOP512);
+  return;
+}
+
+inline void SIFunction (const char* flag)
+{
+  asm volatile (NOP131072);
+  return;
+}
+
+/*
+**QImode_test:
+** ...
+** tbb \[pc, r[0-9]+\]
+** ...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* 
QImode_test(enum z x)
+{
+  switch (x)
+{
+  case d:
+QIFunction("QItest");
+return "InlineASM";
+  case f:
+return "TEST";
+  default:
+return "Default";
+}
+}
+
+/* { dg-final { scan-assembler ".byte" } } */
+
+/*
+**HImode_test:
+** ...
+** tbh \[pc, r[0-9]+, lsl #1\]
+** ...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* 
HImode_test(enum z x)
+{
+  switch (x)
+  {
+case d:
+  HIFunction("HItest");
+  return "InlineASM";
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}
+
+/* { dg-final { scan-assembler ".2byte" } } */
+
+/*
+**SImode_test:
+** ...
+** adr (r[0-9]+), .L[0-9]+
+** ldr pc, \[\1, r[0-9]+, lsl #2\]
+** ...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* 
SImode_test(enum z x)
+{
+  switch (x)
+  {
+case d:
+  SIFunction("SItest");
+  return "InlineASM";
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}
+
+/* { dg-final { scan-assembler ".word" } } */
+
+/*
+**backwards_branch_test:
+** ...
+** adr (r[0-9]+), .L[0-9]+
+** ldr pc, \[\1, r[0-9]+, lsl #2\]
+** ...
+*/
+__attribute__ ((noinline)) __attribute__ ((noclone)) const char* 
backwards_branch_test(enum z x, int flag)
+{
+  if (flag == 5)
+  {
+backwards:
+  asm volatile (NOP512);
+  return "ASM";
+  }
+  switch (x)
+  {
+case d:
+  goto backwards;
+case f:
+  return "TEST";
+default:
+  return "Default";
+  }
+}

[gcc r15-1073] arm: Add .type and .size to __gnu_cmse_nonsecure_call [PR115360]

2024-06-06 Thread Andre Simoes Dias Vieira via Gcc-cvs
https://gcc.gnu.org/g:c559353af49fe5743d226ac3112a285b27a50f6a

commit r15-1073-gc559353af49fe5743d226ac3112a285b27a50f6a
Author: Andre Vieira 
Date:   Thu Jun 6 16:02:50 2024 +0100

arm: Add .type and .size to __gnu_cmse_nonsecure_call [PR115360]

This patch adds missing assembly directives to the CMSE library wrapper to 
call
functions with attribute cmse_nonsecure_call.  Without the .type directive 
the
linker will fail to produce the correct veneer if a call to this wrapper
function is too far from the wrapper itself.  The .size was added for
completeness, though we don't necessarily have a use case for it.

libgcc/ChangeLog:

PR target/115360
* config/arm/cmse_nonsecure_call.S: Add .type and .size directives.

Diff:
---
 libgcc/config/arm/cmse_nonsecure_call.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgcc/config/arm/cmse_nonsecure_call.S 
b/libgcc/config/arm/cmse_nonsecure_call.S
index f93ce6bb4f9..fef37b955af 100644
--- a/libgcc/config/arm/cmse_nonsecure_call.S
+++ b/libgcc/config/arm/cmse_nonsecure_call.S
@@ -33,6 +33,7 @@
 #endif
 
 .thumb
+.type __gnu_cmse_nonsecure_call, %function
 .global __gnu_cmse_nonsecure_call
 __gnu_cmse_nonsecure_call:
 #if defined(__ARM_ARCH_8M_MAIN__)
@@ -142,3 +143,4 @@ pop {r5-r7, pc}
 #else
 #error "This should only be used for armv8-m base- and mainline."
 #endif
+.size __gnu_cmse_nonsecure_call, .-__gnu_cmse_nonsecure_call


[gcc r15-1072] libgomp.texi (nvptx): Add missing preposition

2024-06-06 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:423522aacd9f30bb75aa77d38fccb630bfc4c98a

commit r15-1072-g423522aacd9f30bb75aa77d38fccb630bfc4c98a
Author: Tobias Burnus 
Date:   Thu Jun 6 16:37:55 2024 +0200

libgomp.texi (nvptx): Add missing preposition

libgomp/
* libgomp.texi (nvptx): Add missing preposition.

Diff:
---
 libgomp/libgomp.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index eb608915938..73e8e39ca42 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6432,7 +6432,7 @@ The implementation remark:
 @item I/O within OpenMP target regions and OpenACC compute regions is supported
   using the C library @code{printf} functions.
   Additionally, the Fortran @code{print}/@code{write} statements are
-  supported within OpenMP target regions, but not yet OpenACC compute
+  supported within OpenMP target regions, but not yet within OpenACC 
compute
   regions.  @c The latter needs 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE'.
 @item Compilation OpenMP code that contains @code{requires reverse_offload}
   requires at least @code{-march=sm_35}, compiling for @code{-march=sm_30}


[gcc r15-1071] AArch64: correct constraint on Upl early clobber alternatives

2024-06-06 Thread Tamar Christina via Gcc-cvs
https://gcc.gnu.org/g:afe85f8e22a703280b17c701f3490d89337f674a

commit r15-1071-gafe85f8e22a703280b17c701f3490d89337f674a
Author: Tamar Christina 
Date:   Thu Jun 6 14:35:48 2024 +0100

AArch64: correct constraint on Upl early clobber alternatives

I made an oversight in the previous patch, where I added a ?Upa
alternative to the Upl cases.  This causes it to create the tie
between the larger register file rather than the constrained one.

This fixes the affected patterns.

gcc/ChangeLog:

* config/aarch64/aarch64-sve.md (@aarch64_pred_cmp,
*cmp_cc, *cmp_ptest,
@aarch64_pred_cmp_wide,
*aarch64_pred_cmp_wide_cc,
*aarch64_pred_cmp_wide_ptest): Fix Upl tie 
alternative.
* config/aarch64/aarch64-sve2.md 
(@aarch64_pred_): Fix
Upl tie alternative.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md  | 64 +++---
 gcc/config/aarch64/aarch64-sve2.md |  2 +-
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index d902bce62fd..d69db34016a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8134,13 +8134,13 @@
  UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
-  {@ [ cons: =0 , 1   , 3 , 4; attrs: pred_clobber ]
- [ &Upa , Upl , w , ; yes ] 
cmp\t%0., %1/z, %3., #%4
- [ ?Upa , 0Upl, w , ; yes ] ^
- [ Upa  , Upl , w , ; no  ] ^
- [ &Upa , Upl , w , w; yes ] 
cmp\t%0., %1/z, %3., %4.
- [ ?Upa , 0Upl, w , w; yes ] ^
- [ Upa  , Upl , w , w; no  ] ^
+  {@ [ cons: =0 , 1  , 3 , 4; attrs: pred_clobber ]
+ [ &Upa , Upl, w , ; yes ] 
cmp\t%0., %1/z, %3., #%4
+ [ ?Upl , 0  , w , ; yes ] ^
+ [ Upa  , Upl, w , ; no  ] ^
+ [ &Upa , Upl, w , w; yes ] 
cmp\t%0., %1/z, %3., %4.
+ [ ?Upl , 0  , w , w; yes ] ^
+ [ Upa  , Upl, w , w; no  ] ^
   }
 )
 
@@ -8170,13 +8170,13 @@
  UNSPEC_PRED_Z))]
   "TARGET_SVE
&& aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
-  {@ [ cons: =0 , 1, 2 , 3; attrs: pred_clobber ]
- [ &Upa ,  Upl , w , ; yes ] 
cmp\t%0., %1/z, %2., #%3
- [ ?Upa ,  0Upl, w , ; yes ] ^
- [ Upa  ,  Upl , w , ; no  ] ^
- [ &Upa ,  Upl , w , w; yes ] 
cmp\t%0., %1/z, %2., %3.
- [ ?Upa ,  0Upl, w , w; yes ] ^
- [ Upa  ,  Upl , w , w; no  ] ^
+  {@ [ cons: =0 , 1   , 2 , 3; attrs: pred_clobber ]
+ [ &Upa ,  Upl, w , ; yes ] 
cmp\t%0., %1/z, %2., #%3
+ [ ?Upl ,  0  , w , ; yes ] ^
+ [ Upa  ,  Upl, w , ; no  ] ^
+ [ &Upa ,  Upl, w , w; yes ] 
cmp\t%0., %1/z, %2., %3.
+ [ ?Upl ,  0  , w , w; yes ] ^
+ [ Upa  ,  Upl, w , w; no  ] ^
   }
   "&& !rtx_equal_p (operands[4], operands[6])"
   {
@@ -8205,12 +8205,12 @@
   "TARGET_SVE
&& aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
   {@ [ cons: =0, 1, 2 , 3; attrs: pred_clobber ]
- [ &Upa,  Upl , w , ; yes ] 
cmp\t%0., %1/z, %2., #%3
- [ ?Upa,  0Upl, w , ; yes ] ^
- [ Upa ,  Upl , w , ; no  ] ^
- [ &Upa,  Upl , w , w; yes ] 
cmp\t%0., %1/z, %2., %3.
- [ ?Upa,  0Upl, w , w; yes ] ^
- [ Upa ,  Upl , w , w; no  ] ^
+ [ &Upa,  Upl, w , ; yes ] 
cmp\t%0., %1/z, %2., #%3
+ [ ?Upl,  0  , w , ; yes ] ^
+ [ Upa ,  Upl, w , ; no  ] ^
+ [ &Upa,  Upl, w , w; yes ] 
cmp\t%0., %1/z, %2., %3.
+ [ ?Upl,  0  , w , w; yes ] ^
+ [ Upa ,  Upl, w , w; no  ] ^
   }
   "&& !rtx_equal_p (operands[4], operands[6])"
   {
@@ -8263,10 +8263,10 @@
  UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
-  {@ [ cons: =0, 1, 2, 3, 4; attrs: pred_clobber ]
- [ &Upa,  Upl ,  , w, w; yes ] 
cmp\t%0., %1/z, %3., %4.d
- [ ?Upa,  0Upl,  , w, w; yes ] ^
- [ Upa ,  Upl ,  , w, w; no  ] ^
+  {@ [ cons: =0, 1   , 2, 3, 4; attrs: pred_clobber ]
+ [ &Upa   

[gcc r15-1068] nvptx, libgcc: Stub unwinding implementation

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:a29c5852a606588175d11844db84da0881227100

commit r15-1068-ga29c5852a606588175d11844db84da0881227100
Author: Thomas Schwinge 
Date:   Wed Jun 5 13:11:04 2024 +0200

nvptx, libgcc: Stub unwinding implementation

Adding stub '_Unwind_Backtrace', '_Unwind_GetIPInfo' functions is necessary
for linking libbacktrace, as a normal (non-'LIBGFOR_MINIMAL') configuration
of libgfortran wants to do, for example.

The file 'libgcc/config/nvptx/unwind-nvptx.c' is copied from
'libgcc/config/gcn/unwind-gcn.c'.

libgcc/ChangeLog:

* config/nvptx/t-nvptx: Add unwind-nvptx.c.
* config/nvptx/unwind-nvptx.c: New file.

Co-authored-by: Andrew Stubbs 

Diff:
---
 libgcc/config/nvptx/t-nvptx|  3 ++-
 libgcc/config/nvptx/unwind-nvptx.c | 37 +
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/libgcc/config/nvptx/t-nvptx b/libgcc/config/nvptx/t-nvptx
index 260ed6334db..1ff574c2982 100644
--- a/libgcc/config/nvptx/t-nvptx
+++ b/libgcc/config/nvptx/t-nvptx
@@ -1,6 +1,7 @@
 LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
$(srcdir)/config/nvptx/mgomp.c \
-   $(srcdir)/config/nvptx/atomic.c
+   $(srcdir)/config/nvptx/atomic.c \
+   $(srcdir)/config/nvptx/unwind-nvptx.c
 
 # Until we have libstdc++-v3/libsupc++ proper.
 LIB2ADD += $(srcdir)/c++-minimal/guard.c
diff --git a/libgcc/config/nvptx/unwind-nvptx.c 
b/libgcc/config/nvptx/unwind-nvptx.c
new file mode 100644
index 000..d08ba266be1
--- /dev/null
+++ b/libgcc/config/nvptx/unwind-nvptx.c
@@ -0,0 +1,37 @@
+/* Stub unwinding implementation.
+
+   Copyright (C) 2019-2024 Free Software Foundation, Inc.
+   Contributed by Mentor Graphics
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "unwind.h"
+
+_Unwind_Reason_Code
+_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument)
+{
+  return 0;
+}
+
+_Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *c, int *ip_before_insn)
+{
+  return 0;
+}


[gcc r15-1069] nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable [PR97384, PR105274]

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:0d25989d60d15866ef4737d66e02432f50717255

commit r15-1069-g0d25989d60d15866ef4737d66e02432f50717255
Author: Thomas Schwinge 
Date:   Fri May 31 17:04:39 2024 +0200

nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment 
variable [PR97384, PR105274]

... as a means to manually set the "native" GPU thread stack size.

PR libgomp/97384
PR libgomp/105274
libgomp/
* plugin/cuda-lib.def (cuCtxSetLimit): Add.
* plugin/plugin-nvptx.c (nvptx_open_device): Handle
'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable.

Diff:
---
 libgomp/plugin/cuda-lib.def   |  1 +
 libgomp/plugin/plugin-nvptx.c | 45 +++
 2 files changed, 46 insertions(+)

diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index 007c6e0f4df..9255c1cff68 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -4,6 +4,7 @@ CUDA_ONE_CALL (cuCtxGetCurrent)
 CUDA_ONE_CALL (cuCtxGetDevice)
 CUDA_ONE_CALL (cuCtxPopCurrent)
 CUDA_ONE_CALL (cuCtxPushCurrent)
+CUDA_ONE_CALL (cuCtxSetLimit)
 CUDA_ONE_CALL (cuCtxSynchronize)
 CUDA_ONE_CALL (cuDeviceGet)
 CUDA_ONE_CALL (cuDeviceGetAttribute)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 0f3a3be1898..99cbcb699b3 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -150,6 +150,8 @@ init_cuda_lib (void)
 
 #include "secure_getenv.h"
 
+static void notify_var (const char *, const char *);
+
 #undef MIN
 #undef MAX
 #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
@@ -341,6 +343,9 @@ struct ptx_device
 
 static struct ptx_device **ptx_devices;
 
+/* "Native" GPU thread stack size.  */
+static unsigned native_gpu_thread_stack_size = 0;
+
 /* OpenMP kernels reserve a small amount of ".shared" space for use by
omp_alloc.  The size is configured using GOMP_NVPTX_LOWLAT_POOL, but the
default is set here.  */
@@ -555,6 +560,46 @@ nvptx_open_device (int n)
   ptx_dev->free_blocks = NULL;
   pthread_mutex_init (&ptx_dev->free_blocks_lock, NULL);
 
+  /* "Native" GPU thread stack size.  */
+  {
+/* This is intentionally undocumented, until we work out a proper, common
+   scheme (as much as makes sense) between all offload plugins as well
+   as between nvptx offloading use of "native" stacks for OpenACC vs.
+   OpenMP "soft stacks" vs. OpenMP '-msoft-stack-reserve-local=[...]'.
+
+   GCN offloading has a 'GCN_STACK_SIZE' environment variable (without
+   'GOMP_' prefix): documented; presumably used for all things OpenACC and
+   OpenMP?  Based on GCN command-line option '-mstack-size=[...]' (marked
+   "obsolete"), that one may be set via a GCN 'mkoffload'-synthesized
+   'constructor' function.  */
+const char *var_name = "GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE";
+const char *env_var = secure_getenv (var_name);
+notify_var (var_name, env_var);
+
+if (env_var != NULL)
+  {
+   char *endptr;
+   unsigned long val = strtoul (env_var, &endptr, 10);
+   if (endptr == NULL || *endptr != '\0'
+   || errno == ERANGE || errno == EINVAL
+   || val > UINT_MAX)
+ GOMP_PLUGIN_error ("Error parsing %s", var_name);
+   else
+ native_gpu_thread_stack_size = val;
+  }
+  }
+  if (native_gpu_thread_stack_size == 0)
+; /* Zero means use default.  */
+  else
+{
+  GOMP_PLUGIN_debug (0, "Setting \"native\" GPU thread stack size"
+" ('CU_LIMIT_STACK_SIZE') to %u bytes\n",
+native_gpu_thread_stack_size);
+  CUDA_CALL (cuCtxSetLimit,
+CU_LIMIT_STACK_SIZE, (size_t) native_gpu_thread_stack_size);
+}
+
+  /* OpenMP "soft stacks".  */
   ptx_dev->omp_stacks.ptr = 0;
   ptx_dev->omp_stacks.size = 0;
   pthread_mutex_init (&ptx_dev->omp_stacks.lock, NULL);


[gcc r15-1070] nvptx, libgfortran: Switch out of "minimal" mode

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:3a4775d4403f2e88b589e88a9937cc1fd45a0e87

commit r15-1070-g3a4775d4403f2e88b589e88a9937cc1fd45a0e87
Author: Thomas Schwinge 
Date:   Wed Jun 5 13:13:24 2024 +0200

nvptx, libgfortran: Switch out of "minimal" mode

..., in order to enable (portions of) Fortran I/O, for example.

libgfortran/
* configure.ac: No longer set 'LIBGFOR_MINIMAL' for nvptx.
* configure: Regenerate.
libgomp/
* libgomp.texi (nvptx): Update.
* testsuite/libgomp.fortran/target-print-1-nvptx.f90: Remove.
* testsuite/libgomp.fortran/target-print-1.f90: Adjust.
* testsuite/libgomp.oacc-fortran/error_stop-2-nvptx.f: New.
* testsuite/libgomp.oacc-fortran/error_stop-2.f: Adjust.
* testsuite/libgomp.oacc-fortran/print-1-nvptx.f90: Adjust.
* testsuite/libgomp.oacc-fortran/print-1.f90: Adjust.
* testsuite/libgomp.oacc-fortran/stop-2-nvptx.f: New.
* testsuite/libgomp.oacc-fortran/stop-2.f: Adjust.

Co-authored-by: Andrew Stubbs 

Diff:
---
 libgfortran/configure  | 21 +---
 libgfortran/configure.ac   | 17 -
 libgomp/libgomp.texi   | 10 +++---
 .../libgomp.fortran/target-print-1-nvptx.f90   | 11 --
 .../testsuite/libgomp.fortran/target-print-1.f90   |  3 --
 .../libgomp.oacc-fortran/error_stop-2-nvptx.f  | 39 +
 .../testsuite/libgomp.oacc-fortran/error_stop-2.f  |  3 +-
 .../libgomp.oacc-fortran/print-1-nvptx.f90 | 40 ++
 libgomp/testsuite/libgomp.oacc-fortran/print-1.f90 |  4 +--
 .../testsuite/libgomp.oacc-fortran/stop-2-nvptx.f  | 36 +++
 libgomp/testsuite/libgomp.oacc-fortran/stop-2.f|  3 +-
 11 files changed, 134 insertions(+), 53 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index 774dd52fc95..11a1bc5f070 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -6207,17 +6207,12 @@ else
 fi
 
 
-# For GPU offloading, not everything in libfortran can be supported.
-# Currently, the only target that has this problem is nvptx.  The
-# following is a (partial) list of features that are unsupportable on
-# this particular target:
-# * Constructors
-# * alloca
-# * C library support for I/O, with printf as the one notable exception
-# * C library support for other features such as signal, environment
-#   variables, time functions
-
- if test "x${target_cpu}" = xnvptx; then
+# "Minimal" mode is for targets that cannot (yet) support all features of
+# libgfortran.  It avoids the need for working constructors, alloca, and C
+# library support for I/O, signals, environment variables, time functions, etc.
+# At present there are no targets that require this mode.
+
+ if false; then
   LIBGFOR_MINIMAL_TRUE=
   LIBGFOR_MINIMAL_FALSE='#'
 else
@@ -12852,7 +12847,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12855 "configure"
+#line 12850 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12958,7 +12953,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12961 "configure"
+#line 12956 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 46585a3ee14..cca1ea0ea97 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -209,17 +209,12 @@ AM_CONDITIONAL(LIBGFOR_USE_SYMVER, [test 
"x$gfortran_use_symver" != xno])
 AM_CONDITIONAL(LIBGFOR_USE_SYMVER_GNU, [test "x$gfortran_use_symver" = xgnu])
 AM_CONDITIONAL(LIBGFOR_USE_SYMVER_SUN, [test "x$gfortran_use_symver" = xsun])
 
-# For GPU offloading, not everything in libfortran can be supported.
-# Currently, the only target that has this problem is nvptx.  The
-# following is a (partial) list of features that are unsupportable on
-# this particular target:
-# * Constructors
-# * alloca
-# * C library support for I/O, with printf as the one notable exception
-# * C library support for other features such as signal, environment
-#   variables, time functions
-
-AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx])
+# "Minimal" mode is for targets that cannot (yet) support all features of
+# libgfortran.  It avoids the need for working constructors, alloca, and C
+# library support for I/O, signals, environment variables, time functions, etc.
+# At present there are no targets that require this mode.
+
+AM_CONDITIONAL(LIBGFOR_MINIMAL, false)
 
 # Some compiler target support may have limited support for integer
 # or floating point numbers – or may want to reduce the libgfortran size
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index c52bb2672c6..eb608915938 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -

[gcc r15-1067] nvptx offloading: Global constructor, destructor support, via nvptx-tools 'ld'

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:5bbe5350a0932c78d4ffce292ba4104a6fe6ef96

commit r15-1067-g5bbe5350a0932c78d4ffce292ba4104a6fe6ef96
Author: Thomas Schwinge 
Date:   Wed Jun 5 12:40:50 2024 +0200

nvptx offloading: Global constructor, destructor support, via nvptx-tools 
'ld'

This extends commit d9c90c82d900fdae95df4499bf5f0a4ecb903b53
"nvptx target: Global constructor, destructor support, via nvptx-tools 'ld'"
for offloading.

libgcc/
* config/nvptx/gbl-ctors.c ["mgomp"]
(__do_global_ctors__entry__mgomp)
(__do_global_dtors__entry__mgomp): New.
[!"mgomp"] (__do_global_ctors__entry, __do_global_dtors__entry):
New.
libgomp/
* plugin/plugin-nvptx.c (nvptx_do_global_cdtors): New.
(nvptx_close_device, GOMP_OFFLOAD_load_image)
(GOMP_OFFLOAD_unload_image): Call it.

Diff:
---
 libgcc/config/nvptx/gbl-ctors.c |  55 +++
 libgomp/plugin/plugin-nvptx.c   | 117 +++-
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/libgcc/config/nvptx/gbl-ctors.c b/libgcc/config/nvptx/gbl-ctors.c
index a2ca053e5e3..a56d64f8ef8 100644
--- a/libgcc/config/nvptx/gbl-ctors.c
+++ b/libgcc/config/nvptx/gbl-ctors.c
@@ -68,6 +68,61 @@ __gbl_ctors (void)
 }
 
 
+/* For nvptx offloading configurations, need '.entry' wrappers.  */
+
+# if defined(__nvptx_softstack__) && defined(__nvptx_unisimt__)
+
+/* OpenMP */
+
+/* See 'crt0.c', 'mgomp.c'.  */
+extern void *__nvptx_stacks[32] __attribute__((shared,nocommon));
+extern unsigned __nvptx_uni[32] __attribute__((shared,nocommon));
+
+__attribute__((kernel)) void __do_global_ctors__entry__mgomp (void *);
+
+void
+__do_global_ctors__entry__mgomp (void *nvptx_stacks_0)
+{
+  __nvptx_stacks[0] = nvptx_stacks_0;
+  __nvptx_uni[0] = 0;
+
+  __static_do_global_ctors ();
+}
+
+__attribute__((kernel)) void __do_global_dtors__entry__mgomp (void *);
+
+void
+__do_global_dtors__entry__mgomp (void *nvptx_stacks_0)
+{
+  __nvptx_stacks[0] = nvptx_stacks_0;
+  __nvptx_uni[0] = 0;
+
+  __static_do_global_dtors ();
+}
+
+# else
+
+/* OpenACC */
+
+__attribute__((kernel)) void __do_global_ctors__entry (void);
+
+void
+__do_global_ctors__entry (void)
+{
+  __static_do_global_ctors ();
+}
+
+__attribute__((kernel)) void __do_global_dtors__entry (void);
+
+void
+__do_global_dtors__entry (void)
+{
+  __static_do_global_dtors ();
+}
+
+# endif
+
+
 /* The following symbol just provides a means for the nvptx-tools 'ld' to
trigger linking in this file.  */
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 4cedc5390a3..0f3a3be1898 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -346,6 +346,11 @@ static struct ptx_device **ptx_devices;
default is set here.  */
 static unsigned lowlat_pool_size = 8 * 1024;
 
+static bool nvptx_do_global_cdtors (CUmodule, struct ptx_device *,
+   const char *);
+static size_t nvptx_stacks_size ();
+static void *nvptx_stacks_acquire (struct ptx_device *, size_t, int);
+
 static inline struct nvptx_thread *
 nvptx_thread (void)
 {
@@ -565,6 +570,18 @@ nvptx_close_device (struct ptx_device *ptx_dev)
   if (!ptx_dev)
 return true;
 
+  bool ret = true;
+
+  for (struct ptx_image_data *image = ptx_dev->images;
+   image != NULL;
+   image = image->next)
+{
+  if (!nvptx_do_global_cdtors (image->module, ptx_dev,
+  "__do_global_dtors__entry"
+  /* or "__do_global_dtors__entry__mgomp" */))
+   ret = false;
+}
+
   for (struct ptx_free_block *b = ptx_dev->free_blocks; b;)
 {
   struct ptx_free_block *b_next = b->next;
@@ -585,7 +602,8 @@ nvptx_close_device (struct ptx_device *ptx_dev)
 CUDA_CALL (cuCtxDestroy, ptx_dev->ctx);
 
   free (ptx_dev);
-  return true;
+
+  return ret;
 }
 
 static int
@@ -1317,6 +1335,93 @@ nvptx_set_clocktick (CUmodule module, struct ptx_device 
*dev)
 GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
 }
 
+/* Invoke MODULE's global constructors/destructors.  */
+
+static bool
+nvptx_do_global_cdtors (CUmodule module, struct ptx_device *ptx_dev,
+   const char *funcname)
+{
+  bool ret = true;
+  char *funcname_mgomp = NULL;
+  CUresult r;
+  CUfunction funcptr;
+  r = CUDA_CALL_NOCHECK (cuModuleGetFunction,
+&funcptr, module, funcname);
+  GOMP_PLUGIN_debug (0, "cuModuleGetFunction (%s): %s\n",
+funcname, cuda_error (r));
+  if (r == CUDA_ERROR_NOT_FOUND)
+{
+  /* Try '[funcname]__mgomp'.  */
+
+  size_t funcname_len = strlen (funcname);
+  const char *mgomp_suffix = "__mgomp";
+  size_t mgomp_suffix_len = strlen (mgomp_suffix);
+  funcname_mgomp
+   = GOMP_PLUGIN_malloc (funcname_len + mgomp_suffix_len + 1);
+  memcpy (funcname_mgomp, funcname, funcname_len)

[gcc r15-1066] nvptx: Make 'nvptx_uniform_warp_check' fit for non-full-warp execution, via 'vote.all.pred'

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:b4e68dd9084e48ee3e83c11d7f27548d8cca7066

commit r15-1066-gb4e68dd9084e48ee3e83c11d7f27548d8cca7066
Author: Thomas Schwinge 
Date:   Fri May 10 12:50:23 2024 +0200

nvptx: Make 'nvptx_uniform_warp_check' fit for non-full-warp execution, via 
'vote.all.pred'

For example, this allows for '-muniform-simt' code to be executed
single-threaded, which currently fails (device-side 'trap'): the '0xffffffff'
bitmask isn't correct if not all 32 threads of a warp are active.  The same
issue/fix, I suppose but have not verified, would apply if we were to allow 
for
OpenACC 'vector_length' smaller than 32, for example for OpenACC 'serial'.

We use 'nvptx_uniform_warp_check' only for PTX ISA version less than 6.0.
Otherwise we're using 'nvptx_warpsync', which emits 'bar.warp.sync 0xffffffff',
which evidently appears to do the right thing.  (I've tested 
'-muniform-simt'
code executing single-threaded.)

The change that I proposed on 2022-12-15 was to emit PTX code to calculate
'(1 << %ntid.x) - 1' as the actual bitmask to use instead of '0xffffffff'.
This works, but the PTX JIT generates SASS code to do this computation.

In turn, this change now uses PTX 'vote.all.pred' -- which even simplifies 
upon
the original code a little bit, see the following exemplary SASS 'diff' 
before
vs. after this change:

[...]
  /*[...]*/   SYNC  
  (*"BRANCH_TARGETS .L_x_332"*)}
  .L_x_332:
- /*[...]*/   VOTE.ANY R9, PT, PT ;
+ /*[...]*/   VOTE.ALL P1, PT ;
- /*[...]*/   ISETP.NE.U32.AND P1, PT, R9, 
-0x1, PT ;
- /*[...]*/  @!P1 BRA `(.L_x_333) ;
+ /*[...]*/   @P1 BRA `(.L_x_333) ;
  /*[...]*/   BPT.TRAP 0x1 ;
  .L_x_333:
- /*[...]*/   @P1 EXIT ;
+ /*[...]*/  @!P1 EXIT ;
[...]

gcc/
* config/nvptx/nvptx.md (nvptx_uniform_warp_check): Make fit for
non-full-warp execution, via 'vote.all.pred'.
gcc/testsuite/
* gcc.target/nvptx/nvptx.exp
(check_effective_target_default_ptx_isa_version_at_least_6_0):
New.
* gcc.target/nvptx/uniform-simt-2.c: Adjust.
* gcc.target/nvptx/uniform-simt-5.c: New.

Diff:
---
 gcc/config/nvptx/nvptx.md   | 13 +---
 gcc/testsuite/gcc.target/nvptx/nvptx.exp|  5 +
 gcc/testsuite/gcc.target/nvptx/uniform-simt-2.c |  2 +-
 gcc/testsuite/gcc.target/nvptx/uniform-simt-5.c | 28 +
 4 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index ef7e3fb00fa..7878a3b6f09 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -2316,14 +2316,11 @@
   {
 const char *insns[] = {
   "{",
-  "\\t"  ".reg.b32""\\t" "%%r_act;",
-  "%.\\t""vote.ballot.b32" "\\t" "%%r_act,1;",
-  "\\t"  ".reg.pred"   "\\t" "%%r_do_abort;",
-  "\\t"  "mov.pred""\\t" "%%r_do_abort,0;",
-  "%.\\t""setp.ne.b32" "\\t" "%%r_do_abort,%%r_act,"
- "0xffffffff;",
-  "@ %%r_do_abort\\t" "trap;",
-  "@ %%r_do_abort\\t" "exit;",
+  "\\t"".reg.pred" "\\t" "%%r_sync;",
+  "\\t""mov.pred"  "\\t" "%%r_sync, 1;",
+  "%.\\t"  "vote.all.pred" "\\t" "%%r_sync, 1;",
+  "@!%%r_sync\\t"  "trap;",
+  "@!%%r_sync\\t"  "exit;",
   "}",
   NULL
 };
diff --git a/gcc/testsuite/gcc.target/nvptx/nvptx.exp 
b/gcc/testsuite/gcc.target/nvptx/nvptx.exp
index 97aa7ae0852..3151381f51a 100644
--- a/gcc/testsuite/gcc.target/nvptx/nvptx.exp
+++ b/gcc/testsuite/gcc.target/nvptx/nvptx.exp
@@ -49,6 +49,11 @@ proc check_effective_target_default_ptx_isa_version_at_least 
{ major minor } {
 return $res
 }
 
+# Return 1 if code by default compiles for at least PTX ISA version 6.0.
+proc check_effective_target_default_ptx_isa_version_at_least_6_0 { } {
+return [check_effective_target_default_ptx_isa_version_at_least 6 0]
+}
+
 # Return 1 if code with PTX ISA version major.minor or higher can be run.
 proc check_effective_target_runtime_ptx_isa_version_at_least { major minor } {
 set name runtime_ptx_isa_version_${major}_${minor}
diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-2.c 
b/gcc/testsuite/gcc.target/nvptx/uniform-simt-2.c
index b1eee0d618f..1d83c49a44b 100644
--- a/gcc/testsuite/gcc.target/nvptx/uniform-simt-2.c
+++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-2.c
@@ -17,4 +17,4 @@ f (void)
 
 /* { dg-final { scan-assemble

[gcc r15-1065] Clean up after newlib "nvptx: In offloading execution, map '_exit' to 'abort' [GCC PR85463]"

2024-06-06 Thread Thomas Schwinge via Gcc-cvs
https://gcc.gnu.org/g:395ac0417a17ba6405873f891f895417d696b603

commit r15-1065-g395ac0417a17ba6405873f891f895417d696b603
Author: Thomas Schwinge 
Date:   Wed Jun 5 14:34:06 2024 +0200

Clean up after newlib "nvptx: In offloading execution, map '_exit' to 
'abort' [GCC PR85463]"

PR target/85463
libgfortran/
* runtime/minimal.c [__nvptx__] (exit): Don't override.
libgomp/
* config/nvptx/error.c (exit): Don't override.
* testsuite/libgomp.oacc-fortran/error_stop-1.f: Update.
* testsuite/libgomp.oacc-fortran/error_stop-2.f: Likewise.
* testsuite/libgomp.oacc-fortran/error_stop-3.f: Likewise.
* testsuite/libgomp.oacc-fortran/stop-1.f: Likewise.
* testsuite/libgomp.oacc-fortran/stop-2.f: Likewise.
* testsuite/libgomp.oacc-fortran/stop-3.f: Likewise.

Diff:
---
 libgfortran/runtime/minimal.c |  8 
 libgomp/config/nvptx/error.c  |  7 ---
 libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f |  8 +---
 libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f |  8 +---
 libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f |  8 +---
 libgomp/testsuite/libgomp.oacc-fortran/stop-1.f   | 13 +
 libgomp/testsuite/libgomp.oacc-fortran/stop-2.f   |  6 +-
 libgomp/testsuite/libgomp.oacc-fortran/stop-3.f   | 12 
 8 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c
index f13b3a4bf90..619f818c844 100644
--- a/libgfortran/runtime/minimal.c
+++ b/libgfortran/runtime/minimal.c
@@ -31,14 +31,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #endif
 
 
-#if __nvptx__
-/* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region
-   doesn't terminate process'.  */
-# undef exit
-# define exit(status) do { (void) (status); abort (); } while (0)
-#endif
-
-
 #if __nvptx__
 /* 'printf' is all we have.  */
 # undef estr_vprintf
diff --git a/libgomp/config/nvptx/error.c b/libgomp/config/nvptx/error.c
index 7e668276004..f7a2536c29b 100644
--- a/libgomp/config/nvptx/error.c
+++ b/libgomp/config/nvptx/error.c
@@ -58,11 +58,4 @@
 #endif
 
 
-/* The 'exit (EXIT_FAILURE);' of an Fortran (only, huh?) OpenMP 'error'
-   directive with 'severity (fatal)' causes a hang, so 'abort' instead of
-   'exit'.  */
-#undef exit
-#define exit(status) abort ()
-
-
 #include "../../error.c"
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f 
b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
index de727749a53..3918d6853f6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-1.f
@@ -16,14 +16,16 @@
   END PROGRAM MAIN
 
 ! { dg-output "CheCKpOInT(\n|\r\n|\r)+" }
+
 ! { dg-output "ERROR STOP (\n|\r\n|\r)+" }
 !
 ! In gfortran's main program, libfortran's set_options is called - which sets
 ! compiler_options.backtrace = 1 by default.  For an offload libgfortran, this
 ! is never called and, hence, "Error termination." is never printed.  Thus:
 ! { dg-output "Error termination.*" { target { ! { 
openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } }
-!
-! PR85463:
+
+! PR85463.  The 'exit' implementation used with nvptx
+! offloading is a little bit different.
 ! { dg-output "libgomp: cuStreamSynchronize error.*" { target 
openacc_nvidia_accel_selected } }
-!
+
 ! { dg-shouldfail "" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f 
b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
index 475c9cb5850..5951e8cbe64 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-2.f
@@ -16,14 +16,16 @@
   END PROGRAM MAIN
 
 ! { dg-output "CheCKpOInT(\n|\r\n|\r)+" }
+
 ! { dg-output "ERROR STOP 35(\n|\r\n|\r)+" }
 !
 ! In gfortran's main program, libfortran's set_options is called - which sets
 ! compiler_options.backtrace = 1 by default.  For an offload libgfortran, this
 ! is never called and, hence, "Error termination." is never printed.  Thus:
 ! { dg-output "Error termination.*" { target { ! { 
openacc_nvidia_accel_selected || openacc_radeon_accel_selected } } } }
-!
-! PR85463:
+
+! PR85463.  The 'exit' implementation used with nvptx
+! offloading is a little bit different.
 ! { dg-output "libgomp: cuStreamSynchronize error.*" { target 
openacc_nvidia_accel_selected } }
-!
+
 ! { dg-shouldfail "" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f 
b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
index ab63444ce34..15e02d8b744 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
+++ b/libgomp/testsuite/libgomp.oacc-fortran/error_stop-3.f
@@ -16,14 +16,16 @@
   END PROGRAM MAIN
 
 ! { dg-output "CheCKpOInT(\n|\r\n|\r)+" }
+
 ! { dg-output "ERROR STOP SiGN(\n|\r\n|

[gcc r15-1064] Vect: Support IFN SAT_SUB for unsigned vector int

2024-06-06 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:2d11de35d378a0763a8956638766182a49272e0b

commit r15-1064-g2d11de35d378a0763a8956638766182a49272e0b
Author: Pan Li 
Date:   Wed May 29 16:18:31 2024 +0800

Vect: Support IFN SAT_SUB for unsigned vector int

This patch would like to support the .SAT_SUB for the unsigned
vector int.  Given we have below example code:

void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  for (unsigned i = 0; i < n; i++)
out[i] = (x[i] - y[i]) & (-(uint64_t)(x[i] >= y[i]));
}

Before this patch:
void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _77 = .SELECT_VL (ivtmp_75, POLY_INT_CST [2, 2]);
  ivtmp_56 = _77 * 8;
  vect__4.7_59 = .MASK_LEN_LOAD (vectp_x.5_57, 64B, { -1, ... }, _77, 0);
  vect__6.10_63 = .MASK_LEN_LOAD (vectp_y.8_61, 64B, { -1, ... }, _77, 0);

  mask__7.11_64 = vect__4.7_59 >= vect__6.10_63;
  _66 = .COND_SUB (mask__7.11_64, vect__4.7_59, vect__6.10_63, { 0, ... });

  .MASK_LEN_STORE (vectp_out.15_71, 64B, { -1, ... }, _77, 0, _66);
  vectp_x.5_58 = vectp_x.5_57 + ivtmp_56;
  vectp_y.8_62 = vectp_y.8_61 + ivtmp_56;
  vectp_out.15_72 = vectp_out.15_71 + ivtmp_56;
  ivtmp_76 = ivtmp_75 - _77;
  ...
}

After this patch:
void
vec_sat_sub_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _76 = .SELECT_VL (ivtmp_74, POLY_INT_CST [2, 2]);
  ivtmp_60 = _76 * 8;
  vect__4.7_63 = .MASK_LEN_LOAD (vectp_x.5_61, 64B, { -1, ... }, _76, 0);
  vect__6.10_67 = .MASK_LEN_LOAD (vectp_y.8_65, 64B, { -1, ... }, _76, 0);

  vect_patt_37.11_68 = .SAT_SUB (vect__4.7_63, vect__6.10_67);

  .MASK_LEN_STORE (vectp_out.12_70, 64B, { -1, ... }, _76, 0, 
vect_patt_37.11_68);
  vectp_x.5_62 = vectp_x.5_61 + ivtmp_60;
  vectp_y.8_66 = vectp_y.8_65 + ivtmp_60;
  vectp_out.12_71 = vectp_out.12_70 + ivtmp_60;
  ivtmp_75 = ivtmp_74 - _76;
  ...
}

The test suites below passed for this patch:
* The x86 bootstrap test.
* The x86 full regression test.
* The riscv full regression tests.

gcc/ChangeLog:

* match.pd: Add new form for vector mode recog.
* tree-vect-patterns.cc (gimple_unsigned_integer_sat_sub): Add
new match func decl;
(vect_recog_build_binary_gimple_call): Extract helper func to
build gcall with given internal_fn.
(vect_recog_sat_sub_pattern): Add new func impl to recog .SAT_SUB.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd  | 14 
 gcc/tree-vect-patterns.cc | 85 ++-
 2 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 7c1ad428a3c..ebc60eba8dc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3110,6 +3110,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
+/* Unsigned saturation sub, case 3 (branchless with gt):
+   SAT_U_SUB = (X - Y) * (X > Y).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (minus @0 @1) (convert (gt @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
+/* Unsigned saturation sub, case 4 (branchless with ge):
+   SAT_U_SUB = (X - Y) * (X >= Y).  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (minus @0 @1) (convert (ge @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+  && types_match (type, @0, @1
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 81e8fdc9122..cef901808eb 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4488,6 +4488,32 @@ vect_recog_mult_pattern (vec_info *vinfo,
 }
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+
+static gcall *
+vect_recog_build_binary_gimple_call (vec_info *vinfo, gimple *stmt,
+internal_fn fn, tree *type_out,
+tree op_0, tree op_1)
+{
+  tree itype = TREE_TYPE (op_0);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE
+&& direct_internal_fn_supported_p (fn, vtype, OPTIMIZE_FOR_BOTH))
+{
+  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+
+  gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+  gimple_call_set_nothrow (call, /* nothrow_p */ false);
+  gimple_set_location (call, gimple_location (stmt));
+
+  *type_out = vtype;
+
+  return call;
+}
+
+  return NULL;
+}
 
 /*
  * Try to detect saturation add pattern (SAT_ADD), aka below gimple

[gcc r15-1063] lto: Remove random_seed from section name.

2024-06-06 Thread Michal Jires via Gcc-cvs
https://gcc.gnu.org/g:346f33e27809ae012696c4731c8ebcec2414dbfb

commit r15-1063-g346f33e27809ae012696c4731c8ebcec2414dbfb
Author: Michal Jires 
Date:   Tue Jan 9 17:49:34 2024 +0100

lto: Remove random_seed from section name.

This patch removes suffixes from section names during LTO linking.

These suffixes were originally added for ld -r to work (PR lto/44992).
They were added to all LTO object files, but are only useful before WPA.
After that they waste space, and if kept random, make LTO caching 
impossible.

Bootstrapped/regtested on x86_64-pc-linux-gnu

gcc/ChangeLog:

* lto-streamer.cc (lto_get_section_name): Remove suffixes after WPA.

gcc/lto/ChangeLog:

* lto-common.cc (lto_section_with_id): Don't load suffix during 
LTRANS.

Diff:
---
 gcc/lto-streamer.cc   | 11 +--
 gcc/lto/lto-common.cc |  7 +++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/lto-streamer.cc b/gcc/lto-streamer.cc
index 8032bbf7108..40ca6b2da1b 100644
--- a/gcc/lto-streamer.cc
+++ b/gcc/lto-streamer.cc
@@ -132,11 +132,18 @@ lto_get_section_name (int section_type, const char *name,
  doesn't confuse the reader with merged sections.
 
  For options don't add a ID, the option reader cannot deal with them
- and merging should be ok here. */
-  if (section_type == LTO_section_opts)
+ and merging should be ok here.
+
+ LTRANS files (output of wpa, input and output of ltrans) are handled
+ directly inside of linker/lto-wrapper, so name uniqueness for external
+ tools is not needed.
+ Randomness would inhibit incremental LTO.  */
+  if (section_type == LTO_section_opts || flag_ltrans)
 strcpy (post, "");
   else if (f != NULL) 
 sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
+  else if (flag_wpa)
+strcpy (post, "");
   else
 sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed (false)); 
   char *res = concat (section_name_prefix, sep, add, post, NULL);
diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 2ce94cc3282..34aa63b179c 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -2176,6 +2176,13 @@ lto_section_with_id (const char *name, unsigned 
HOST_WIDE_INT *id)
 
   if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
 return 0;
+
+  if (flag_ltrans)
+{
+  *id = 0;
+  return 1;
+}
+
   s = strrchr (name, '.');
   if (!s)
 return 0;


[gcc r15-1062] lto: Skip flag OPT_fltrans_output_list_.

2024-06-06 Thread Michal Jires via Gcc-cvs
https://gcc.gnu.org/g:ca43678c3d8aff1b8774e0b05c9a4a42fd271b13

commit r15-1062-gca43678c3d8aff1b8774e0b05c9a4a42fd271b13
Author: Michal Jires 
Date:   Fri Nov 17 21:16:37 2023 +0100

lto: Skip flag OPT_fltrans_output_list_.

Bootstrapped/regtested on x86_64-pc-linux-gnu

gcc/ChangeLog:

* lto-opts.cc (lto_write_options): Skip OPT_fltrans_output_list_.

Diff:
---
 gcc/lto-opts.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/lto-opts.cc b/gcc/lto-opts.cc
index b3d19f8b361..a3a1d110329 100644
--- a/gcc/lto-opts.cc
+++ b/gcc/lto-opts.cc
@@ -152,6 +152,7 @@ lto_write_options (void)
case OPT_fprofile_prefix_map_:
case OPT_fcanon_prefix_map:
case OPT_fwhole_program:
+   case OPT_fltrans_output_list_:
  continue;
 
default:


[gcc r15-1061] RISC-V: Regenerate opt urls.

2024-06-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:037fc4d1012dc9d533862ef7e2c946249877dd71

commit r15-1061-g037fc4d1012dc9d533862ef7e2c946249877dd71
Author: Robin Dapp 
Date:   Thu Jun 6 09:32:28 2024 +0200

RISC-V: Regenerate opt urls.

I wasn't aware that I needed to regenerate the opt urls when
adding an option.  This patch does that.

gcc/ChangeLog:

* config/riscv/riscv.opt.urls: Regenerate.

Diff:
---
 gcc/config/riscv/riscv.opt.urls | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index d87e9d5c9a8..622cb6e7b44 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ b/gcc/config/riscv/riscv.opt.urls
@@ -47,6 +47,12 @@ UrlSuffix(gcc/RISC-V-Options.html#index-mcmodel_003d-4)
 mstrict-align
 UrlSuffix(gcc/RISC-V-Options.html#index-mstrict-align-4)
 
+mscalar-strict-align
+UrlSuffix(gcc/RISC-V-Options.html#index-mscalar-strict-align)
+
+mvector-strict-align
+UrlSuffix(gcc/RISC-V-Options.html#index-mvector-strict-align)
+
 ; skipping UrlSuffix for 'mexplicit-relocs' due to finding no URLs
 
 mrelax


[gcc r15-1060] [APX CCMP] Support ccmp for float compare

2024-06-06 Thread Hongyu Wang via Gcc-cvs
https://gcc.gnu.org/g:0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0

commit r15-1060-g0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0
Author: Hongyu Wang 
Date:   Wed May 8 11:08:42 2024 +0800

[APX CCMP] Support ccmp for float compare

The ccmp insn itself doesn't support fp compare, but x86 has fp comi
insn that changes EFLAG which can be the scc input to ccmp. Allow
scalar fp compare in ix86_gen_ccmp_first except ORDERED/UNORDERED
compare which cannot be identified in ccmp.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_gen_ccmp_first):
Add fp compare and check the allowed fp compare type.
(ix86_gen_ccmp_next): Adjust compare_code input to ccmp for
fp compare.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ccmp-1.c: Add test for fp compare.
* gcc.target/i386/apx-ccmp-2.c: Likewise.

Diff:
---
 gcc/config/i386/i386-expand.cc | 53 +++---
 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c | 45 +++--
 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c | 47 ++
 3 files changed, 138 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5353d761384..d1d396a8713 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25369,18 +25369,58 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn 
**gen_seq,
   if (op_mode == VOIDmode)
 op_mode = GET_MODE (op1);
 
+  /* We only supports following scalar comparisons that use just 1
+ instruction: DI/SI/QI/HI/DF/SF/HF.
+ Unordered/Ordered compare cannot be corretly indentified by
+ ccmp so they are not supported.  */
   if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
-   || op_mode == QImode))
+   || op_mode == QImode || op_mode == DFmode || op_mode == SFmode
+   || op_mode == HFmode)
+  || code == ORDERED
+  || code == UNORDERED)
 {
   end_sequence ();
   return NULL_RTX;
 }
 
   /* Canonicalize the operands according to mode.  */
-  if (!nonimmediate_operand (op0, op_mode))
-op0 = force_reg (op_mode, op0);
-  if (!x86_64_general_operand (op1, op_mode))
-op1 = force_reg (op_mode, op1);
+  if (SCALAR_INT_MODE_P (op_mode))
+{
+  if (!nonimmediate_operand (op0, op_mode))
+   op0 = force_reg (op_mode, op0);
+  if (!x86_64_general_operand (op1, op_mode))
+   op1 = force_reg (op_mode, op1);
+}
+  else
+{
+  /* op0/op1 can be canonicallized from expand_fp_compare, so
+just adjust the code to make it generate supported fp
+condition.  */
+  if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
+   {
+ /* First try to split condition if we don't need to honor
+NaNs, as the ORDERED/UNORDERED check always fall
+through.  */
+ if (!HONOR_NANS (op_mode))
+   {
+ rtx_code first_code;
+ split_comparison (code, op_mode, &first_code, &code);
+   }
+ /* Otherwise try to swap the operand order and check if
+the comparison is supported.  */
+ else
+   {
+ code = swap_condition (code);
+ std::swap (op0, op1);
+   }
+
+ if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
+   {
+ end_sequence ();
+ return NULL_RTX;
+   }
+   }
+}
 
   *prep_seq = get_insns ();
   end_sequence ();
@@ -25445,6 +25485,9 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn 
**gen_seq, rtx prev,
   dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
 
   prev_code = GET_CODE (prev);
+  /* Fixup FP compare code here.  */
+  if (GET_MODE (XEXP (prev, 0)) == CCFPmode)
+prev_code = ix86_fp_compare_code_to_integer (prev_code);
 
   if (bit_code != AND)
 prev_code = reverse_condition (prev_code);
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c 
b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
index 5a2dad89f1f..e4e112f07e0 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mapx-features=ccmp" } */
+/* { dg-options "-O2 -ffast-math -mapx-features=ccmp" } */
 
 int
 f1 (int a)
@@ -56,8 +56,49 @@ f9 (int a, int b)
   return a == 3 || a == 0;
 }
 
+int
+f10 (float a, int b, float c)
+{
+  return a > c || b < 19;
+}
+
+int
+f11 (float a, int b)
+{
+  return a == 0.0 && b > 21;
+}
+
+int
+f12 (double a, int b)
+{
+  return a < 3.0 && b != 23;
+}
+
+int
+f13 (double a, double b, int c, int d)
+{
+  a += b;
+  c += d;
+  return a != b || c == d;
+}
+
+int
+f14 (double a, int b)
+{
+  return b != 0 && a < 1.5;
+}
+
+int
+f15 (double a, double b, int c, int d)
+{
+  return c != d || a <= b;
+}
+
 /* { dg-final { scan-assembler-times "ccmpg" 2 } } */
 /* { dg-final { scan-assembler-times

[gcc r15-1059] [APX CCMP] Adjust strategy for selecting ccmp candidates

2024-06-06 Thread Hongyu Wang via Gcc-cvs
https://gcc.gnu.org/g:23db87301b623ecf162c9df718ce82ed9aa354a8

commit r15-1059-g23db87301b623ecf162c9df718ce82ed9aa354a8
Author: Hongyu Wang 
Date:   Tue Apr 9 16:05:26 2024 +0800

[APX CCMP] Adjust strategy for selecting ccmp candidates

For general ccmp scenario, the tree sequence is like

_1 = (a < b)
_2 = (c < d)
_3 = _1 & _2

current ccmp expanding will try to swap compare order for _1 and _2,
compare the expansion cost/cost2 for expanding _1 or _2 first, then
return the sequence with lower cost.

It is possible that one expansion succeeds and the other fails.
For example, x86 has int ccmp but not fp ccmp, so a combined fp and
int comparison must be ordered such that the fp comparison happens
first.  The costs are not meaningful for failed expansions.

Check the expand_ccmp_next results ret and ret2, and return the valid
one before comparing costs.

gcc/ChangeLog:

* ccmp.cc (expand_ccmp_expr_1): Check ret and ret2 of
expand_ccmp_next, returns the valid one first instead of
comparing cost.

Diff:
---
 gcc/ccmp.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
index 7cb525addf4..4d50708d986 100644
--- a/gcc/ccmp.cc
+++ b/gcc/ccmp.cc
@@ -247,7 +247,15 @@ expand_ccmp_expr_1 (gimple *g, rtx_insn **prep_seq, 
rtx_insn **gen_seq)
  cost2 = seq_cost (prep_seq_2, speed_p);
  cost2 += seq_cost (gen_seq_2, speed_p);
}
- if (cost2 < cost1)
+
+ /* It's possible that one expansion succeeds and the other
+fails.
+For example, x86 has int ccmp but not fp ccmp, and so a
+combined fp and int comparison must be ordered such that
+the fp comparison happens first. The costs are not
+meaningful for failed expansions.  */
+
+ if (ret2 && (!ret || cost2 < cost1))
{
  *prep_seq = prep_seq_2;
  *gen_seq = gen_seq_2;


[gcc r15-1058] [APX CCMP] Support APX CCMP

2024-06-06 Thread Hongyu Wang via Gcc-cvs
https://gcc.gnu.org/g:c989e59fc99d994159114304d4e715c72bedff0a

commit r15-1058-gc989e59fc99d994159114304d4e715c72bedff0a
Author: Hongyu Wang 
Date:   Wed Mar 27 10:13:06 2024 +0800

[APX CCMP] Support APX CCMP

APX CCMP feature implements conditional compare which executes compare
when EFLAGS matches certain condition.

CCMP introduces default flags value (dfv), when conditional compare does
not execute, it will directly set the flags according to dfv.

The instruction goes like

ccmpeq {dfv=sf,of,cf,zf}  %rax, %r16

This instruction tests whether EFLAGS matches the condition code EQ.
If so, it compares %rax and %r16 like a legacy cmp. If not, EFLAGS is
updated according to dfv, which here means SF, OF, CF and ZF are
set. PF is set according to CF in dfv, and AF is always
cleared.

The dfv part can be a combination of sf,of,cf,zf, like {dfv=cf,zf} which
sets CF and ZF only and clear others, or {dfv=} which clears all EFLAGS.

To enable CCMP, we implemented the target hook TARGET_GEN_CCMP_FIRST and
TARGET_GEN_CCMP_NEXT to reuse the current ccmp infrastructure. Also we
extended the cstorem4 optab to support storing different CCmode to fit
current ccmp infrastructure.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_gen_ccmp_first): New function
that test if the first compare can be generated.
(ix86_gen_ccmp_next): New function to emit a single compare and ccmp
sequence.
* config/i386/i386-opts.h (enum apx_features): Add apx_ccmp.
* config/i386/i386-protos.h (ix86_gen_ccmp_first): New proto
declare.
(ix86_gen_ccmp_next): Likewise.
(ix86_get_flags_cc): Likewise.
* config/i386/i386.cc (ix86_flags_cc): New enum.
(ix86_ccmp_dfv_mapping): New string array to map conditional
code to dfv.
(ix86_print_operand): Handle special dfv flag for CCMP.
(ix86_get_flags_cc): New function to return x86 CC enum.
(TARGET_GEN_CCMP_FIRST): Define.
(TARGET_GEN_CCMP_NEXT): Likewise.
* config/i386/i386.h (TARGET_APX_CCMP): Define.
* config/i386/i386.md (@ccmp): New define_insn to support
ccmp.
(UNSPEC_APX_DFV): New unspec for ccmp dfv.
(ALL_CC): New mode iterator.
(cstorecc4): Change to ...
(cstore4) ... this, use ALL_CC to loop through all
available CCmodes.
* config/i386/i386.opt (apx_ccmp): Add enum value for ccmp.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ccmp-1.c: New compile test.
* gcc.target/i386/apx-ccmp-2.c: New runtime test.

Diff:
---
 gcc/config/i386/i386-expand.cc | 121 +
 gcc/config/i386/i386-opts.h|   6 +-
 gcc/config/i386/i386-protos.h  |   5 ++
 gcc/config/i386/i386.cc|  50 
 gcc/config/i386/i386.h |   1 +
 gcc/config/i386/i386.md|  35 -
 gcc/config/i386/i386.opt   |   3 +
 gcc/testsuite/gcc.target/i386/apx-ccmp-1.c |  63 +++
 gcc/testsuite/gcc.target/i386/apx-ccmp-2.c |  57 ++
 9 files changed, 337 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 56d29c15f9a..5353d761384 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25352,4 +25352,125 @@ ix86_expand_fast_convert_bf_to_sf (rtx val)
   return ret;
 }
 
+rtx
+ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
+   rtx_code code, tree treeop0, tree treeop1)
+{
+  if (!TARGET_APX_CCMP)
+return NULL_RTX;
+
+  rtx op0, op1, res;
+  machine_mode op_mode;
+
+  start_sequence ();
+  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+
+  op_mode = GET_MODE (op0);
+  if (op_mode == VOIDmode)
+op_mode = GET_MODE (op1);
+
+  if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
+   || op_mode == QImode))
+{
+  end_sequence ();
+  return NULL_RTX;
+}
+
+  /* Canonicalize the operands according to mode.  */
+  if (!nonimmediate_operand (op0, op_mode))
+op0 = force_reg (op_mode, op0);
+  if (!x86_64_general_operand (op1, op_mode))
+op1 = force_reg (op_mode, op1);
+
+  *prep_seq = get_insns ();
+  end_sequence ();
+
+  start_sequence ();
+
+  res = ix86_expand_compare (code, op0, op1);
+
+  if (!res)
+{
+  end_sequence ();
+  return NULL_RTX;
+}
+  *gen_seq = get_insns ();
+  end_sequence ();
+
+  return res;
+}
+
+rtx
+ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
+  rtx_code cmp_code, tree treeop0, tree treeop1,
+  rtx_code bit_code)
+{
+  if (!TARGET_

[gcc r15-1057] [APX] Adjust target-support check [PR 115341]

2024-06-06 Thread Hongyu Wang via Gcc-cvs
https://gcc.gnu.org/g:f46d54a2a76acb94356989fb187853e5b58c3098

commit r15-1057-gf46d54a2a76acb94356989fb187853e5b58c3098
Author: Hongyu Wang 
Date:   Thu Jun 6 13:00:26 2024 +0800

[APX] Adjust target-support check [PR 115341]

Current target apxf check does not specify sub-features that assembler
supports, so the check with older binutils will fail at assemble stage
for new apx features like NF,CCMP or CFCMOV. Adjust the assembler check
for all apx subfeatures.

gcc/testsuite/ChangeLog:

PR target/115341
* lib/target-supports.exp (check_effective_target_apxf):
Check for all apx sub-features.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 4766104c6d8..5c0a3dade22 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -10451,7 +10451,13 @@ proc check_effective_target_apxf { } {
void
foo ()
{
- __asm__ volatile ("add\t%%r16, %%r31" ::);
+ __asm__ volatile ("movq\t%r16, %rax");
+ __asm__ volatile ("push2p\t%r15, %r14");
+ __asm__ volatile ("addq\t%r16, %r31, %r17");
+ __asm__ volatile ("{nf} addq\t%r16, %r31");
+ __asm__ volatile ("setzule\t%al");
+ __asm__ volatile ("cfcmoveq\t%r16, %r31");
+ __asm__ volatile ("ccmpleq\t{dfv=sf} %r16, %r31");
}
 } "-mapxf" ]
 }