Re: [PATCH] function.h: eliminate macros "dom_computed" and "n_bbs_in_dom_tree"

2024-06-17 Thread Richard Biener
On Tue, Jun 18, 2024 at 2:11 AM David Malcolm  wrote:
>
> Be explicit when we use "cfun".
>
> No functional change intended.
>
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
>
> OK for trunk?
>
> gcc/ChangeLog:
> * dominance.cc (compute_dom_fast_query): Replace uses of
> "dom_computed" macro with explicit use of cfun.
> (compute_dom_fast_query_in_region): Likewise.
> (calculate_dominance_info): Likewise, also for macro
> "n_bbs_in_dom_tree".
> (calculate_dominance_info_for_region): Likewise for
> "dom_computed" macro.
> (get_immediate_dominator): Likewise.
> (set_immediate_dominator): Likewise.
> (get_dominated_by): Likewise.
> (redirect_immediate_dominators): Likewise.
> (nearest_common_dominator): Likewise.
> (dominated_by_p): Likewise.
> (bb_dom_dfs_in): Likewise.
> (bb_dom_dfs_out): Likewise.
> (recompute_dominator): Likewise.
> (iterate_fix_dominators): Likewise.
> (add_to_dominance_info): Likewise, also for macro
> "n_bbs_in_dom_tree".
> (delete_from_dominance_info): Likewise.
> (set_dom_info_availability): Likewise for
> "dom_computed" macro.
> * function.h (dom_computed): Delete macro.
> (n_bbs_in_dom_tree): Delete macro.
>
> Signed-off-by: David Malcolm 
> ---
>  gcc/dominance.cc | 70 +---
>  gcc/function.h   |  3 ---
>  2 files changed, 36 insertions(+), 37 deletions(-)
>
> diff --git a/gcc/dominance.cc b/gcc/dominance.cc
> index 0357210ed27f..528b38caa9db 100644
> --- a/gcc/dominance.cc
> +++ b/gcc/dominance.cc
> @@ -672,7 +672,7 @@ compute_dom_fast_query (enum cdi_direction dir)
>
>gcc_checking_assert (dom_info_available_p (dir));
>
> -  if (dom_computed[dir_index] == DOM_OK)
> +  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)

The x_* members are named this way so that they are not used directly.
I think the canonical replacement these days would be a

inline bool dom_computed (function *, enum cdi_direction) {...}

inline function.

Richard.

>  return;
>
>FOR_ALL_BB_FN (bb, cfun)
> @@ -681,7 +681,7 @@ compute_dom_fast_query (enum cdi_direction dir)
> assign_dfs_numbers (bb->dom[dir_index], &num);
>  }
>
> -  dom_computed[dir_index] = DOM_OK;
> +  cfun->cfg->x_dom_computed[dir_index] = DOM_OK;
>  }
>
>  /* Analogous to the previous function but compute the data for reducible
> @@ -697,7 +697,7 @@ compute_dom_fast_query_in_region (enum cdi_direction dir,
>
>gcc_checking_assert (dom_info_available_p (dir));
>
> -  if (dom_computed[dir_index] == DOM_OK)
> +  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
>  return;
>
>/* Assign dfs numbers for region nodes except for entry and exit nodes.  */
> @@ -708,7 +708,7 @@ compute_dom_fast_query_in_region (enum cdi_direction dir,
> assign_dfs_numbers (bb->dom[dir_index], &num);
>  }
>
> -  dom_computed[dir_index] = DOM_OK;
> +  cfun->cfg->x_dom_computed[dir_index] = DOM_OK;
>  }
>
>  /* The main entry point into this module.  DIR is set depending on whether
> @@ -721,7 +721,7 @@ calculate_dominance_info (cdi_direction dir, bool 
> compute_fast_query)
>  {
>unsigned int dir_index = dom_convert_dir_to_idx (dir);
>
> -  if (dom_computed[dir_index] == DOM_OK)
> +  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
>  {
>checking_verify_dominators (dir);
>return;
> @@ -730,14 +730,14 @@ calculate_dominance_info (cdi_direction dir, bool 
> compute_fast_query)
>timevar_push (TV_DOMINANCE);
>if (!dom_info_available_p (dir))
>  {
> -  gcc_assert (!n_bbs_in_dom_tree[dir_index]);
> +  gcc_assert (!cfun->cfg->x_n_bbs_in_dom_tree[dir_index]);
>
>basic_block b;
>FOR_ALL_BB_FN (b, cfun)
> {
>   b->dom[dir_index] = et_new_tree (b);
> }
> -  n_bbs_in_dom_tree[dir_index] = n_basic_blocks_for_fn (cfun);
> +  cfun->cfg->x_n_bbs_in_dom_tree[dir_index] = n_basic_blocks_for_fn 
> (cfun);
>
>dom_info di (cfun, dir);
>di.calc_dfs_tree ();
> @@ -749,7 +749,7 @@ calculate_dominance_info (cdi_direction dir, bool 
> compute_fast_query)
> et_set_father (b->dom[dir_index], d->dom[dir_index]);
> }
>
> -  dom_computed[dir_index] = DOM_NO_FAST_QUERY;
> +  cfun->cfg->x_dom_computed[dir_index] = DOM_NO_FAST_QUERY;
>  }
>else
>  checking_verify_dominators (dir);
> @@ -772,7 +772,7 @@ calculate_dominance_info_for_region (cdi_direction dir,
>basic_block bb;
>unsigned int i;
>
> -  if (dom_computed[dir_index] == DOM_OK)
> +  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
>  return;
>
>timevar_push (TV_DOMINANCE);
> @@ -791,7 +791,7 @@ calculate_dominance_info_for_region (cdi_direction dir,
>  if (basic_block d = di.get_idom (bb))
>et_set_father (bb->dom[dir_index], d->dom[dir_index]);
>
> -  dom_computed[dir_index] = D

RE: [PATCH v3] aarch64: Add vector popcount besides QImode [PR113859]

2024-06-17 Thread Pengxuan Zheng (QUIC)
> Hi,
> 
> > -Original Message-
> > From: Pengxuan Zheng 
> > Sent: Friday, June 14, 2024 12:57 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Pengxuan Zheng 
> > Subject: [PATCH v3] aarch64: Add vector popcount besides QImode
> > [PR113859]
> >
> > This patch improves GCC’s vectorization of __builtin_popcount for
> > aarch64 target by adding popcount patterns for vector modes besides
> > QImode, i.e., HImode, SImode and DImode.
> >
> > With this patch, we now generate the following for V8HI:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >
> > For V4HI, we generate:
> >   cnt v1.8b, v.8b
> >   uaddlp  v2.4h, v1.8b
> >
> > For V4SI, we generate:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >   uaddlp  v3.4s, v2.8h
> >
> > For V2SI, we generate:
> >   cnt v1.8b, v.8b
> >   uaddlp  v2.4h, v1.8b
> >   uaddlp  v3.2s, v2.4h
> >
> > For V2DI, we generate:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >   uaddlp  v3.4s, v2.8h
> >   uaddlp  v4.2d, v3.4s
> 
> Nice patch!  We can do better for these sequences though. Would you
> instead consider using udot with a 0 accumulator and a multiplicand of 1?
> 
> Essentially
> movi v0.16b, #0
> movi v1.16b, #1
> cnt v3.16b, v2.16b
> udot  v0.4s, v3.16b, v1.16b
> 
> This has one fewer instruction on the critical path, so it should have
> half the latency of the uaddlp variants.
> 
> For the DI case you'll still need a final uaddlp.

Thanks for your suggestions, Tamar! That's indeed more efficient. I have
updated the patch accordingly. Please let me know if you have any other
comments.

https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654947.html

Thanks,
Pengxuan
> 
> Cheers,
> Tamar
> 
> >
> > PR target/113859
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-simd.md (aarch64_addlp):
> > Rename to...
> > (@aarch64_addlp): ... This.
> > (popcount2): New define_expand.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/popcnt-vec.c: New test.
> >
> > Signed-off-by: Pengxuan Zheng 
> > ---
> >  gcc/config/aarch64/aarch64-simd.md| 28 +++-
> >  gcc/testsuite/gcc.target/aarch64/popcnt-vec.c | 69
> > +++
> >  2 files changed, 96 insertions(+), 1 deletion(-)  create mode 100644
> > gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64- simd.md index 0bb39091a38..ee73e13534b
> > 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -3461,7 +3461,7 @@ (define_insn
> > "*aarch64_addlv_ze"
> >[(set_attr "type" "neon_reduc_add")]
> >  )
> >
> > -(define_expand "aarch64_addlp"
> > +(define_expand "@aarch64_addlp"
> >[(set (match_operand: 0 "register_operand")
> > (plus:
> >   (vec_select:
> > @@ -3517,6 +3517,32 @@ (define_insn
> "popcount2"
> >[(set_attr "type" "neon_cnt")]
> >  )
> >
> > +(define_expand "popcount2"
> > +  [(set (match_operand:VDQHSD 0 "register_operand")
> > +(popcount:VDQHSD (match_operand:VDQHSD 1
> > +"register_operand")))]
> > +  "TARGET_SIMD"
> > +  {
> > +/* Generate a byte popcount. */
> > +machine_mode mode =  == 64 ? V8QImode : V16QImode;
> > +rtx tmp = gen_reg_rtx (mode);
> > +auto icode = optab_handler (popcount_optab, mode);
> > +emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode,
> > +operands[1])));
> > +
> > +/* Use a sequence of UADDLPs to accumulate the counts. Each step
> doubles
> > +   the element size and halves the number of elements. */
> > +do
> > +  {
> > +auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE
> (tmp));
> > +mode = insn_data[icode].operand[0].mode;
> > +rtx dest = mode == mode ? operands[0] : gen_reg_rtx
> (mode);
> > +emit_insn (GEN_FCN (icode) (dest, tmp));
> > +tmp = dest;
> > +  }
> > +while (mode != mode);
> > +DONE;
> > +  }
> > +)
> > +
> >  ;; 'across lanes' max and min ops.
> >
> >  ;; Template for outputting a scalar, so we can create __builtins
> > which can be diff --git
> > a/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > new file mode 100644
> > index 000..0c4926d7ca8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > @@ -0,0 +1,69 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fno-vect-cost-model" } */
> > +
> > +/* This function should produce cnt v.16b. */ void bar (unsigned char
> > +*__restrict b, unsigned char *__restrict d) {
> > +  for (int i = 0; i < 1024; i++)
> > +d[i] = __builtin_popcount (b[i]); }
> > +
> > +/* This function should produce cnt v.16b and uaddlp (Add Long
> > +Pairwise). */ void
> > +bar1 (unsigned short *__restrict b, unsigned short *__restrict d) {
> > +  for (int i = 0; i < 1024; i++)
> > +d[i] = __builtin_popcount (b[i]); }
> > +
> > +/* This function should produce cnt v.16b and 2 uaddlp (Add Long
> > +Pairwise)

[PATCH v4] aarch64: Add vector popcount besides QImode [PR113859]

2024-06-17 Thread Pengxuan Zheng
This patch improves GCC’s vectorization of __builtin_popcount for aarch64 target
by adding popcount patterns for vector modes besides QImode, i.e., HImode,
SImode and DImode.

With this patch, we now generate the following for V8HI:
  cnt v1.16b, v0.16b
  uaddlp  v2.8h, v1.16b

For V4HI, we generate:
  cnt v1.8b, v0.8b
  uaddlp  v2.4h, v1.8b

For V4SI, we generate:
  cnt v1.16b, v0.16b
  uaddlp  v2.8h, v1.16b
  uaddlp  v3.4s, v2.8h

For V4SI with TARGET_DOTPROD, we generate the following instead:
  movi    v0.4s, #0
  movi    v1.16b, #1
  cnt v3.16b, v2.16b
  udot    v0.4s, v3.16b, v1.16b

For V2SI, we generate:
  cnt v1.8b, v0.8b
  uaddlp  v2.4h, v1.8b
  uaddlp  v3.2s, v2.4h

For V2SI with TARGET_DOTPROD, we generate the following instead:
  movi    v0.8b, #0
  movi    v1.8b, #1
  cnt v3.8b, v2.8b
  udot    v0.2s, v3.8b, v1.8b

For V2DI, we generate:
  cnt v1.16b, v0.16b
  uaddlp  v2.8h, v1.16b
  uaddlp  v3.4s, v2.8h
  uaddlp  v4.2d, v3.4s

For V2DI with TARGET_DOTPROD, we generate the following instead:
  movi    v0.4s, #0
  movi    v1.16b, #1
  cnt v3.16b, v2.16b
  udot    v0.4s, v3.16b, v1.16b
  uaddlp  v0.2d, v0.4s

PR target/113859

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (aarch64_addlp): Rename to...
(@aarch64_addlp): ... This.
(popcount2): New define_expand.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/popcnt-udot.c: New test.
* gcc.target/aarch64/popcnt-vec.c: New test.

Signed-off-by: Pengxuan Zheng 
---
 gcc/config/aarch64/aarch64-simd.md| 52 +-
 .../gcc.target/aarch64/popcnt-udot.c  | 45 
 gcc/testsuite/gcc.target/aarch64/popcnt-vec.c | 69 +++
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt-udot.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt-vec.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 0bb39091a38..3bdd4400408 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3461,7 +3461,7 @@ (define_insn 
"*aarch64_addlv_ze"
   [(set_attr "type" "neon_reduc_add")]
 )
 
-(define_expand "aarch64_addlp"
+(define_expand "@aarch64_addlp"
   [(set (match_operand: 0 "register_operand")
(plus:
  (vec_select:
@@ -3517,6 +3517,56 @@ (define_insn "popcount2"
   [(set_attr "type" "neon_cnt")]
 )
 
+(define_expand "popcount2"
+  [(set (match_operand:VDQHSD 0 "register_operand")
+(popcount:VDQHSD (match_operand:VDQHSD 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+/* Generate a byte popcount. */
+machine_mode mode =  == 64 ? V8QImode : V16QImode;
+rtx tmp = gen_reg_rtx (mode);
+auto icode = optab_handler (popcount_optab, mode);
+emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
+
+if (TARGET_DOTPROD)
+  {
+/* For V4SI and V2SI, we can generate a UDOT with a 0 accumulator and a
+   1 multiplicant. For V2DI, another UAADDLP is needed. */
+if (mode == V4SImode || mode == V2SImode
+|| mode == V2DImode)
+  {
+machine_mode dp_mode =  == 64 ? V2SImode : V4SImode;
+rtx ones = force_reg (mode, CONST1_RTX (mode));
+rtx zeros = CONST0_RTX (dp_mode);
+rtx dp = gen_reg_rtx (dp_mode);
+auto dp_icode = optab_handler (udot_prod_optab, mode);
+emit_move_insn (dp, zeros);
+emit_insn (GEN_FCN (dp_icode) (dp, tmp, ones, dp));
+if (mode == V2DImode)
+  {
+emit_insn (gen_aarch64_uaddlpv4si (operands[0], dp));
+DONE;
+  }
+emit_move_insn (operands[0], dp);
+DONE;
+  }
+  }
+
+/* Use a sequence of UADDLPs to accumulate the counts. Each step doubles
+   the element size and halves the number of elements. */
+do
+  {
+auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE (tmp));
+mode = insn_data[icode].operand[0].mode;
+rtx dest = mode == mode ? operands[0] : gen_reg_rtx (mode);
+emit_insn (GEN_FCN (icode) (dest, tmp));
+tmp = dest;
+  }
+while (mode != mode);
+DONE;
+  }
+)
+
 ;; 'across lanes' max and min ops.
 
 ;; Template for outputting a scalar, so we can create __builtins which can be
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-udot.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt-udot.c
new file mode 100644
index 000..759d34a2d10
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-udot.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+dotprod -fno-vect-cost-model" } */
+
+/* This function should produce cnt v.16b and udot. */
+void
+bar (unsigned int *__restrict b, unsigned int *__restrict d)
+{
+  for (int i = 0; i < 1024; i++)
+d[i] = __builtin_popcount (b[i]);
+}
+
+/* This function should pr

[PATCH] libstdc++: Fix find_last_set(simd_mask) to ignore padding bits

2024-06-17 Thread Matthias Kretz
Tested on x86_64-linux-gnu (also -m32 and -mx32), aarch64-linux-gnu, and
arm-linux-gnueabi(hf).

OK for trunk and backports? OK to go for GCC 11.5 as early as possible?

--- 8< ---

With the change to the AVX512 find_last_set implementation, the change
to AVX512 operator!= is unnecessary. However, the latter was not
producing optimal code and unnecessarily set the padding bits. In
theory, the compiler could determine that with the new !=
implementation, the bit operation for clearing the padding bits is a
no-op and can be elided.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115454
* include/experimental/bits/simd_x86.h (_S_not_equal_to): Use
neq comparison instead of bitwise negation after eq.
(_S_find_last_set): Clear unused high bits before computing
bit_width.
* testsuite/experimental/simd/pr115454_find_last_set.cc: New
test.
---
 .../include/experimental/bits/simd_x86.h  | 26 +-
 .../simd/pr115454_find_last_set.cc| 49 +++
 2 files changed, 62 insertions(+), 13 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/
pr115454_find_last_set.cc


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 517c4b4a5be..8a23aa2082b 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -2339,29 +2339,29 @@ _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
 		  __assert_unreachable<_Tp>();
 	  }
 	else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 8)
-	  return ~_mm512_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+	  return _mm512_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 4)
-	  return ~_mm512_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+	  return _mm512_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 2)
-	  return ~_mm512_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+	  return _mm512_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 1)
-	  return ~_mm512_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+	  return _mm512_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 8)
-	  return ~_mm256_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+	  return _mm256_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 4)
-	  return ~_mm256_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+	  return _mm256_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 2)
-	  return ~_mm256_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+	  return _mm256_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 1)
-	  return ~_mm256_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+	  return _mm256_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 8)
-	  return ~_mm_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+	  return _mm_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 4)
-	  return ~_mm_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+	  return _mm_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 2)
-	  return ~_mm_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+	  return _mm_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
 	else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 1)
-	  return ~_mm_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+	  return _mm_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
 	else
 	  __assert_unreachable<_Tp>();
 	  }   // }}}
@@ -5292,7 +5292,7 @@ _S_find_first_set(simd_mask<_Tp, _Abi> __k)
   _S_find_last_set(simd_mask<_Tp, _Abi> __k)
   {
 	if constexpr (__is_avx512_abi<_Abi>())
-	  return std::__bit_width(__k._M_data._M_data) - 1;
+	  return std::__bit_width(_Abi::_S_masked(__k._M_data)._M_data) - 1;
 	else
 	  return _Base::_S_find_last_set(__k);
   }
diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
new file mode 100644
index 000..b47f19d3067
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ 

Re: [PATCH] middle-end/114189 - drop uses of vcond{,u,eq}_optab

2024-06-17 Thread Richard Biener
On Mon, 17 Jun 2024, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Fri, 14 Jun 2024, Richard Biener wrote:
> >
> >> On Fri, 14 Jun 2024, Richard Sandiford wrote:
> >> 
> >> > Richard Biener  writes:
> >> > > On Fri, 14 Jun 2024, Richard Sandiford wrote:
> >> > >
> >> > >> Richard Biener  writes:
> >> > >> > The following retires vcond{,u,eq} optabs by stopping to use them
> >> > >> > from the middle-end.  Targets instead (should) implement vcond_mask
> >> > >> > and vec_cmp{,u,eq} optabs.  The PR this change refers to lists
> >> > >> > possibly affected targets - those implementing these patterns,
> >> > >> > and in particular it lists mips, sparc and ia64 as targets that
> >> > >> > most definitely will regress while others might simply remove
> >> > >> > their vcond{,u,eq} patterns.
> >> > >> >
> >> > >> > I'd appreciate testing, I do not expect fallout for x86 or 
> >> > >> > arm/aarch64.
> >> > >> > I know riscv doesn't implement any of the legacy optabs.  But less
> >> > >> > maintained vector targets might need adjustments.
> >> > >> >
> >> > >> > I want to get rid of those optabs for GCC 15.  If I don't hear from
> >> > >> > you I will assume your target is fine.
> >> > >> 
> >> > >> Great!  Thanks for doing this.
> >> > >> 
> >> > >> Is there a plan for how we should handle vector comparisons that
> >> > >> have to be done as the inverse of the negated condition?  Should
> >> > >> targets simply not provide vec_cmp for such conditions and leave
> >> > >> the target-independent code to deal with the fallout?  (For a
> >> > >> standalone comparison, it would invert the result.  For a 
> >> > >> VEC_COND_EXPR
> >> > >> it would swap the true and false values.)
> >> > >
> >> > > I would expect that the ISEL pass which currently deals with finding
> >> > > valid combos of .VCMP{,U,EQ} and .VCOND_MASK deals with this.
> >> > > So how do we deal with this right now?  I expect RTL expansion will
> >> > > do the inverse trick, no?
> >> > 
> >> > I think in practice (at least for the targets I've worked on),
> >> > the target's vec_cmp handles the inversion itself.  Thus the
> >> > main optimisation done by targets' vcond patterns is to avoid
> >> > the inversion (and instead swap the true/false values) when the
> >> > "opposite" comparison is the native one.
> >> 
> >> I see.  I suppose whether or not vec_cmp is handled is determined
> >> by a FAIL so it's somewhat difficult to determine this at ISEL time.
> 
> In principle we could say that the predicates should accept only the
> conditions that can be done natively.  Then target-independent code
> can apply the usual approaches to generating other conditions
> (which tend to be replicated across targets anyway).

Ah yeah, I suppose that would work.  So we'd update the docs
to say that predicates are required to reject compares they do not
handle, and that otherwise the expander may not FAIL?

I'll note that expand_vec_cmp_expr_p already looks at the insn
predicates, so adjusting vector lowering (and vectorization) to
emit only recognized compares (and requiring folding to keep it at that)
should be possible.

ISEL would then mainly need to learn the trick of swapping vector
cond arms on inverted masks.  OTOH folding should also do that.

Or do you suggest to allow all compares on GIMPLE and only fixup
during ISEL?  How do we handle vector lowering then?  Would it be
enough to require "any" condition code and thus we expect targets
to implement enough codes so all compares can be handled by
swapping/inversion?

> > I'll also note that we document vec_cmp{,u,eq} as having all zeros,
> > all ones for the result while vcond_mask might only care for the MSB
> > (it's documented to work on the result of a pre-computed vector
> > comparison).
> 
> Not sure how much the docs reflect reality.  At least for SVE,
> vec_cmp returns 0/1 results for vector boolean modes. 

Likewise for AVX512 though since all elements are 1 bit it's -1 as well.

> But I think for integer comparison results, vec_cmp must produce 0/-1
> and vcond only accepts 0/-1.

OK, so we adjust docs to constrain vector integer results but otherwise
state the result is only used as predicate/mask operand.

> > So this eventually asks for targets to work out the optimal sequence
> > via combine helpers and thus eventually splitters to fixup invalid
> > compare operators late?
> 
> I really hope we can do this in late gimple & expand.

Me as well.

Richard.

> Thanks,
> Richard
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


RE: [PATCH] aarch64: Add fix_truncv4sfv4hi2 pattern [PR113882]

2024-06-17 Thread Richard Biener
On Tue, 18 Jun 2024, Pengxuan Zheng (QUIC) wrote:

> > Pengxuan Zheng  writes:
> > > This patch adds the fix_truncv4sfv4hi2 (V4SF->V4HI) pattern which is
> > > implemented using fix_truncv4sfv4si2 (V4SF->V4SI) and then truncv4siv4hi2
> > (V4SI->V4HI).
> > >
> > >   PR target/113882
> > >
> > > gcc/ChangeLog:
> > >
> > >   * config/aarch64/aarch64-simd.md (fix_truncv4sfv4hi2): New pattern.
> > 
> > Could we handle this by extending the target-independent code instead?
> > Richard mentioned in comment 1 that the current set of intermediate
> > conversions is hard-coded, but it didn't sound like he was implying that the
> > set shouldn't change.
> 
> Yes, Richard. I checked the target-independent code. In fact, SLP already
> handles this type of intermediate conversion. However, the logic is guarded
> by "!flag_trapping_math". Therefore, if we pass -fno-trapping-math, SLP
> actually generates the right vectorized code. Also, it looks like the check
> for "!flag_trapping_math" was added intentionally in
> r14-2085-g77a50c772771f6 to fix some PRs. So, I'm not sure what we should do
> here. Thoughts?
> 
>   if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
>   && (code == FLOAT_EXPR ||
>   (code == FIX_TRUNC_EXPR && !flag_trapping_math)))

That is because of missing FE_INVALID(?) when say float -> signed char
doesn't fit but float -> int does and the remaining converts are done
as int -> {short,char}.

There have been multiple rounds of discussion about whether
flag_trapping_math should be off by default.

Richard.

> Thanks,
> Pengxuan
> > 
> > Thanks,
> > Richard
> > 
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.target/aarch64/fix_trunc2.c: New test.
> > >
> > > Signed-off-by: Pengxuan Zheng 
> > > ---
> > >  gcc/config/aarch64/aarch64-simd.md| 13 +
> > >  gcc/testsuite/gcc.target/aarch64/fix_trunc2.c | 14 ++
> > >  2 files changed, 27 insertions(+)
> > >  create mode 100644 gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > >
> > > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > > b/gcc/config/aarch64/aarch64-simd.md
> > > index 868f4486218..096f7b56a27 100644
> > > --- a/gcc/config/aarch64/aarch64-simd.md
> > > +++ b/gcc/config/aarch64/aarch64-simd.md
> > > @@ -3032,6 +3032,19 @@ (define_expand
> > "2"
> > >"TARGET_SIMD"
> > >{})
> > >
> > > +
> > > +(define_expand "fix_truncv4sfv4hi2"
> > > +  [(match_operand:V4HI 0 "register_operand")
> > > +   (match_operand:V4SF 1 "register_operand")]
> > > +  "TARGET_SIMD"
> > > +  {
> > > +rtx tmp = gen_reg_rtx (V4SImode);
> > > +emit_insn (gen_fix_truncv4sfv4si2 (tmp, operands[1]));
> > > +emit_insn (gen_truncv4siv4hi2 (operands[0], tmp));
> > > +DONE;
> > > +  }
> > > +)
> > > +
> > >  (define_expand "ftrunc2"
> > >[(set (match_operand:VHSDF 0 "register_operand")
> > >   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] diff
> > > --git a/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > > b/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > > new file mode 100644
> > > index 000..57cc00913a3
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > > @@ -0,0 +1,14 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2" } */
> > > +
> > > +void
> > > +f (short *__restrict a, float *__restrict b) {
> > > +  a[0] = b[0];
> > > +  a[1] = b[1];
> > > +  a[2] = b[2];
> > > +  a[3] = b[3];
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler-times {fcvtzs\tv[0-9]+.4s, v[0-9]+.4s}
> > > +1 } } */
> > > +/* { dg-final { scan-assembler-times {xtn\tv[0-9]+.4h, v[0-9]+.4s} 1
> > > +} } */
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


[to-be-committed][RISC-V] Improve bset generation when bit position is limited

2024-06-17 Thread Jeff Law


So more work in the ongoing effort to make better use of the Zbs 
extension.  This time we're trying to exploit knowledge of the shift 
count/bit position to allow us to use a bset instruction.


Consider this expression in SImode


 (1 << (pos & 0xf))

None of the resulting values will have bit 31 set.  So if there's an 
explicit zero or sign extension to DI we can drop that explicit 
extension and generate a simple bset with x0 as the input value.


Or another example (which I think came from spec at some point and IIRC 
was the primary motivation for this patch):




(1 << (7-(pos) % 8))



Before this change they'd generate something like this respectively:

li      a5,1
andi    a0,a0,15
sllw    a0,a5,a0


li      a5,7
andn    a0,a5,a0
li      a5,1
sllw    a0,a5,a0



After this change they generate:



andi    a0,a0,15        # 9     [c=4 l=4]  *anddi3/1
bset    a0,x0,a0        # 17    [c=8 l=4]  *bsetdi_2


li      a5,7            # 27    [c=4 l=4]  *movdi_64bit/1
andn    a0,a5,a0        # 28    [c=4 l=4]  and_notdi3
bset    a0,x0,a0        # 19    [c=8 l=4]  *bsetdi_2



We achieve this with simple define_splits which target the bsetdi_2 
pattern I recently added.  Much better than the original implementation 
I did a few months back :-)  I've got a bclr/binv variant from a few 
months back as well, but it needs to be updated to the simpler 
implementation found here.


Just ran this through my tester.  Will wait for the precommit CI to 
render its verdict before moving forward.



Jeff
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 094bc2acf1c..dc7a7e7fba7 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -609,6 +609,36 @@ (define_insn "*bsetdi_2"
   "bset\t%0,x0,%1"
   [(set_attr "type" "bitmanip")])
 
+;; These two splitters take advantage of the limited range of the
+;; shift constant.   With the limited range we know the SImode sign
+;; bit is never set, thus we can treat this as zero extending and
+;; generate the bsetdi_2 pattern.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+   (any_extend:DI
+(ashift:SI (const_int 1)
+   (subreg:QI (and:DI (not:DI (match_operand:DI 1 
"register_operand"))
+  (match_operand 2 "const_int_operand")) 0
+   (clobber (match_operand:DI 3 "register_operand"))]
+  "TARGET_64BIT
+   && TARGET_ZBS
+   && (TARGET_ZBB || TARGET_ZBKB)
+   && (INTVAL (operands[2]) & 0x1f) != 0x1f"
+   [(set (match_dup 0) (and:DI (not:DI (match_dup 1)) (match_dup 2)))
+(set (match_dup 0) (zero_extend:DI (ashift:SI (const_int 1) (subreg:QI 
(match_dup 0) 0])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+   (any_extend:DI
+(ashift:SI (const_int 1)
+   (subreg:QI (and:DI (match_operand:DI 1 "register_operand")
+  (match_operand 2 "const_int_operand")) 
0]
+  "TARGET_64BIT
+   && TARGET_ZBS
+   && (INTVAL (operands[2]) & 0x1f) != 0x1f"
+   [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 2)))
+(set (match_dup 0) (zero_extend:DI (ashift:SI (const_int 1) (subreg:QI 
(match_dup 0) 0])
+
 (define_insn "*bset_1_mask"
   [(set (match_operand:X 0 "register_operand" "=r")
(ashift:X (const_int 1)
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c 
b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
new file mode 100644
index 000..301bc9d89c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" } } */
+
+
+typedef unsigned int uint32_t;
+uint32_t foo(uint32_t pos)
+{
+return (1 << (7-(pos) % 8));
+}
+
+typedef unsigned int uint32_t;
+uint32_t foo2(uint32_t pos)
+{
+return (1 << (pos & 0xf));
+}
+
+/* { dg-final { scan-assembler-not "sll\t" } } */
+/* { dg-final { scan-assembler-times "bset\t" 2 } } */
+/* { dg-final { scan-assembler-times "andi\t" 1 } } */
+/* { dg-final { scan-assembler-times "andn\t" 1 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "ret" 2 } } */
+


Re: [RFC v3] RISC-V: Promote Zaamo/Zalrsc to a when using an old binutils

2024-06-17 Thread Kito Cheng
When 'a' is put into riscv_combine_info, 'a' will be added to the
arch string only if zaamo *AND* zalrsc are both present, so zalrsc alone
won't trigger that.

On Tue, Jun 18, 2024 at 1:35 PM Patrick O'Neill  wrote:
>
>
>
> On Mon, Jun 17, 2024 at 5:51 PM Kito Cheng  wrote:
>>
>> Maybe just add 'a' to riscv_combine_info and other logic to keep the
>> same (e.g. keep the logic for skip_zaamo_zalrsc)?
>
>
> I did consider unconditionally upgrading zaamo/zalrsc to ‘a’ (I think that’s 
> what you’re suggesting w/ riscv_combine_info).
> That could cause issues if users are trying to compile for a zalrsc-only chip 
> with an old version of binutils. If we upgrade zalrsc -> ‘a’ for both cc1 and 
> binutils then cc1 will emit amo ops instead of their lr/sc equivalent.
> GCC would end up emitting insns that are illegal for the user-provided -march 
> string.
>
> Patrick
>
>>
>> On Tue, Jun 18, 2024 at 8:03 AM Patrick O'Neill  wrote:
>> >
>> > Binutils 2.42 and before don't support Zaamo/Zalrsc. Promote Zaamo/Zalrsc 
>> > to
>> > 'a' in the -march string when assembling.
>> >
>> > This change respects Zaamo/Zalrsc when generating code.
>> >
>> > Testcases that check for the default isa string will fail with the old 
>> > binutils
>> > since zaamo/zalrsc aren't emitted anymore. All other Zaamo/Zalrsc testcases
>> > pass.
>> >
>> > gcc/ChangeLog:
>> >
>> > * common/config/riscv/riscv-common.cc
>> > (riscv_subset_list::to_string): Add toggle to promote Zaamo/Zalrsc
>> > extensions to 'a'.
>> > (riscv_arch_str): Ditto.
>> > (riscv_expand_arch): Ditto.
>> > (riscv_expand_arch_from_cpu): Ditto.
>> > (riscv_expand_arch_upgrade_exts): New function. Wrapper around
>> > riscv_expand_arch to preserve the function signature.
>> > (riscv_expand_arch_no_upgrade_exts): Ditto
>> > (riscv_expand_arch_from_cpu_upgrade_exts): New function. Wrapper 
>> > around
>> > riscv_expand_arch_from_cpu to preserve the function signature.
>> > (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
>> > * config/riscv/riscv-protos.h (riscv_arch_str): Add toggle to 
>> > function
>> > prototype.
>> > * config/riscv/riscv-subset.h: Ditto.
>> > * config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
>> > * config/riscv/riscv.cc (riscv_emit_attribute):
>> > (riscv_declare_function_name):
>> > * config/riscv/riscv.h (riscv_expand_arch): Remove.
>> > (riscv_expand_arch_from_cpu): Ditto.
>> > (riscv_expand_arch_upgrade_exts): Add toggle wrapper functions.
>> > (riscv_expand_arch_no_upgrade_exts): Ditto.
>> > (riscv_expand_arch_from_cpu_upgrade_exts): Ditto.
>> > (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
>> > (EXTRA_SPEC_FUNCTIONS): Ditto.
>> > (OPTION_DEFAULT_SPECS): Use non-upgraded march string when 
>> > invoking the
>> > compiler.
>> > (ASM_SPEC): Use upgraded march string when invoking the assembler.
>> >
>> > Signed-off-by: Patrick O'Neill 
>> > ---
>> > v3 ChangeLog:
>> > Rebased on non-promoting patch.
>> > Wrap all Zaamo/Zalrsc upgrade code in #ifndef to prevent compiler
>> > warnings about unused/potentially undefined variables.
>> > Silence unused parameter warning with a voidcast.
>> > ---
>> > RFC since I'm not sure if this upgrade behavior is more trouble than
>> > it's worth - this is a pretty invasive change. Happy to iterate further
>> > or just drop these changes.
>> > ---
>> >  gcc/common/config/riscv/riscv-common.cc | 111 +---
>> >  gcc/config/riscv/riscv-protos.h |   3 +-
>> >  gcc/config/riscv/riscv-subset.h |   2 +-
>> >  gcc/config/riscv/riscv-target-attr.cc   |   4 +-
>> >  gcc/config/riscv/riscv.cc   |   7 +-
>> >  gcc/config/riscv/riscv.h|  46 ++
>> >  6 files changed, 137 insertions(+), 36 deletions(-)
>> >
>> > diff --git a/gcc/common/config/riscv/riscv-common.cc 
>> > b/gcc/common/config/riscv/riscv-common.cc
>> > index 1dc1d9904c7..05c26f73b73 100644
>> > --- a/gcc/common/config/riscv/riscv-common.cc
>> > +++ b/gcc/common/config/riscv/riscv-common.cc
>> > @@ -907,7 +907,7 @@ riscv_subset_list::add (const char *subset, bool 
>> > implied_p)
>> > VERSION_P to determine append version info or not.  */
>> >
>> >  std::string
>> > -riscv_subset_list::to_string (bool version_p) const
>> > +riscv_subset_list::to_string (bool version_p, bool upgrade_exts) const
>> >  {
>> >std::ostringstream oss;
>> >oss << "rv" << m_xlen;
>> > @@ -916,10 +916,17 @@ riscv_subset_list::to_string (bool version_p) const
>> >riscv_subset_t *subset;
>> >
>> >bool skip_zifencei = false;
>> > -  bool skip_zaamo_zalrsc = false;
>> >bool skip_zicsr = false;
>> >bool i2p0 = false;
>> >
>> > +#ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
>> > +  bool upgrade_zaamo_zalrsc = false;
>> > +  bool has_a_ext = false;
>> > +  bool insert_a_ext = f

Re: [RFC v3] RISC-V: Promote Zaamo/Zalrsc to a when using an old binutils

2024-06-17 Thread Patrick O'Neill
On Mon, Jun 17, 2024 at 5:51 PM Kito Cheng  wrote:

> Maybe just add 'a' to riscv_combine_info and other logic to keep the
> same (e.g. keep the logic for skip_zaamo_zalrsc)?


I did consider unconditionally upgrading zaamo/zalrsc to ‘a’ (I think
that’s what you’re suggesting w/ riscv_combine_info).
That could cause issues if users are trying to compile for a zalrsc-only
chip with an old version of binutils. If we upgrade zalrsc -> ‘a’ for both
cc1 and binutils then cc1 will emit amo ops instead of their lr/sc
equivalent.
GCC would end up emitting insns that are illegal for the user-provided
-march string.

Patrick


> On Tue, Jun 18, 2024 at 8:03 AM Patrick O'Neill 
> wrote:
> >
> > Binutils 2.42 and before don't support Zaamo/Zalrsc. Promote
> Zaamo/Zalrsc to
> > 'a' in the -march string when assembling.
> >
> > This change respects Zaamo/Zalrsc when generating code.
> >
> > Testcases that check for the default isa string will fail with the old
> binutils
> > since zaamo/zalrsc aren't emitted anymore. All other Zaamo/Zalrsc
> testcases
> > pass.
> >
> > gcc/ChangeLog:
> >
> > * common/config/riscv/riscv-common.cc
> > (riscv_subset_list::to_string): Add toggle to promote
> Zaamo/Zalrsc
> > extensions to 'a'.
> > (riscv_arch_str): Ditto.
> > (riscv_expand_arch): Ditto.
> > (riscv_expand_arch_from_cpu): Ditto.
> > (riscv_expand_arch_upgrade_exts): New function. Wrapper around
> > riscv_expand_arch to preserve the function signature.
> > (riscv_expand_arch_no_upgrade_exts): Ditto
> > (riscv_expand_arch_from_cpu_upgrade_exts): New function. Wrapper
> around
> > riscv_expand_arch_from_cpu to preserve the function signature.
> > (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
> > * config/riscv/riscv-protos.h (riscv_arch_str): Add toggle to
> function
> > prototype.
> > * config/riscv/riscv-subset.h: Ditto.
> > * config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
> > * config/riscv/riscv.cc (riscv_emit_attribute):
> > (riscv_declare_function_name):
> > * config/riscv/riscv.h (riscv_expand_arch): Remove.
> > (riscv_expand_arch_from_cpu): Ditto.
> > (riscv_expand_arch_upgrade_exts): Add toggle wrapper functions.
> > (riscv_expand_arch_no_upgrade_exts): Ditto.
> > (riscv_expand_arch_from_cpu_upgrade_exts): Ditto.
> > (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
> > (EXTRA_SPEC_FUNCTIONS): Ditto.
> > (OPTION_DEFAULT_SPECS): Use non-upgraded march string when
> invoking the
> > compiler.
> > (ASM_SPEC): Use upgraded march string when invoking the
> assembler.
> >
> > Signed-off-by: Patrick O'Neill 
> > ---
> > v3 ChangeLog:
> > Rebased on non-promoting patch.
> > Wrap all Zaamo/Zalrsc upgrade code in #ifndef to prevent compiler
> > warnings about unused/potentially undefined variables.
> > Silence unused parameter warning with a voidcast.
> > ---
> > RFC since I'm not sure if this upgrade behavior is more trouble than
> > it's worth - this is a pretty invasive change. Happy to iterate further
> > or just drop these changes.
> > ---
> >  gcc/common/config/riscv/riscv-common.cc | 111 +---
> >  gcc/config/riscv/riscv-protos.h |   3 +-
> >  gcc/config/riscv/riscv-subset.h |   2 +-
> >  gcc/config/riscv/riscv-target-attr.cc   |   4 +-
> >  gcc/config/riscv/riscv.cc   |   7 +-
> >  gcc/config/riscv/riscv.h|  46 ++
> >  6 files changed, 137 insertions(+), 36 deletions(-)
> >
> > diff --git a/gcc/common/config/riscv/riscv-common.cc
> b/gcc/common/config/riscv/riscv-common.cc
> > index 1dc1d9904c7..05c26f73b73 100644
> > --- a/gcc/common/config/riscv/riscv-common.cc
> > +++ b/gcc/common/config/riscv/riscv-common.cc
> > @@ -907,7 +907,7 @@ riscv_subset_list::add (const char *subset, bool
> implied_p)
> > VERSION_P to determine append version info or not.  */
> >
> >  std::string
> > -riscv_subset_list::to_string (bool version_p) const
> > +riscv_subset_list::to_string (bool version_p, bool upgrade_exts) const
> >  {
> >std::ostringstream oss;
> >oss << "rv" << m_xlen;
> > @@ -916,10 +916,17 @@ riscv_subset_list::to_string (bool version_p) const
> >riscv_subset_t *subset;
> >
> >bool skip_zifencei = false;
> > -  bool skip_zaamo_zalrsc = false;
> >bool skip_zicsr = false;
> >bool i2p0 = false;
> >
> > +#ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
> > +  bool upgrade_zaamo_zalrsc = false;
> > +  bool has_a_ext = false;
> > +  bool insert_a_ext = false;
> > +  bool inserted_a_ext = false;
> > +  riscv_subset_t *a_subset;
> > +#endif
> > +
> >/* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei
> is
> >   included in the base ISA.  */
> >if (riscv_isa_spec == ISA_SPEC_CLASS_2P2)
> > @@ -945,8 +952,33 @@ riscv_subset_list::to_string (bool version_p) const
> >skip_zif

[PATCH 1/2] aarch64: make thunderxt88p1 an alias of thunderxt88

2024-06-17 Thread Andrew Pinski
Since r7-6575-g71aba51d6460ff, thunderxt88 has been the same as thunderxt88p1,
so let's make them a true alias, remove the odd variant handling, and move
thunderxt88p1 below thunderxt88.

Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def (thunderxt88p1): Make an alias of 
thunderxt88 and
move below thunderxt88.
* config/aarch64/aarch64-tune.md: Regenerate.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-cores.def | 5 ++---
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index be60929e400..06a8213811c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -58,10 +58,9 @@ AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, V8A,  
(CRC), cortexa73, 0x41,
 
 /* Cavium ('C') cores. */
 AARCH64_CORE("thunderx",  thunderx,  thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a0, -1)
-/* Do not swap around "thunderxt88p1" and "thunderxt88",
-   this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  (CRC, CRYPTO),   
thunderxt88,  0x43, 0x0a1, 0)
 AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderxt88,  0x43, 0x0a1, -1)
+/* "thunderxt88p1 is just an alias for thunderxt88 now. */
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  V8A,  (CRC, CRYPTO), 
thunderxt88,  0x43, 0x0a1, -1)
 
 /* OcteonTX is the official name for T81/T83. */
 AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a0, -1)
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index ba940f1c890..9b1f32a0330 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,cobalt100,neoversev2,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
-- 
2.43.0



[PATCH 2/2] aarch64: Add comment about thunderxt81/t83 being aliases

2024-06-17 Thread Andrew Pinski
Since these were already aliases just make it clear on that.

gcc/ChangeLog:

* config/aarch64/aarch64-cores.def: Add comment
saying thunderxt81/t83 are aliases of octeontx81/83.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-cores.def | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 06a8213811c..0e05e81761c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -67,6 +67,7 @@ AARCH64_CORE("octeontx",  octeontx,  thunderx,  V8A,  
(CRC, CRYPTO), thu
 AARCH64_CORE("octeontx81",octeontxt81,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("octeontx83",octeontxt83,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a3, -1)
 
+/* thunderxt81/83 are aliases for octeontxt81/83. */
 AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  V8A,  (CRC, CRYPTO), 
thunderx,  0x43, 0x0a3, -1)
 
-- 
2.43.0



[PATCH 0/2] aarch64: Small cleanups of the cavium cores

2024-06-17 Thread Andrew Pinski
While thinking about the variant patch I had posted, I went back to
look at the original cores which used the variant and saw there
was a small cleanup possible for them, since thunderx is no longer considered
a V8.1-a core but rather just a V8-a one; when I made that change
I didn't do the cleanups that are done in this patch set.
Note there is still a core which uses the variant selection, so we can't
remove the code there.

Andrew Pinski (2):
  aarch64: make thunderxt88p1 an alias of thunderxt88
  aarch64: Add comment about thunderxt81/t83 being aliases

 gcc/config/aarch64/aarch64-cores.def | 6 +++---
 gcc/config/aarch64/aarch64-tune.md   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

-- 
2.43.0



Re: [PATCH] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-06-17 Thread Peter Bergner
On 6/17/24 7:57 PM, Segher Boessenkool wrote:
> On Mon, Jun 17, 2024 at 06:49:18PM -0500, Peter Bergner wrote:
>> On 6/17/24 6:11 PM, Segher Boessenkool wrote:
>> Yeah, I didn't write that, I only moved it, but I can try to come up with
>> an explanation of why we need to disable it now.  That said, my hope is to
>> not have to disable shrink-wrapping even when we emit the ROP protect hash
>> insns in the future, but that will take some extra work.  If I can manage
>> that, then this should all just go away. :-)  Until then, we can stick
>> with this patch's micro-optimization.
> 
> If you inline one function into another, there is no ROP protection on
> their boundary anymore (since there is no such boundary anymore!)  This
> is not necessarily a problem, but you do want some noipa or similar
> markup where without ROP protection you have no incentive to do that.
> 
> Shrink-wrapping allows more inlining, and more inlining allows more
> shrink-wrapping, but there is no direct relation between shrink-wrapping
> and our ROP protect stuff?  We just need to make sure the hashst and
> hashchk things are done at the very start and the very end of the
> functions, but we need to make sure of that anyway!
> 
> So yeah, please investigate a bit more :-)

So we should be able to shrink-wrap in the presence of the ROP protection.
The ROP attacks work by buffer overrun type issues, clobbering the return
address that was saved on the stack causing us to return to somewhere else.
If we don't need to save the return address on the stack like for leaf
functions, or shrink-wrapped sections that are call free, those codes
are not really susceptible to ROP attacks.  It's the call paths where we
save the return address on the stack that we have to protect.  If inlining
or shrink wrapping increases the amount of code that is call free (ie, we
don't need to save the return address), then that code is not less safe
than before but as safe or safer than before.  It seems the reason we
disabled shrink-wrapping now, was that we were emitting the hashst in the
wrong location (PR101324) causing us to store a bad hash value.  I think
that was just a "bug" that probably should have been fixed rather than
worked around by disabling shrink-wrapping.  It's on my TODO to take a
look at fixing that correctly.





>> At the moment, yes, since the rop_ok test not only checks for the -mcpu= 
>> level,
>> it also verifies that the ABI is ok.
> 
> Ah right!  Add a short comment?

Can do.


>> Currently, rop_ok makes sure we have
>> Power10 and ELFv2 ABI being used.  So currently, if we were to run this test
>> on BE, we'd get an UNSUPPORTED using the rop_ok check, but if we removed it,
>> we'd see a FAIL.  
> 
> Yup.


Peter




Re: [PATCH] rs6000, altivec-2-runnable.c update the require-effective-target

2024-06-17 Thread Kewen.Lin
Hi,

on 2024/6/18 00:08, Peter Bergner wrote:
> On 6/14/24 1:37 PM, Carl Love wrote:
>> Per the additional feedback after patch: 
>>
>>   commit c892525813c94b018464d5a4edc17f79186606b7
>>   Author: Carl Love 
>>   Date:   Tue Jun 11 14:01:16 2024 -0400
>>
>>   rs6000, altivec-2-runnable.c should be a runnable test
>> 
>>   The test case has "dg-do compile" set not "dg-do run" for a runnable
>>   test.  This patch changes the dg-do command argument to run.
>> 
>>   gcc/testsuite/ChangeLog:gcc/testsuite/ChangeLog:
>>   * gcc.target/powerpc/altivec-2-runnable.c: Change dg-do
>>   argument to run.
> 
> Test case altivec-1-runnable.c seems to have the same issue, in that it
> is currently a dg-do compile test case rather than the intended dg-do run.

Good catch!

> Can you have a look at changing that to dg-do run too?  My guess is that
> this one will want something similar to some other altivec test cases, a la:
> 
> /* { dg-do run { target vmx_hw } } */
> /* { dg-do compile { target { ! vmx_hw } } } */
> /* { dg-require-effective-target powerpc_altivec_ok } */
> /* { dg-options "-O2 -maltivec -mabi=altivec" } */

I'd expect a "-runnable" test case to focus on run-time testing.  Normally,
the one without "-runnable" would focus on compile-time testing (scanning for
some desired insn), but altivec-1.c and altivec-1-runnable.c seem to test
different things; maybe we should give them different names if they don't
test the same test point.

> 
> That said, I don't like not having a -mdejagnu-cpu=... here.
> I think for our server cpus, this is fine, but on an embedded system
> with an old ISA default for -mcpu=... (so we'd be doing a dg-do compile),
> just adding -maltivec to that default may not make much sense
> and probably should be an error.  Maybe something like:

Yes, for some embedded cpus, there will be some error messages, but since
we have powerpc_altivec_ok effective target, the error would make that
effective target checking fail so I'd expect it'll stop it being tested
(unsupported).

> 
> /* { dg-do run { target vmx_hw } } */
> /* { dg-do compile { target { ! vmx_hw } } } */
> /* { dg-require-effective-target powerpc_altivec_ok } */
> /* { dg-options "-O2 -mdejagnu-cpu=power7" } */
> 
> ...makes more sense?   Ke Wen & Segher, thoughts on that?
> Ke Wen, should powerpc_altivec_ok be powerpc_altivec here???

Yes, I just pushed r15-1390 for this change.

BR,
Kewen



Re: [Patch-2v3, rs6000] Eliminate unnecessary byte swaps for duplicated constant vector store [PR113325]

2024-06-17 Thread Kewen.Lin
Hi Haochen,

on 2024/6/17 16:59, HAO CHEN GUI wrote:
> Hi,
>   This patch creates an insn_and_split pattern which helps the duplicated
> constant vector replace the source pseudo of store insn in fwprop pass.
> Thus the store can be implemented by a single stxvd2x and it eliminates the
> unnecessary byte swap insn on P8 LE. The test case shows the optimization.
> 
>   The patch depends on the first generic patch which uses insn cost in fwprop.
> 
>   Compared to previous version, the main change is to move
> "can_create_pseudo_p ()" to insn condition.
> 
>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
> regressions. Is it OK for trunk?

OK, thanks!

BR,
Kewen

> 
> Thanks
> Gui Haochen
> 
> 
> ChangeLog
> rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store
> 
> gcc/
>   PR target/113325
>   * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_): New.
> 
> gcc/testsuite/
>   PR target/113325
>   * gcc.target/powerpc/pr113325.c: New.
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index f135fa079bd..d350c92141c 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -3368,6 +3368,31 @@ (define_insn "*vsx_stxvd2x4_le_"
>"stxvd2x %x1,%y0"
>[(set_attr "type" "vecstore")])
> 
> +(define_insn_and_split "vsx_stxvd2x4_le_const_"
> +  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
> + (match_operand:VSX_W 1 "immediate_operand" "W"))]
> +  "!BYTES_BIG_ENDIAN
> +   && VECTOR_MEM_VSX_P (mode)
> +   && !TARGET_P9_VECTOR
> +   && const_vec_duplicate_p (operands[1])
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 2)
> + (match_dup 1))
> +   (set (match_dup 0)
> + (vec_select:VSX_W
> +   (match_dup 2)
> +   (parallel [(const_int 2) (const_int 3)
> +  (const_int 0) (const_int 1)])))]
> +{
> +  /* Here all the constants must be loaded without memory.  */
> +  gcc_assert (easy_altivec_constant (operands[1], mode));
> +  operands[2] = gen_reg_rtx (mode);
> +}
> +  [(set_attr "type" "vecstore")
> +   (set_attr "length" "8")])
> +
>  (define_insn "*vsx_stxvd2x8_le_V8HI"
>[(set (match_operand:V8HI 0 "memory_operand" "=Z")
>  (vec_select:V8HI
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c 
> b/gcc/testsuite/gcc.target/powerpc/pr113325.c
> new file mode 100644
> index 000..3ca1fcbc9ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
> +
> +void* foo (void* s1)
> +{
> +  return __builtin_memset (s1, 0, 32);
> +}



Re: [PATCH] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]

2024-06-17 Thread Kewen.Lin
on 2024/6/17 20:57, Peter Bergner wrote:
> On 6/16/24 9:40 PM, Kewen.Lin wrote:
>> on 2024/6/17 10:31, Peter Bergner wrote:
>>> On 6/16/24 9:10 PM, Kewen.Lin wrote:
>>>> on 2024/6/15 01:05, Peter Bergner wrote:
>>>>> That said, the --with-cpu=power5 build without fortran did bootstrap and
>>>>> regtest with no regressions, so the build did test that code path and
>>>>> exposed no problems.
>>>>
>>>> OK, nice!  Thanks!
>>>
>>> I assume this means you're "OK" with the updated patch, correct?
>>
>> Yes, OK for trunk, thanks!
> 
> Thanks.  We will need backports to GCC 11, as it is broken back to when
> ROP was first added then.  I'll let things burn-in on trunk for a couple
> of days so Bill's CI builders have a chance to test it on all of our
> configs.  

OK for backporting too (if you still need an explicit OK).  Thanks!

BR,
Kewen



[PING][PATCH] [tree-optimization/110279] fix testcase pr110279-1.c

2024-06-17 Thread Di Zhao OS
Is this OK for trunk?

Thanks,
Di Zhao

> -Original Message-
> From: Di Zhao OS 
> Sent: Thursday, May 23, 2024 5:55 PM
> To: Jeff Law 
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH] [tree-optimization/110279] fix testcase pr110279-1.c
> 
> > -Original Message-
> > From: Jeff Law 
> > Sent: Wednesday, May 22, 2024 11:14 PM
> > To: Di Zhao OS ; gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH] [tree-optimization/110279] fix testcase pr110279-1.c
> >
> >
> >
> > On 5/22/24 5:46 AM, Di Zhao OS wrote:
> > > The test case is for targets that support FMA. Previously
> > > the "target" selector is missed in dg-final command.
> > >
> > > Tested on x86_64-pc-linux-gnu.
> > >
> > > Thanks
> > > Di Zhao
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >  * gcc.dg/pr110279-1.c: add target selector.
> > Rather than list targets explicitly in the test, wouldn't it be better
> > to have a common routine that could be used in other cases where we have
> > a test that requires FMA?
> >
> > So something similar to check_effective_target_scalar_all_fma?
> >
> >
> > Jeff
> 
> Here is an updated version of the patch. Sorry I'm not very familiar
> with the testsuite commands.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/pr110279-1.c: add target selector.
> 
> ---
> gcc/testsuite/gcc.dg/pr110279-1.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/pr110279-1.c
> b/gcc/testsuite/gcc.dg/pr110279-1.c
> index a8c7257b28d..c4f94ea5810 100644
> --- a/gcc/testsuite/gcc.dg/pr110279-1.c
> +++ b/gcc/testsuite/gcc.dg/pr110279-1.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile } */
> +/* { dg-do compile { target { scalar_all_fma || { i?86-*-* x86_64-*-* } } } }
> */
>  /* { dg-options "-Ofast --param avoid-fma-max-bits=512 --param tree-reassoc-
> width=4 -fdump-tree-widening_mul-details" } */
>  /* { dg-additional-options "-mcpu=generic" { target aarch64*-*-* } } */
>  /* { dg-additional-options "-mfma" { target i?86-*-* x86_64-*-* } } */
> @@ -64,4 +64,4 @@ foo3 (data_e a, data_e b, data_e c, data_e d)
>return result;
>  }
> 
> -/* { dg-final { scan-tree-dump-times "Generated FMA" 3 "widening_mul"} } */
> \ No newline at end of file
> +/* { dg-final { scan-tree-dump-times "Generated FMA" 3 "widening_mul" } } */
> --
> 2.25.1
> 



Re: [PATCH] LoongArch: NFC: Dedup and sort the comment in loongarch_print_operand_reloc

2024-06-17 Thread Lulu Cheng

I think that's fine.

Thanks!

在 2024/6/16 下午5:11, Xi Ruoyao 写道:

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
Dedup and sort the comment describing modifiers.
---

It's a non-functional change thus I've not tested it.  Ok for trunk?

  gcc/config/loongarch/loongarch.cc | 10 +-
  1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 256b76d044b..dcb32a96577 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6132,21 +6132,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool 
hi64_part,
 'T'Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
  'z' for (eq:?I ...), 'n' for (ne:?I ...).
 't'Like 'T', but with the EQ/NE cases reversed
-   'F' Print the FPU branch condition for comparison OP.
-   'W' Print the inverse of the FPU branch condition for comparison OP.
-   'w' Print a LSX register.
 'u'Print a LASX register.
-   'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
- 'z' for (eq:?I ...), 'n' for (ne:?I ...).
-   't' Like 'T', but with the EQ/NE cases reversed
-   'Y' Print loongarch_fp_conditions[INTVAL (OP)]
-   'Z' Print OP and a comma for 8CC, otherwise print nothing.
-   'z' Print $0 if OP is zero, otherwise print OP normally.
 'v'Print the insn size suffix b, h, w or d for vector modes V16QI, 
V8HI,
  V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
 'V'Print exact log2 of CONST_INT OP element 0 of a replicated
  CONST_VECTOR in decimal.
 'W'Print the inverse of the FPU branch condition for comparison OP.
+   'w' Print a LSX register.
 'X'Print CONST_INT OP in hexadecimal format.
 'x'Print the low 16 bits of CONST_INT OP in hexadecimal format.
 'Y'Print loongarch_fp_conditions[INTVAL (OP)]




Re: [PATCH] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-06-17 Thread Segher Boessenkool
Hi!

On Mon, Jun 17, 2024 at 06:49:18PM -0500, Peter Bergner wrote:
> On 6/17/24 6:11 PM, Segher Boessenkool wrote:
> >> -  /* If we are inserting ROP-protect instructions, disable shrink wrap.  
> >> */
> >> -  if (rs6000_rop_protect)
> >> -flag_shrink_wrap = 0;
> >>  }
> > 
> > (Yes, I know the original code didn't say either, but let's try to make
> > things better :-) )
> 
> Yeah, I didn't write that, I only moved it, but I can try to come up with
> an explanation of why we need to disable it now.  That said, my hope is to
> not have to disable shrink-wrapping even when we emit the ROP protect hash
> insns in the future, but that will take some extra work.  If I can manage
> that, then this should all just go away. :-)  Until then, we can stick
> with this patch's micro-optimization.

If you inline one function into another, there is no ROP protection on
their boundary anymore (since there is no such boundary anymore!)  This
is not necessarily a problem, but you do want some noipa or similar
markup where without ROP protection you have no incentive to do that.

Shrink-wrapping allows more inlining, and more inlining allows more
shrink-wrapping, but there is no direct relation between shrink-wrapping
and our ROP protect stuff?  We just need to make sure the hashst and
hashchk things are done at the very start and the very end of the
functions, but we need to make sure of that anyway!

So yeah, please investigate a bit more :-)

> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
> >> @@ -0,0 +1,16 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect 
> >> -fdump-rtl-pro_and_epilogue" } */
> >> +/* { dg-require-effective-target rop_ok } */
> > 
> > Do you want rop_ok while you are *forcing* it to be okay anyway?  Why?
> 
> At the moment, yes, since the rop_ok test not only checks for the -mcpu= 
> level,
> it also verifies that the ABI is ok.

Ah right!  Add a short comment?

> Currently, rop_ok makes sure we have
> Power10 and ELFv2 ABI being used.  So currently, if we were to run this test
> on BE, we'd get an UNSUPPORTED using the rop_ok check, but if we removed it,
> we'd see a FAIL.  

Yup.

> As we discussed offline, the plan is to eventually enable emitting the ROP 
> protect
> hash insns on other ABIs, but until then, I think we want to keep the rop_ok 
> check
> so as to keep Bill's CI builder from flagging it as a FAIL.

:-)


Segher


Re: [RFC v3] RISC-V: Promote Zaamo/Zalrsc to a when using an old binutils

2024-06-17 Thread Kito Cheng
Maybe just add 'a' to riscv_combine_info and keep the other logic the
same (e.g. keep the logic for skip_zaamo_zalrsc)?

On Tue, Jun 18, 2024 at 8:03 AM Patrick O'Neill  wrote:
>
> Binutils 2.42 and before don't support Zaamo/Zalrsc. Promote Zaamo/Zalrsc to
> 'a' in the -march string when assembling.
>
> This change respects Zaamo/Zalrsc when generating code.
>
> Testcases that check for the default isa string will fail with the old 
> binutils
> since zaamo/zalrsc aren't emitted anymore. All other Zaamo/Zalrsc testcases
> pass.
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc
> (riscv_subset_list::to_string): Add toggle to promote Zaamo/Zalrsc
> extensions to 'a'.
> (riscv_arch_str): Ditto.
> (riscv_expand_arch): Ditto.
> (riscv_expand_arch_from_cpu): Ditto.
> (riscv_expand_arch_upgrade_exts): New function. Wrapper around
> riscv_expand_arch to preserve the function signature.
> (riscv_expand_arch_no_upgrade_exts): Ditto
> (riscv_expand_arch_from_cpu_upgrade_exts): New function. Wrapper 
> around
> riscv_expand_arch_from_cpu to preserve the function signature.
> (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
> * config/riscv/riscv-protos.h (riscv_arch_str): Add toggle to function
> prototype.
> * config/riscv/riscv-subset.h: Ditto.
> * config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
> * config/riscv/riscv.cc (riscv_emit_attribute):
> (riscv_declare_function_name):
> * config/riscv/riscv.h (riscv_expand_arch): Remove.
> (riscv_expand_arch_from_cpu): Ditto.
> (riscv_expand_arch_upgrade_exts): Add toggle wrapper functions.
> (riscv_expand_arch_no_upgrade_exts): Ditto.
> (riscv_expand_arch_from_cpu_upgrade_exts): Ditto.
> (riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
> (EXTRA_SPEC_FUNCTIONS): Ditto.
> (OPTION_DEFAULT_SPECS): Use non-upgraded march string when invoking 
> the
> compiler.
> (ASM_SPEC): Use upgraded march string when invoking the assembler.
>
> Signed-off-by: Patrick O'Neill 
> ---
> v3 ChangeLog:
> Rebased on non-promoting patch.
> Wrap all Zaamo/Zalrsc upgrade code in #ifndef to prevent compiler
> warnings about unused/potentially undefined variables.
> Silence unused parameter warning with a voidcast.
> ---
> RFC since I'm not sure if this upgrade behavior is more trouble than
> it's worth - this is a pretty invasive change. Happy to iterate further
> or just drop these changes.
> ---
>  gcc/common/config/riscv/riscv-common.cc | 111 +---
>  gcc/config/riscv/riscv-protos.h |   3 +-
>  gcc/config/riscv/riscv-subset.h |   2 +-
>  gcc/config/riscv/riscv-target-attr.cc   |   4 +-
>  gcc/config/riscv/riscv.cc   |   7 +-
>  gcc/config/riscv/riscv.h|  46 ++
>  6 files changed, 137 insertions(+), 36 deletions(-)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index 1dc1d9904c7..05c26f73b73 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -907,7 +907,7 @@ riscv_subset_list::add (const char *subset, bool 
> implied_p)
> VERSION_P to determine append version info or not.  */
>
>  std::string
> -riscv_subset_list::to_string (bool version_p) const
> +riscv_subset_list::to_string (bool version_p, bool upgrade_exts) const
>  {
>std::ostringstream oss;
>oss << "rv" << m_xlen;
> @@ -916,10 +916,17 @@ riscv_subset_list::to_string (bool version_p) const
>riscv_subset_t *subset;
>
>bool skip_zifencei = false;
> -  bool skip_zaamo_zalrsc = false;
>bool skip_zicsr = false;
>bool i2p0 = false;
>
> +#ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
> +  bool upgrade_zaamo_zalrsc = false;
> +  bool has_a_ext = false;
> +  bool insert_a_ext = false;
> +  bool inserted_a_ext = false;
> +  riscv_subset_t *a_subset;
> +#endif
> +
>/* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei is
>   included in the base ISA.  */
>if (riscv_isa_spec == ISA_SPEC_CLASS_2P2)
> @@ -945,8 +952,33 @@ riscv_subset_list::to_string (bool version_p) const
>skip_zifencei = true;
>  #endif
>  #ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
> -  /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc.  */
> -  skip_zaamo_zalrsc = true;
> +  /* Upgrade Zaamo/Zalrsc extensions to 'a' since binutils 2.42 and earlier
> + don't recognize zaamo/zalrsc.  */
> +  upgrade_zaamo_zalrsc = upgrade_exts;
> +  if (upgrade_zaamo_zalrsc)
> +{
> +  for (subset = m_head; subset != NULL; subset = subset->next)
> +   {
> + if (subset->name == "a")
> +   has_a_ext = true;
> + if (subset->name == "zaamo" || subset->name == "zalrsc")
> +   insert_a_ext = true;
> +   }
> +  if (insert_a_ext && !has_a_ext)
> +   {
> +

[PATCH] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-06-17 Thread Peter Bergner
While auditing our ROP code generation for some test cases I wrote, I noticed
a few issues which I'm tracking in PR114759.  The first issue I noticed is we
disable shrink-wrapping when using -mrop-protect, even in the cases where we
never emit the ROP instructions because they're not needed.  The problem is
we disable shrink-wrapping too early, before we know whether we will need to
emit the ROP instructions or not.  The fix is to delay disabling shrink
wrapping until we've decided whether we will or won't be emitting the ROP
instructions.

This patch passed bootstrap and regtesting on powerpc64le-linux with no
regressions, with the unpatched build FAILing the new test case and the
patched build PASSing the new test case.
Ok for trunk?

Peter



rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

Only disable shrink-wrapping with -mrop-protect when we know we will be
emitting the ROP instructions (i.e., in non-leaf functions).

2024-06-17  Peter Bergner  

gcc/
PR target/114759
* config/rs6000/rs6000.cc (rs6000_override_options_after_change): Move
the disabling of shrink-wrapping from here
* config/rs6000/rs6000-logue.cc (rs6000_stack_info): ...to here.

gcc/testsuite/
PR target/114759
* gcc.target/powerpc/pr114759-1.c: New test.
---
 gcc/config/rs6000/rs6000-logue.cc |  6 +-
 gcc/config/rs6000/rs6000.cc   |  4 
 gcc/testsuite/gcc.target/powerpc/pr114759-1.c | 16 
 3 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr114759-1.c

diff --git a/gcc/config/rs6000/rs6000-logue.cc 
b/gcc/config/rs6000/rs6000-logue.cc
index 193e2122c0f..659da0bd53f 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -720,7 +720,11 @@ rs6000_stack_info (void)
   && info->calls_p
   && DEFAULT_ABI == ABI_ELFv2
   && rs6000_rop_protect)
-info->rop_hash_size = 8;
+{
+  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
+  flag_shrink_wrap = 0;
+  info->rop_hash_size = 8;
+}
   else if (rs6000_rop_protect && DEFAULT_ABI != ABI_ELFv2)
 {
   /* We can't check this in rs6000_option_override_internal since
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e4dc629ddcc..fd6e013c346 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3427,10 +3427,6 @@ rs6000_override_options_after_change (void)
 }
   else if (!OPTION_SET_P (flag_cunroll_grow_size))
 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
-
-  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
-  if (rs6000_rop_protect)
-flag_shrink_wrap = 0;
 }
 
 #ifdef TARGET_USES_LINUX64_OPT
diff --git a/gcc/testsuite/gcc.target/powerpc/pr114759-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
new file mode 100644
index 000..b4ba366402f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect 
-fdump-rtl-pro_and_epilogue" } */
+/* { dg-require-effective-target rop_ok } */
+
+/* Verify we still attempt shrink-wrapping when using -mrop-protect
+   and there are no function calls.  */
+
+long
+foo (long arg)
+{
+  if (arg)
+asm ("" ::: "r20");
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "Performing shrink-wrapping" 1 
"pro_and_epilogue" } } */
-- 
2.43.0



[PATCH-1v4] fwprop: Replace rtx_cost with insn_cost in try_fwprop_subst_pattern [PR113325]

2024-06-17 Thread HAO CHEN GUI
Hi,
  This patch replaces rtx_cost with insn_cost in forward propagation.
In the PR, a constant vector should be propagated to replace a pseudo
in a store insn when it is known to be a duplicated constant vector.
This reduces the insn cost but not the rtx cost. In this case, the cost
is determined by the destination operand (memory or pseudo).
Unfortunately, rtx cost can't help.

  The test case is added in the second rs6000 specific patch.

  Compared to previous version, the main changes are:
1. Invalidate recog_data when the cached INSN is swapped out.
2. Pass strict_p according to prop.likely_profitable_p () to
change_is_worthwhile.

Previous version
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/654276.html


  The patch causes a regression case on i386 as the pattern cost
regulation has a bug. Please refer to the patch and discussion here.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651363.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no
regressions. Is it OK for the trunk?

ChangeLog
fwprop: invoke change_is_worthwhile to judge if a replacement is worthwhile

gcc/
* fwprop.cc (try_fwprop_subst_pattern): Invoke change_is_worthwhile
to judge if a replacement is worthwhile.
* recog.cc (swap_change): Invalidate recog_data when the cached INSN
is swapped out.
* rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Check if the
insn cost of new rtl is unknown and fail the replacement.

patch.diff
diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
index de543923b92..4a9f68b66b1 100644
--- a/gcc/fwprop.cc
+++ b/gcc/fwprop.cc
@@ -471,29 +471,18 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, 
insn_change &use_change,
   redo_changes (0);
 }

-  /* ??? In theory, it should be better to use insn costs rather than
- set_src_costs here.  That would involve replacing this code with
- change_is_worthwhile.  */
   bool ok = recog (attempt, use_change);
-  if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
-if (rtx use_set = single_set (use_rtl))
-  {
-   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
-   temporarily_undo_changes (0);
-   auto old_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
-   redo_changes (0);
-   auto new_cost = set_src_cost (SET_SRC (use_set),
- GET_MODE (SET_DEST (use_set)), speed);
-   if (new_cost > old_cost
-   || (new_cost == old_cost && !prop.likely_profitable_p ()))
- {
-   if (dump_file)
- fprintf (dump_file, "change not profitable"
-  " (cost %d -> cost %d)\n", old_cost, new_cost);
-   ok = false;
- }
-  }
+  if (ok && !prop.changed_mem_p () && !use_insn->is_asm ()
+  && single_set (use_rtl))
+{
+  bool strict_p = !prop.likely_profitable_p ();
+  if (!change_is_worthwhile (use_change, strict_p))
+   {
+ if (dump_file)
+   fprintf (dump_file, "change not profitable");
+ ok = false;
+   }
+}

   if (!ok)
 {
diff --git a/gcc/recog.cc b/gcc/recog.cc
index a6799e3f5e6..56370e40e01 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -614,7 +614,11 @@ swap_change (int num)
   else
 std::swap (*changes[num].loc, changes[num].old);
   if (changes[num].object && !MEM_P (changes[num].object))
-std::swap (INSN_CODE (changes[num].object), changes[num].old_code);
+{
+  std::swap (INSN_CODE (changes[num].object), changes[num].old_code);
+  if (recog_data.insn == changes[num].object)
+   recog_data.insn = nullptr;
+}
 }

 /* Temporarily undo all the changes numbered NUM and up, with a view
diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
index 11639e81bb7..c5ac4956a19 100644
--- a/gcc/rtl-ssa/changes.cc
+++ b/gcc/rtl-ssa/changes.cc
@@ -186,6 +186,14 @@ rtl_ssa::changes_are_worthwhile (array_slice changes,
   if (!change->is_deletion ())
{
  change->new_cost = insn_cost (change->rtl (), for_speed);
+ /* If the cost is unknown, replacement is not worthwhile.  */
+ if (!change->new_cost)
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+"Reject replacement due to unknown insn cost.\n");
+ return false;
+   }
  new_cost += change->new_cost;
  if (for_speed)
weighted_new_cost += (cfg_bb->count.to_sreal_scale (entry_count)



[PATCH 11/11] Handle subroutine types in CodeView

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_subroutine_type DIEs into LF_PROCEDURE symbols.

gcc/
* dwarf2codeview.cc
(struct codeview_custom_type): Add lf_procedure and lf_arglist
to union.
(write_lf_procedure, write_lf_arglist): New functions.
(write_custom_types): Call write_lf_procedure and
write_lf_arglist.
(get_type_num_subroutine_type): New function.
(get_type_num): Handle DW_TAG_subroutine_type DIEs.
* dwarf2codeview.h (LF_PROCEDURE, LF_ARGLIST): Define.
---
 gcc/dwarf2codeview.cc | 238 ++
 gcc/dwarf2codeview.h  |   2 +
 2 files changed, 240 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 06267639169..e8ed3713480 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -262,6 +262,19 @@ struct codeview_custom_type
   uint8_t length;
   uint8_t position;
 } lf_bitfield;
+struct
+{
+  uint32_t return_type;
+  uint8_t calling_convention;
+  uint8_t attributes;
+  uint16_t num_parameters;
+  uint32_t arglist;
+} lf_procedure;
+struct
+{
+  uint32_t num_entries;
+  uint32_t *args;
+} lf_arglist;
   };
 };
 
@@ -1623,6 +1636,102 @@ write_lf_bitfield (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write an LF_PROCEDURE type.  Function pointers are implemented as pointers
+   to one of these.  */
+
+static void
+write_lf_procedure (codeview_custom_type *t)
+{
+  /* This is lf_procedure in binutils and lfProc in Microsoft's cvinfo.h:
+
+struct lf_procedure
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t return_type;
+  uint8_t calling_convention;
+  uint8_t attributes;
+  uint16_t num_parameters;
+  uint32_t arglist;
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_procedure.return_type);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_procedure.calling_convention);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_procedure.attributes);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_procedure.num_parameters);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_procedure.arglist);
+  putc ('\n', asm_out_file);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
+/* Write an LF_ARGLIST type.  This is just a list of other types.  LF_PROCEDURE
+   entries point to one of these.  */
+
+static void
+write_lf_arglist (codeview_custom_type *t)
+{
+  /* This is lf_arglist in binutils and lfArgList in Microsoft's cvinfo.h:
+
+struct lf_arglist
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t num_entries;
+  uint32_t args[];
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_arglist.num_entries);
+  putc ('\n', asm_out_file);
+
+  for (uint32_t i = 0; i < t->lf_arglist.num_entries; i++)
+{
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_arglist.args[i]);
+  putc ('\n', asm_out_file);
+}
+
+  free (t->lf_arglist.args);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
 /* Write the .debug$T section, which contains all of our custom type
definitions.  */
 
@@ -1673,6 +1782,14 @@ write_custom_types (void)
case LF_BITFIELD:
  write_lf_bitfield (custom_types);
  break;
+
+   case LF_PROCEDURE:
+ write_lf_procedure (custom_types);
+ break;
+
+   case LF_ARGLIST:
+ write_lf_arglist (custom_types);
+ break;
}
 
   free (custom_types);
@@ -2488,6 +2605,123 @@ get_type_num_struct (dw_die_ref type, bool in_struct, 
bool *is_fwd_ref)
   return ct->num;
 }
 
+/* Process a DW_TAG_subroutine_type DIE, adding an LF_ARGLIST and an
+   LF_PROCEDURE type, and returning the number of the latter.  */
+
+static uint32_

[PATCH 09/11] Handle arrays for CodeView

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_array_type DIEs into LF_ARRAY symbols.

gcc/
* dwarf2codeview.cc
(struct codeview_custom_type): Add lf_array to union.
(write_lf_array): New function.
(write_custom_types): Call write_lf_array.
(get_type_num_array_type): New function.
(get_type_num): Handle DW_TAG_array_type DIEs.
* dwarf2codeview.h (LF_ARRAY): Define.
---
 gcc/dwarf2codeview.cc | 179 ++
 gcc/dwarf2codeview.h  |   1 +
 2 files changed, 180 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 9e3b64522b2..3f1ce5577fc 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -250,6 +250,12 @@ struct codeview_custom_type
   codeview_integer length;
   char *name;
 } lf_structure;
+struct
+{
+  uint32_t element_type;
+  uint32_t index_type;
+  codeview_integer length_in_bytes;
+} lf_array;
   };
 };
 
@@ -1520,6 +1526,53 @@ write_lf_union (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write an LF_ARRAY type.  */
+
+static void
+write_lf_array (codeview_custom_type *t)
+{
+  size_t leaf_len;
+
+  /* This is lf_array in binutils and lfArray in Microsoft's cvinfo.h:
+
+struct lf_array
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t element_type;
+  uint32_t index_type;
+  uint16_t length_in_bytes;
+  char name[];
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_array.element_type);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_array.index_type);
+  putc ('\n', asm_out_file);
+
+  leaf_len = 13 + write_cv_integer (&t->lf_array.length_in_bytes);
+
+  ASM_OUTPUT_ASCII (asm_out_file, "", 1);
+
+  write_cv_padding (4 - (leaf_len % 4));
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
 /* Write the .debug$T section, which contains all of our custom type
definitions.  */
 
@@ -1562,6 +1615,10 @@ write_custom_types (void)
case LF_UNION:
  write_lf_union (custom_types);
  break;
+
+   case LF_ARRAY:
+ write_lf_array (custom_types);
+ break;
}
 
   free (custom_types);
@@ -2346,6 +2403,124 @@ get_type_num_struct (dw_die_ref type, bool in_struct, 
bool *is_fwd_ref)
   return ct->num;
 }
 
+/* Process a DW_TAG_array_type DIE, adding an LF_ARRAY type and returning its
+   number.  */
+
+static uint32_t
+get_type_num_array_type (dw_die_ref type, bool in_struct)
+{
+  dw_die_ref base_type, t, first_child, c, *dimension_arr;
+  uint64_t size = 0;
+  unsigned int dimensions, i;
+  uint32_t element_type;
+
+  base_type = get_AT_ref (type, DW_AT_type);
+  if (!base_type)
+return 0;
+
+  /* We need to know the size of our base type.  Loop through until we find
+ it.  */
+  t = base_type;
+  while (t && size == 0)
+{
+  switch (dw_get_die_tag (t))
+   {
+   case DW_TAG_const_type:
+   case DW_TAG_volatile_type:
+   case DW_TAG_typedef:
+   case DW_TAG_enumeration_type:
+ t = get_AT_ref (t, DW_AT_type);
+ break;
+
+   case DW_TAG_base_type:
+   case DW_TAG_structure_type:
+   case DW_TAG_class_type:
+   case DW_TAG_union_type:
+   case DW_TAG_pointer_type:
+ size = get_AT_unsigned (t, DW_AT_byte_size);
+ break;
+
+   default:
+ return 0;
+   }
+}
+
+  if (size == 0)
+return 0;
+
+  first_child = dw_get_die_child (type);
+  if (!first_child)
+return 0;
+
+  element_type = get_type_num (base_type, in_struct, false);
+  if (element_type == 0)
+return 0;
+
+  /* Create an array of our DW_TAG_subrange_type children, in reverse order.
+ We have to do this because unlike DWARF CodeView doesn't have
+ multidimensional arrays, so instead we do arrays of arrays.  */
+
+  dimensions = 0;
+  c = first_child;
+  do
+{
+  c = dw_get_die_sib (c);
+  if (dw_get_die_tag (c) != DW_TAG_subrange_type)
+   continue;
+
+  dimensions++;
+}
+  while (c != first_child);
+
+  if (dimensions == 0)
+return 0;
+
+  dimension_arr = (dw_die_ref *) xmalloc (sizeof (dw_die_ref) * dimensions);
+
+  c = first_child;
+  i = 0;
+  do
+{
+  c = dw_get_die_sib (c);
+  if (dw_get_die_tag (c) != DW_TAG_subrange_type)
+   continue;
+
+  dimension_arr[dimensions - i - 1] = c;
+  i++;
+}
+  while (c != first_child);
+
+  /* Record an LF_ARRAY entry for each array dimensi

[PATCH 08/11] Handle unions for CodeView.

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_union_type DIEs into LF_UNION symbols.

gcc/
* dwarf2codeview.cc (write_lf_union): New function.
(write_custom_types): Call write_lf_union.
(add_struct_forward_def): Handle DW_TAG_union_type DIEs.
(get_type_num_struct): Handle unions.
(get_type_num): Handle DW_TAG_union_type DIEs.
* dwarf2codeview.h (LF_UNION): Define.
---
 gcc/dwarf2codeview.cc | 91 ---
 gcc/dwarf2codeview.h  |  1 +
 2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 9c6614f6297..9e3b64522b2 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -1454,6 +1454,72 @@ write_lf_structure (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write an LF_UNION type.  */
+
+static void
+write_lf_union (codeview_custom_type *t)
+{
+  size_t name_len, leaf_len;
+
+  /* This is lf_union in binutils and lfUnion in Microsoft's cvinfo.h:
+
+struct lf_union
+{
+  uint16_t size;
+  uint16_t kind;
+  uint16_t num_members;
+  uint16_t properties;
+  uint32_t field_list;
+  uint16_t length;
+  char name[];
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_structure.num_members);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_structure.properties);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_structure.field_list);
+  putc ('\n', asm_out_file);
+
+  leaf_len = 12 + write_cv_integer (&t->lf_structure.length);
+
+  if (t->lf_structure.name)
+{
+  name_len = strlen (t->lf_structure.name) + 1;
+  ASM_OUTPUT_ASCII (asm_out_file, t->lf_structure.name, name_len);
+}
+  else
+{
+  static const char unnamed_struct[] = "";
+
+  name_len = sizeof (unnamed_struct);
+  ASM_OUTPUT_ASCII (asm_out_file, unnamed_struct, name_len);
+}
+
+  leaf_len += name_len;
+  write_cv_padding (4 - (leaf_len % 4));
+
+  free (t->lf_structure.name);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
 /* Write the .debug$T section, which contains all of our custom type
definitions.  */
 
@@ -1492,6 +1558,10 @@ write_custom_types (void)
case LF_CLASS:
  write_lf_structure (custom_types);
  break;
+
+   case LF_UNION:
+ write_lf_union (custom_types);
+ break;
}
 
   free (custom_types);
@@ -2026,7 +2096,7 @@ flush_deferred_types (void)
   last_deferred_type = NULL;
 }
 
-/* Add a forward definition for a struct or class.  */
+/* Add a forward definition for a struct, class, or union.  */
 
 static uint32_t
 add_struct_forward_def (dw_die_ref type)
@@ -2047,6 +2117,10 @@ add_struct_forward_def (dw_die_ref type)
   ct->kind = LF_STRUCTURE;
   break;
 
+case DW_TAG_union_type:
+  ct->kind = LF_UNION;
+  break;
+
 default:
   break;
 }
@@ -2068,9 +2142,9 @@ add_struct_forward_def (dw_die_ref type)
   return ct->num;
 }
 
-/* Process a DW_TAG_structure_type or DW_TAG_class_type DIE, add an
-   LF_FIELDLIST and an LF_STRUCTURE / LF_CLASS type, and return the number of
-   the latter.  */
+/* Process a DW_TAG_structure_type, DW_TAG_class_type, or DW_TAG_union_type
+   DIE, add an LF_FIELDLIST and an LF_STRUCTURE / LF_CLASS / LF_UNION type,
+   and return the number of the latter.  */
 
 static uint32_t
 get_type_num_struct (dw_die_ref type, bool in_struct, bool *is_fwd_ref)
@@ -2227,8 +2301,8 @@ get_type_num_struct (dw_die_ref type, bool in_struct, 
bool *is_fwd_ref)
   ct = ct2;
 }
 
-  /* Now add an LF_STRUCTURE / LF_CLASS, pointing to the LF_FIELDLIST we just
- added.  */
+  /* Now add an LF_STRUCTURE / LF_CLASS / LF_UNION, pointing to the
+ LF_FIELDLIST we just added.  */
 
   ct = (codeview_custom_type *) xmalloc (sizeof (codeview_custom_type));
 
@@ -2244,6 +2318,10 @@ get_type_num_struct (dw_die_ref type, bool in_struct, 
bool *is_fwd_ref)
   ct->kind = LF_STRUCTURE;
   break;
 
+case DW_TAG_union_type:
+  ct->kind = LF_UNION;
+  break;
+
 default:
   break;
 }
@@ -2325,6 +2403,7 @@ get_type_num (dw_die_ref type, bool in_struct, bool 
no_fwd_ref)
 
 case DW_TAG_structure_type:
 case DW_TAG_class_type:
+case DW_TAG_union_type:
   num = get_type_num_struct (type, in_struct, &is_fwd_ref);
   break;
 
diff --git a/gcc/dwarf2codeview.h b/gcc/dwar

[PATCH 06/11] Handle enums for CodeView

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_enumeration_type DIEs into LF_ENUM symbols.

gcc/
* dwarf2codeview.cc (MAX_FIELDLIST_SIZE): Define.
(struct codeview_integer): New structure.
(struct codeview_subtype): Likewise
(struct codeview_custom_type): Add lf_fieldlist and lf_enum
to union.
(write_cv_integer, cv_integer_len): New functions.
(write_lf_fieldlist, write_lf_enum): Likewise.
(write_custom_types): Call write_lf_fieldlist and write_lf_enum.
(add_enum_forward_def): New function.
(get_type_num_enumeration_type): Likewise.
(get_type_num): Handle DW_TAG_enumeration_type DIEs.
* dwarf2codeview.h (LF_FIELDLIST, LF_INDEX, LF_ENUMERATE): Define.
(LF_ENUM, LF_CHAR, LF_SHORT, LF_USHORT, LF_LONG): Likewise.
(LF_ULONG, LF_QUADWORD, LF_UQUADWORD): Likewise.
(CV_ACCESS_PRIVATE, CV_ACCESS_PROTECTED): Likewise.
(CV_ACCESS_PUBLIC, CV_PROP_FWDREF): Likewise.
---
 gcc/dwarf2codeview.cc | 524 ++
 gcc/dwarf2codeview.h  |  17 ++
 2 files changed, 541 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 05f5f60997e..475a53573e9 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -63,6 +63,11 @@ along with GCC; see the file COPYING3.  If not see
 #define SYMBOL_START_LABEL "Lcvsymstart"
 #define SYMBOL_END_LABEL   "Lcvsymend"
 
+/* There's two bytes available for each type's size, but follow MSVC's lead in
+   capping the LF_FIELDLIST size at fb00 (minus 8 bytes for the LF_INDEX
+   pointing to the overflow entry).  */
+#define MAX_FIELDLIST_SIZE 0xfaf8
+
 #define HASH_SIZE 16
 
 struct codeview_string
@@ -170,6 +175,31 @@ struct die_hasher : free_ptr_hash 
   }
 };
 
+struct codeview_integer
+{
+  bool neg;
+  uint64_t num;
+};
+
+struct codeview_subtype
+{
+  struct codeview_subtype *next;
+  uint16_t kind;
+
+  union
+  {
+struct
+{
+  char *name;
+  struct codeview_integer value;
+} lf_enumerate;
+struct
+{
+  uint32_t type_num;
+} lf_index;
+  };
+};
+
 struct codeview_custom_type
 {
   struct codeview_custom_type *next;
@@ -188,6 +218,20 @@ struct codeview_custom_type
   uint32_t base_type;
   uint16_t modifier;
 } lf_modifier;
+struct
+{
+  size_t length;
+  codeview_subtype *subtypes;
+  codeview_subtype *last_subtype;
+} lf_fieldlist;
+struct
+{
+  uint16_t count;
+  uint16_t properties;
+  uint32_t underlying_type;
+  uint32_t fieldlist;
+  char *name;
+} lf_enum;
   };
 };
 
@@ -978,6 +1022,292 @@ write_lf_modifier (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write a CodeView extensible integer.  If the value is non-negative and
+   < 0x8000, the value gets written directly as an uint16_t.  Otherwise, we
+   output two bytes for the integer type (LF_CHAR, LF_SHORT, ...), and the
+   actual value follows.  */
+
+static size_t
+write_cv_integer (codeview_integer *i)
+{
+  if (i->neg)
+{
+  if (i->num <= 0x80)
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_CHAR);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (1, false), asm_out_file);
+ fprint_whex (asm_out_file, -i->num);
+ putc ('\n', asm_out_file);
+
+ return 3;
+   }
+  else if (i->num <= 0x8000)
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_SHORT);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, -i->num);
+ putc ('\n', asm_out_file);
+
+ return 4;
+   }
+  else if (i->num <= 0x80000000)
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_LONG);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (4, false), asm_out_file);
+ fprint_whex (asm_out_file, -i->num);
+ putc ('\n', asm_out_file);
+
+ return 6;
+   }
+  else
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_QUADWORD);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (8, false), asm_out_file);
+ fprint_whex (asm_out_file, -i->num);
+ putc ('\n', asm_out_file);
+
+ return 10;
+   }
+}
+  else
+{
+  if (i->num <= 0x7fff)
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, i->num);
+ putc ('\n', asm_out_file);
+
+ return 2;
+   }
+  else if (i->num <= 0xffff)
+   {
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_USHORT);
+ putc ('

Re: [PATCH] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-06-17 Thread Peter Bergner
On 6/17/24 6:11 PM, Segher Boessenkool wrote:
> "ROP insns" are the instructions used in such exploits, not what you
> mean here :-)
> 
> The instructions are called "hash*", so maybe call them "hash insns"
> or "ROP protect hash insns"?

Ok, that bad verbiage was in the extra commentary not part of the git
log entry.  That said, I'll reword that to the following:

 Only disable shrink-wrapping when using -mrop-protect when we know we
-will be emitting the ROP instructions (ie, non-leaf functions).
+will be emitting the ROP protect hash instructions (ie, non-leaf functions).




>>  * config/rs6000/rs6000.cc (rs6000_override_options_after_change): Move
>>  the disabling of shrink-wrapping from here
>>  * config/rs6000/rs6000-logue.cc (rs6000_stack_info): ...to here.
> 
> Hrm.  Can you do it in some particular caller of rs6000_stack_info,
> instead?  The rs6000_stack_info function itself is not supposed to
> change any state whatsoever.

Sure, I can look at maybe moving that to the caller or maybe somewhere
better.  I'll repost the patch once I find a better location.



> The comment should say *why*!  The fact that we do is clear from the
> code itself already.  But why do we want this?
> 
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -3427,10 +3427,6 @@ rs6000_override_options_after_change (void)
>>  }
>>else if (!OPTION_SET_P (flag_cunroll_grow_size))
>>  flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
>> -
>> -  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
>> -  if (rs6000_rop_protect)
>> -flag_shrink_wrap = 0;
>>  }
> 
> (Yes, I know the original code didn't say either, but let's try to make
> things better :-) )

Yeah, I didn't write that, I only moved it, but I can try to come up with
an explanation of why we need to disable it now.  That said, my hope is to
not have to disable shrink-wrapping even when we emit the ROP protect hash
insns in the future, but that will take some extra work.  If I can manage
that, then this should all just go away. :-)  Until then, we can stick
with this patch's micro-optimization.




>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -fdump-rtl-pro_and_epilogue" } */
>> +/* { dg-require-effective-target rop_ok } */
> 
> Do you want rop_ok while you are *forcing* it to be okay anyway?  Why?

At the moment, yes, since the rop_ok test not only checks for the -mcpu= level,
it also verifies that the ABI is ok.  Currently, rop_ok makes sure we have
Power10 and ELFv2 ABI being used.  So currently, if we were to run this test
on BE, we'd get an UNSUPPORTED using the rop_ok check, but if we removed it,
we'd see a FAIL.  

As we discussed offline, the plan is to eventually enable emitting the ROP protect
hash insns on other ABIs, but until then, I think we want to keep the rop_ok check
so as to keep Bill's CI builder from flagging it as a FAIL.

Peter




[PATCH 02/11] Handle CodeView base types

2024-06-17 Thread Mark Harmstone
Adds a get_type_num function to translate type DIEs into CodeView
numbers, along with a hash table for this.  For now we just deal with
the base types (integers, Unicode chars, floats, and bools).

gcc/
* dwarf2codeview.cc (struct codeview_type): New structure.
(struct die_hasher): Likewise.
(types_htab): New variable.
(codeview_debug_finish): Free types_htab if allocated.
(get_type_num_base_type, get_type_num): New functions.
(add_variable): Call get_type_num.
* dwarf2codeview.h (T_CHAR, T_SHORT, T_LONG, T_QUAD): Define.
(T_UCHAR, T_USHORT, T_ULONG, T_UQUAD, T_BOOL08): Likewise.
(T_REAL32, T_REAL64, T_REAL80, T_REAL128, T_RCHAR): Likewise.
(T_WCHAR, T_INT4, T_UINT4, T_CHAR16, T_CHAR32, T_CHAR8): Likewise.
---
 gcc/dwarf2codeview.cc | 196 +-
 gcc/dwarf2codeview.h  |  23 +
 2 files changed, 218 insertions(+), 1 deletion(-)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 60e84635971..eb7c1270e31 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -147,6 +147,27 @@ struct codeview_symbol
   };
 };
 
+struct codeview_type
+{
+  dw_die_ref die;
+  uint32_t num;
+};
+
+struct die_hasher : free_ptr_hash <codeview_type>
+{
+  typedef dw_die_ref compare_type;
+
+  static hashval_t hash (const codeview_type *x)
+  {
+return htab_hash_pointer (x->die);
+  }
+
+  static bool equal (const codeview_type *x, const dw_die_ref y)
+  {
+return x->die == y;
+  }
+};
+
 static unsigned int line_label_num;
 static unsigned int func_label_num;
 static unsigned int sym_label_num;
@@ -159,6 +180,7 @@ static codeview_function *funcs, *last_func;
 static const char* last_filename;
 static uint32_t last_file_id;
 static codeview_symbol *sym, *last_sym;
+static hash_table<die_hasher> *types_htab;
 
 /* Record new line number against the current function.  */
 
@@ -838,6 +860,178 @@ codeview_debug_finish (void)
   write_source_files ();
   write_line_numbers ();
   write_codeview_symbols ();
+
+  if (types_htab)
+delete types_htab;
+}
+
+/* Translate a DWARF base type (DW_TAG_base_type) into its CodeView
+   equivalent.  */
+
+static uint32_t
+get_type_num_base_type (dw_die_ref type)
+{
+  unsigned int size = get_AT_unsigned (type, DW_AT_byte_size);
+
+  switch (get_AT_unsigned (type, DW_AT_encoding))
+{
+case DW_ATE_signed_char:
+  {
+   const char *name = get_AT_string (type, DW_AT_name);
+
+   if (size != 1)
+ return 0;
+
+   if (name && !strcmp (name, "signed char"))
+ return T_CHAR;
+   else
+ return T_RCHAR;
+  }
+
+case DW_ATE_unsigned_char:
+  if (size != 1)
+   return 0;
+
+  return T_UCHAR;
+
+case DW_ATE_signed:
+  switch (size)
+   {
+   case 2:
+ return T_SHORT;
+
+   case 4:
+ {
+   const char *name = get_AT_string (type, DW_AT_name);
+
+   if (name && !strcmp (name, "int"))
+ return T_INT4;
+   else
+ return T_LONG;
+ }
+
+   case 8:
+ return T_QUAD;
+
+   default:
+ return 0;
+   }
+
+case DW_ATE_unsigned:
+  switch (size)
+   {
+   case 2:
+ {
+   const char *name = get_AT_string (type, DW_AT_name);
+
+   if (name && !strcmp (name, "wchar_t"))
+ return T_WCHAR;
+   else
+ return T_USHORT;
+ }
+
+   case 4:
+ {
+   const char *name = get_AT_string (type, DW_AT_name);
+
+   if (name && !strcmp (name, "unsigned int"))
+ return T_UINT4;
+   else
+ return T_ULONG;
+ }
+
+   case 8:
+ return T_UQUAD;
+
+   default:
+ return 0;
+   }
+
+case DW_ATE_UTF:
+  switch (size)
+   {
+   case 1:
+ return T_CHAR8;
+
+   case 2:
+ return T_CHAR16;
+
+   case 4:
+ return T_CHAR32;
+
+   default:
+ return 0;
+   }
+
+case DW_ATE_float:
+  switch (size)
+   {
+   case 4:
+ return T_REAL32;
+
+   case 8:
+ return T_REAL64;
+
+   case 12:
+ return T_REAL80;
+
+   case 16:
+ return T_REAL128;
+
+   default:
+ return 0;
+   }
+
+case DW_ATE_boolean:
+  if (size == 1)
+   return T_BOOL08;
+  else
+   return 0;
+
+default:
+  return 0;
+}
+}
+
+/* Process a DIE representing a type definition and return its number.  If
+   it's something we can't handle, return 0.  We keep a hash table so that
+   we're not adding the same type multiple times - though if we do it's not
+   disastrous, as ld will deduplicate everything for us.  */
+
+static uint32_t
+get_type_num (dw_die_ref type)
+{
+  codeview_type **slot, *t;
+
+  if (!type)
+return 0;
+
+  if (!types_htab)
+types_htab = new hash_table<die_hasher> (10);
+
+  slot = types_htab->find_slot

Re: [committed] testsuite: Add -Wno-psabi to vshuf-mem.C test

2024-06-17 Thread Andreas Krebbel

On 6/14/24 20:03, Jakub Jelinek wrote:

Also wonder about the
// { dg-additional-options "-march=z14" { target s390*-*-* } }
line, doesn't that mean the test will FAIL on all pre-z14 HW?
Shouldn't it use some z14_runtime or similar effective target, or
check in main (in that case copied over to g++.target/s390) whether
z14 instructions can be actually used at runtime?


Oh right. I'll remove that line and replicate the testcase in the arch 
specific test dir.


Andreas




[PATCH 03/11] Handle typedefs for CodeView

2024-06-17 Thread Mark Harmstone
gcc/
* dwarf2codeview.cc (get_type_num): Handle typedefs.
---
 gcc/dwarf2codeview.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index eb7c1270e31..5006a176260 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -1024,6 +1024,12 @@ get_type_num (dw_die_ref type)
   t->num = get_type_num_base_type (type);
   break;
 
+case DW_TAG_typedef:
+  /* FIXME - signed longs typedef'd as "HRESULT" should get their
+own type (T_HRESULT) */
+  t->num = get_type_num (get_AT_ref (type, DW_AT_type));
+  break;
+
 default:
   t->num = 0;
   break;
-- 
2.44.2



[PATCH 10/11] Handle bitfields for CodeView

2024-06-17 Thread Mark Harmstone
Translates structure members with DW_AT_data_bit_offset set in DWARF
into LF_BITFIELD symbols.

gcc/
* dwarf2codeview.cc
(struct codeview_custom_type): Add lf_bitfield to union.
(write_lf_bitfield): New function.
(write_custom_types): Call write_lf_bitfield.
(create_bitfield): New function.
(get_type_num_struct): Handle bitfields.
* dwarf2codeview.h (LF_BITFIELD): Define.
---
 gcc/dwarf2codeview.cc | 89 ++-
 gcc/dwarf2codeview.h  |  1 +
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 3f1ce5577fc..06267639169 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -256,6 +256,12 @@ struct codeview_custom_type
   uint32_t index_type;
   codeview_integer length_in_bytes;
 } lf_array;
+struct
+{
+  uint32_t base_type;
+  uint8_t length;
+  uint8_t position;
+} lf_bitfield;
   };
 };
 
@@ -1573,6 +1579,50 @@ write_lf_array (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write an LF_BITFIELD type.  */
+
+static void
+write_lf_bitfield (codeview_custom_type *t)
+{
+  /* This is lf_bitfield in binutils and lfBitfield in Microsoft's cvinfo.h:
+
+struct lf_bitfield
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t base_type;
+  uint8_t length;
+  uint8_t position;
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_bitfield.base_type);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_bitfield.length);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_bitfield.position);
+  putc ('\n', asm_out_file);
+
+  write_cv_padding (2);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
 /* Write the .debug$T section, which contains all of our custom type
definitions.  */
 
@@ -1619,6 +1669,10 @@ write_custom_types (void)
case LF_ARRAY:
  write_lf_array (custom_types);
  break;
+
+   case LF_BITFIELD:
+ write_lf_bitfield (custom_types);
+ break;
}
 
   free (custom_types);
@@ -2199,6 +2253,33 @@ add_struct_forward_def (dw_die_ref type)
   return ct->num;
 }
 
+/* Add an LF_BITFIELD type, returning its number.  DWARF represents bitfields
+   as members in a struct with a DW_AT_data_bit_offset attribute, whereas in
+   CodeView they're a distinct type.  */
+
+static uint32_t
+create_bitfield (dw_die_ref c)
+{
+  codeview_custom_type *ct;
+  uint32_t base_type;
+
+  base_type = get_type_num (get_AT_ref (c, DW_AT_type), true, false);
+  if (base_type == 0)
+return 0;
+
+  ct = (codeview_custom_type *) xmalloc (sizeof (codeview_custom_type));
+
+  ct->next = NULL;
+  ct->kind = LF_BITFIELD;
+  ct->lf_bitfield.base_type = base_type;
+  ct->lf_bitfield.length = get_AT_unsigned (c, DW_AT_bit_size);
+  ct->lf_bitfield.position = get_AT_unsigned (c, DW_AT_data_bit_offset);
+
+  add_custom_type (ct);
+
+  return ct->num;
+}
+
 /* Process a DW_TAG_structure_type, DW_TAG_class_type, or DW_TAG_union_type
DIE, add an LF_FIELDLIST and an LF_STRUCTURE / LF_CLASS / LF_UNION type,
and return the number of the latter.  */
@@ -2279,8 +2360,12 @@ get_type_num_struct (dw_die_ref type, bool in_struct, bool *is_fwd_ref)
  break;
}
 
- el->lf_member.type = get_type_num (get_AT_ref (c, DW_AT_type), true,
-   false);
+ if (get_AT (c, DW_AT_data_bit_offset))
+   el->lf_member.type = create_bitfield (c);
+ else
+   el->lf_member.type = get_type_num (get_AT_ref (c, DW_AT_type),
+  true, false);
+
  el->lf_member.offset.neg = false;
  el->lf_member.offset.num = get_AT_unsigned (c,
  
DW_AT_data_member_location);
diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h
index 70eed6bf2aa..70eae554b80 100644
--- a/gcc/dwarf2codeview.h
+++ b/gcc/dwarf2codeview.h
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #define LF_MODIFIER 0x1001
 #define LF_POINTER 0x1002
 #define LF_FIELDLIST   0x1203
+#define LF_BITFIELD 0x1205
 #define LF_INDEX   0x1404
 #define LF_ENUMERATE   0x1502
 #define LF_ARRAY   0x1503

[PATCH 05/11] Handle const and volatile modifiers for CodeView

2024-06-17 Thread Mark Harmstone
Translate DW_TAG_const_type and DW_TAG_volatile_type DIEs into
LF_MODIFIER symbols.

gcc/
* dwarf2codeview.cc
(struct codeview_custom_type): Add lf_modifier to union.
(write_cv_padding, write_lf_modifier): New functions.
(write_custom_types): Call write_lf_modifier.
(get_type_num_const_type): New function.
(get_type_num_volatile_type): Likewise.
(get_type_num): Handle DW_TAG_const_type and
DW_TAG_volatile_type DIEs.
* dwarf2codeview.h (MOD_const, MOD_volatile): Define.
(LF_MODIFIER): Likewise.
---
 gcc/dwarf2codeview.cc | 157 ++
 gcc/dwarf2codeview.h  |   5 ++
 2 files changed, 162 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 51401f2d5bc..05f5f60997e 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -183,6 +183,11 @@ struct codeview_custom_type
   uint32_t base_type;
   uint32_t attributes;
 } lf_pointer;
+struct
+{
+  uint32_t base_type;
+  uint16_t modifier;
+} lf_modifier;
   };
 };
 
@@ -903,6 +908,76 @@ write_lf_pointer (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* All CodeView type definitions have to be aligned to a four-byte boundary,
+   so write some padding bytes if necessary.  These have to be specific values:
+   f3, f2, f1.  */
+
+static void
+write_cv_padding (size_t padding)
+{
+  if (padding == 4 || padding == 0)
+return;
+
+  if (padding == 3)
+{
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, 0xf3);
+  putc ('\n', asm_out_file);
+}
+
+  if (padding >= 2)
+{
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, 0xf2);
+  putc ('\n', asm_out_file);
+}
+
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, 0xf1);
+  putc ('\n', asm_out_file);
+}
+
+/* Write an LF_MODIFIER type, representing a const and/or volatile modification
+   of another type.  */
+
+static void
+write_lf_modifier (codeview_custom_type *t)
+{
+  /* This is lf_modifier in binutils and lfModifier in Microsoft's cvinfo.h:
+
+struct lf_modifier
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t base_type;
+  uint16_t modifier;
+  uint16_t padding;
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_modifier.base_type);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_modifier.modifier);
+  putc ('\n', asm_out_file);
+
+  write_cv_padding (2);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
 /* Write the .debug$T section, which contains all of our custom type
definitions.  */
 
@@ -924,6 +999,10 @@ write_custom_types (void)
case LF_POINTER:
  write_lf_pointer (custom_types);
  break;
+
+   case LF_MODIFIER:
+ write_lf_modifier (custom_types);
+ break;
}
 
   free (custom_types);
@@ -1159,6 +1238,76 @@ get_type_num_pointer_type (dw_die_ref type)
   return ct->num;
 }
 
+/* Process a DW_TAG_const_type DIE, adding an LF_MODIFIER type and returning
+   its number.  */
+
+static uint32_t
+get_type_num_const_type (dw_die_ref type)
+{
+  dw_die_ref base_type;
+  uint32_t base_type_num;
+  codeview_custom_type *ct;
+  bool is_volatile = false;
+
+  base_type = get_AT_ref (type, DW_AT_type);
+  if (!base_type)
+return 0;
+
+  /* Handle case when this is a const volatile type - we only need one
+ LF_MODIFIER for this.  */
+  if (dw_get_die_tag (base_type) == DW_TAG_volatile_type)
+{
+  is_volatile = true;
+
+  base_type = get_AT_ref (base_type, DW_AT_type);
+  if (!base_type)
+   return 0;
+}
+
+  base_type_num = get_type_num (base_type);
+  if (base_type_num == 0)
+return 0;
+
+  ct = (codeview_custom_type *) xmalloc (sizeof (codeview_custom_type));
+
+  ct->next = NULL;
+  ct->kind = LF_MODIFIER;
+  ct->lf_modifier.base_type = base_type_num;
+  ct->lf_modifier.modifier = MOD_const;
+
+  if (is_volatile)
+ct->lf_modifier.modifier |= MOD_volatile;
+
+  add_custom_type (ct);
+
+  return ct->num;
+}
+
+/* Process a DW_TAG_volatile_type DIE, adding an LF_MODIFIER type and
+   returning its number.  */
+
+static uint32_t
+get_type_num_volatile_type (dw_die_ref type)
+{
+  uint32_t base_type_num;
+  codeview_custom_type *ct;
+
+  base_type_num = get_type_num (get_AT_ref (type, DW_AT_ty

[PATCH 07/11] Handle structs and classes for CodeView

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_structure_type DIEs into LF_STRUCTURE symbols, and
DW_TAG_class_type DIEs into LF_CLASS symbols.

gcc/
* dwarf2codeview.cc
(struct codeview_type): Add is_fwd_ref member.
(struct codeview_subtype): Add lf_member to union.
(struct codeview_custom_type): Add lf_structure to union.
(struct codeview_deferred_type): New structure.
(deferred_types, last_deferred_type): New variables.
(get_type_num): Add new args to prototype.
(write_lf_fieldlist): Handle LF_MEMBER subtypes.
(write_lf_structure): New function.
(write_custom_types): Call write_lf_structure.
(get_type_num_pointer_type): Add in_struct argument.
(get_type_num_const_type): Likewise.
(get_type_num_volatile_type): Likewise.
(add_enum_forward_def): Fix get_type_num call.
(get_type_num_enumeration_type): Add in_struct argument.
(add_deferred_type, flush_deferred_types): New functions.
(add_struct_forward_def, get_type_num_struct): Likewise.
(get_type_num): Handle self-referential structs.
(add_variable): Fix get_type_num call.
(codeview_debug_early_finish): Call flush_deferred_types.
* dwarf2codeview.h (LF_CLASS, LF_STRUCTURE, LF_MEMBER): Define.
---
 gcc/dwarf2codeview.cc | 513 --
 gcc/dwarf2codeview.h  |   3 +
 2 files changed, 493 insertions(+), 23 deletions(-)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 475a53573e9..9c6614f6297 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -158,6 +158,7 @@ struct codeview_type
 {
   dw_die_ref die;
   uint32_t num;
+  bool is_fwd_ref;
 };
 
 struct die_hasher : free_ptr_hash 
@@ -197,6 +198,13 @@ struct codeview_subtype
 {
   uint32_t type_num;
 } lf_index;
+struct
+{
+  uint16_t attributes;
+  uint32_t type;
+  codeview_integer offset;
+  char *name;
+} lf_member;
   };
 };
 
@@ -232,9 +240,25 @@ struct codeview_custom_type
   uint32_t fieldlist;
   char *name;
 } lf_enum;
+struct
+{
+  uint16_t num_members;
+  uint16_t properties;
+  uint32_t field_list;
+  uint32_t derived_from;
+  uint32_t vshape;
+  codeview_integer length;
+  char *name;
+} lf_structure;
   };
 };
 
+struct codeview_deferred_type
+{
+  struct codeview_deferred_type *next;
+  dw_die_ref type;
+};
+
 static unsigned int line_label_num;
 static unsigned int func_label_num;
 static unsigned int sym_label_num;
@@ -249,8 +273,9 @@ static uint32_t last_file_id;
 static codeview_symbol *sym, *last_sym;
 static hash_table<die_hasher> *types_htab;
 static codeview_custom_type *custom_types, *last_custom_type;
+static codeview_deferred_type *deferred_types, *last_deferred_type;
 
-static uint32_t get_type_num (dw_die_ref type);
+static uint32_t get_type_num (dw_die_ref type, bool in_struct, bool no_fwd_ref);
 
 /* Record new line number against the current function.  */
 
@@ -1217,6 +1242,51 @@ write_lf_fieldlist (codeview_custom_type *t)
  free (v->lf_enumerate.name);
  break;
 
+   case LF_MEMBER:
+ /* This is lf_member in binutils and lfMember in Microsoft's
+cvinfo.h:
+
+   struct lf_member
+   {
+ uint16_t kind;
+ uint16_t attributes;
+ uint32_t type;
+ uint16_t offset;
+ char name[];
+   } ATTRIBUTE_PACKED;
+ */
+
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, LF_MEMBER);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (2, false), asm_out_file);
+ fprint_whex (asm_out_file, v->lf_member.attributes);
+ putc ('\n', asm_out_file);
+
+ fputs (integer_asm_op (4, false), asm_out_file);
+ fprint_whex (asm_out_file, v->lf_member.type);
+ putc ('\n', asm_out_file);
+
+ leaf_len = 8 + write_cv_integer (&v->lf_member.offset);
+
+ if (v->lf_member.name)
+   {
+ name_len = strlen (v->lf_member.name) + 1;
+ ASM_OUTPUT_ASCII (asm_out_file, v->lf_member.name, name_len);
+   }
+ else
+   {
+ name_len = 1;
+ ASM_OUTPUT_ASCII (asm_out_file, "", name_len);
+   }
+
+ leaf_len += name_len;
+ write_cv_padding (4 - (leaf_len % 4));
+
+ free (v->lf_member.name);
+ break;
+
case LF_INDEX:
  /* This is lf_index in binutils and lfIndex in Microsoft's cvinfo.h:
 
@@ -1308,6 +1378,82 @@ write_lf_enum (codeview_custom_type *t)
   asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
 }
 
+/* Write an LF_STRUCTURE or LF_CLASS type (the two have the same structure).  */
+
+static void
+write_lf_structure (codeview_custom_type *t)
+{
+  size_t name_len, leaf_len

[PATCH 01/11] Output CodeView data about variables

2024-06-17 Thread Mark Harmstone
Parses the DW_TAG_variable DIEs, and outputs S_GDATA32 (for global variables)
and S_LDATA32 (for static global variables) symbols into the .debug$S section.

gcc/
* dwarf2codeview.cc (S_LDATA32, S_GDATA32): Define.
(struct codeview_symbol): New structure.
(sym, last_sym): New variables.
(write_data_symbol): New function.
(write_codeview_symbols): Call write_data_symbol.
(add_variable, codeview_debug_early_finish): New functions.
* dwarf2codeview.h (codeview_debug_early_finish): Prototype.
* dwarf2out.cc
(dwarf2out_early_finish): Call codeview_debug_early_finish.
---
 gcc/dwarf2codeview.cc | 160 ++
 gcc/dwarf2codeview.h  |   1 +
 gcc/dwarf2out.cc  |   5 ++
 3 files changed, 166 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index db776d79be4..60e84635971 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 
 #define CHKSUM_TYPE_MD5 1
 
+#define S_LDATA32  0x110c
+#define S_GDATA32  0x110d
 #define S_COMPILE3 0x113c
 
 #define CV_CFL_80386   0x03
@@ -129,6 +131,22 @@ struct codeview_function
   codeview_line_block *blocks, *last_block;
 };
 
+struct codeview_symbol
+{
+  codeview_symbol *next;
+  uint16_t kind;
+
+  union
+  {
+struct
+{
+  uint32_t type;
+  char *name;
+  dw_die_ref die;
+} data_symbol;
+  };
+};
+
 static unsigned int line_label_num;
 static unsigned int func_label_num;
 static unsigned int sym_label_num;
@@ -140,6 +158,7 @@ static codeview_string *strings, *last_string;
 static codeview_function *funcs, *last_func;
 static const char* last_filename;
 static uint32_t last_file_id;
+static codeview_symbol *sym, *last_sym;
 
 /* Record new line number against the current function.  */
 
@@ -698,6 +717,77 @@ write_compile3_symbol (void)
   targetm.asm_out.internal_label (asm_out_file, SYMBOL_END_LABEL, label_num);
 }
 
+/* Write an S_GDATA32 symbol, representing a global variable, or an S_LDATA32
+   symbol, for a static global variable.  */
+
+static void
+write_data_symbol (codeview_symbol *s)
+{
+  unsigned int label_num = ++sym_label_num;
+  dw_attr_node *loc;
+  dw_loc_descr_ref loc_ref;
+
+  /* This is struct datasym in binutils:
+
+  struct datasym
+  {
+   uint16_t size;
+   uint16_t kind;
+   uint32_t type;
+   uint32_t offset;
+   uint16_t section;
+   char name[];
+  } ATTRIBUTE_PACKED;
+  */
+
+  /* Extract the DW_AT_location attribute from the DIE, and make sure it's
+ in a format we can parse.  */
+
+  loc = get_AT (s->data_symbol.die, DW_AT_location);
+  if (!loc)
+goto end;
+
+  if (loc->dw_attr_val.val_class != dw_val_class_loc)
+goto end;
+
+  loc_ref = loc->dw_attr_val.v.val_loc;
+  if (!loc_ref || loc_ref->dw_loc_opc != DW_OP_addr)
+goto end;
+
+  /* Output the S_GDATA32 / S_LDATA32 record.  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file,
+  "%L" SYMBOL_END_LABEL "%u - %L" SYMBOL_START_LABEL "%u\n",
+  label_num, label_num);
+
+  targetm.asm_out.internal_label (asm_out_file, SYMBOL_START_LABEL, label_num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, s->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, s->data_symbol.type);
+  putc ('\n', asm_out_file);
+
+  asm_fprintf (asm_out_file, "\t.secrel32 ");
+  output_addr_const (asm_out_file, loc_ref->dw_loc_oprnd1.v.val_addr);
+  fputc ('\n', asm_out_file);
+
+  asm_fprintf (asm_out_file, "\t.secidx ");
+  output_addr_const (asm_out_file, loc_ref->dw_loc_oprnd1.v.val_addr);
+  fputc ('\n', asm_out_file);
+
+  ASM_OUTPUT_ASCII (asm_out_file, s->data_symbol.name,
+   strlen (s->data_symbol.name) + 1);
+
+  targetm.asm_out.internal_label (asm_out_file, SYMBOL_END_LABEL, label_num);
+
+end:
+  free (s->data_symbol.name);
+}
+
 /* Write the CodeView symbols into the .debug$S section.  */
 
 static void
@@ -714,6 +804,22 @@ write_codeview_symbols (void)
 
   write_compile3_symbol ();
 
+  while (sym)
+{
+  codeview_symbol *n = sym->next;
+
+  switch (sym->kind)
+   {
+   case S_LDATA32:
+   case S_GDATA32:
+ write_data_symbol (sym);
+ break;
+   }
+
+  free (sym);
+  sym = n;
+}
+
   asm_fprintf (asm_out_file, "%LLcv_syms_end:\n");
 }
 
@@ -734,4 +840,58 @@ codeview_debug_finish (void)
   write_codeview_symbols ();
 }
 
+/* Process a DW_TAG_variable DIE, and add an S_GDATA32 or S_LDATA32 symbol for
+   this.  */
+
+static void
+add_variable (dw_die_ref die)
+{
+  codeview_symbol *s;
+  const char *name;
+
+  name = get_AT_string (die, DW_AT_name);
+  if (!name)
+return;
+
+  s = (codeview_symbol *) xm

[PATCH 04/11] Handle pointers for CodeView

2024-06-17 Thread Mark Harmstone
Translates DW_TAG_pointer_type DIEs into LF_POINTER symbols, which get
output into the .debug$T section.

gcc/
* dwarf2codeview.cc (FIRST_TYPE): Define.
(struct codeview_custom_type): New structure.
(custom_types, last_custom_type): New variables.
(get_type_num): Prototype.
(write_lf_pointer, write_custom_types): New functions.
(codeview_debug_finish): Call write_custom_types.
(add_custom_type, get_type_num_pointer_type): New functions.
(get_type_num): Handle DW_TAG_pointer_type DIEs.
* dwarf2codeview.h (T_VOID): Define.
(CV_POINTER_32, CV_POINTER_64): Likewise.
(T_32PVOID, T_64PVOID): Likewise.
(CV_PTR_NEAR32, CV_PTR64, LF_POINTER): Likewise.
---
 gcc/dwarf2codeview.cc | 179 +-
 gcc/dwarf2codeview.h  |  13 +++
 2 files changed, 188 insertions(+), 4 deletions(-)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index 5006a176260..51401f2d5bc 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -56,6 +56,8 @@ along with GCC; see the file COPYING3.  If not see
 #define CV_CFL_C   0x00
 #define CV_CFL_CXX 0x01
 
+#define FIRST_TYPE 0x1000
+
 #define LINE_LABEL "Lcvline"
 #define END_FUNC_LABEL "Lcvendfunc"
 #define SYMBOL_START_LABEL "Lcvsymstart"
@@ -168,6 +170,22 @@ struct die_hasher : free_ptr_hash 
   }
 };
 
+struct codeview_custom_type
+{
+  struct codeview_custom_type *next;
+  uint32_t num;
+  uint16_t kind;
+
+  union
+  {
+struct
+{
+  uint32_t base_type;
+  uint32_t attributes;
+} lf_pointer;
+  };
+};
+
 static unsigned int line_label_num;
 static unsigned int func_label_num;
 static unsigned int sym_label_num;
@@ -181,6 +199,9 @@ static const char* last_filename;
 static uint32_t last_file_id;
 static codeview_symbol *sym, *last_sym;
 static hash_table<die_hasher> *types_htab;
+static codeview_custom_type *custom_types, *last_custom_type;
+
+static uint32_t get_type_num (dw_die_ref type);
 
 /* Record new line number against the current function.  */
 
@@ -845,6 +866,71 @@ write_codeview_symbols (void)
   asm_fprintf (asm_out_file, "%LLcv_syms_end:\n");
 }
 
+/* Write an LF_POINTER type.  */
+
+static void
+write_lf_pointer (codeview_custom_type *t)
+{
+  /* This is lf_pointer in binutils and lfPointer in Microsoft's cvinfo.h:
+
+struct lf_pointer
+{
+  uint16_t size;
+  uint16_t kind;
+  uint32_t base_type;
+  uint32_t attributes;
+} ATTRIBUTE_PACKED;
+  */
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end - %LLcv_type%x_start\n",
+  t->num, t->num);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_start:\n", t->num);
+
+  fputs (integer_asm_op (2, false), asm_out_file);
+  fprint_whex (asm_out_file, t->kind);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_pointer.base_type);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, t->lf_pointer.attributes);
+  putc ('\n', asm_out_file);
+
+  asm_fprintf (asm_out_file, "%LLcv_type%x_end:\n", t->num);
+}
+
+/* Write the .debug$T section, which contains all of our custom type
+   definitions.  */
+
+static void
+write_custom_types (void)
+{
+  targetm.asm_out.named_section (".debug$T", SECTION_DEBUG, NULL);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, CV_SIGNATURE_C13);
+  putc ('\n', asm_out_file);
+
+  while (custom_types)
+{
+  codeview_custom_type *n = custom_types->next;
+
+  switch (custom_types->kind)
+   {
+   case LF_POINTER:
+ write_lf_pointer (custom_types);
+ break;
+   }
+
+  free (custom_types);
+  custom_types = n;
+}
+}
+
 /* Finish CodeView debug info emission.  */
 
 void
@@ -861,6 +947,9 @@ codeview_debug_finish (void)
   write_line_numbers ();
   write_codeview_symbols ();
 
+  if (custom_types)
+write_custom_types ();
+
   if (types_htab)
 delete types_htab;
 }
@@ -993,10 +1082,88 @@ get_type_num_base_type (dw_die_ref type)
 }
 }
 
-/* Process a DIE representing a type definition and return its number.  If
-   it's something we can't handle, return 0.  We keep a hash table so that
-   we're not adding the same type multiple times - though if we do it's not
-   disastrous, as ld will deduplicate everything for us.  */
+/* Add a new codeview_custom_type to our singly-linked custom_types list.  */
+
+static void
+add_custom_type (codeview_custom_type *ct)
+{
+  uint32_t num;
+
+  if (last_custom_type)
+{
+  num = last_custom_type->num + 1;
+  last_custom_type->next = ct;
+}
+  else
+{
+  num = FIRST_TYPE;
+  custom_types = ct;
+}
+
+  last_custom_type = ct;
+
+  ct->num = num;
+}
+
+/* Process a DW_TAG_pointer_type DIE.  If 

[PATCH 00/11] CodeView variables and type system

2024-06-17 Thread Mark Harmstone
This patch series adds support for outputting global variables when the
-gcodeview option is provided, along with the type system to go along
with this.

As with previous patches, the best way to see the output is run
Microsoft's cvdump.exe against the object file:
https://github.com/microsoft/microsoft-pdb/raw/master/cvdump/cvdump.exe

You'll also need a recentish version of binutils in order to get ld to
output an actual PDB file that can be read by MSVC or windbg.

This ought to be fairly complete as far as C is concerned. Still to come
are functions, local variables, and some C++ things.

Mark Harmstone (11):
  Output CodeView data about variables
  Handle CodeView base types
  Handle typedefs for CodeView
  Handle pointers for CodeView
  Handle const and volatile modifiers for CodeView
  Handle enums for CodeView
  Handle structs and classes for CodeView
  Handle unions for CodeView.
  Handle arrays for CodeView
  Handle bitfields for CodeView
  Handle subroutine types in CodeView

 gcc/dwarf2codeview.cc | 2278 -
 gcc/dwarf2codeview.h  |   67 ++
 gcc/dwarf2out.cc  |5 +
 3 files changed, 2341 insertions(+), 9 deletions(-)

-- 
2.44.2



[PATCH] function.h: eliminate macros "dom_computed" and "n_bbs_in_dom_tree"

2024-06-17 Thread David Malcolm
Be explicit when we use "cfun".

No functional change intended.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/ChangeLog:
* dominance.cc (compute_dom_fast_query): Replace uses of
"dom_computed" macro with explicit use of cfun.
(compute_dom_fast_query_in_region): Likewise.
(calculate_dominance_info): Likewise, also for macro
"n_bbs_in_dom_tree".
(calculate_dominance_info_for_region): Likewise for
"dom_computed" macro.
(get_immediate_dominator): Likewise.
(set_immediate_dominator): Likewise.
(get_dominated_by): Likewise.
(redirect_immediate_dominators): Likewise.
(nearest_common_dominator): Likewise.
(dominated_by_p): Likewise.
(bb_dom_dfs_in): Likewise.
(bb_dom_dfs_out): Likewise.
(recompute_dominator): Likewise.
(iterate_fix_dominators): Likewise.
(add_to_dominance_info): Likewise, also for macro
"n_bbs_in_dom_tree".
(delete_from_dominance_info): Likewise.
(set_dom_info_availability): Likewise for
"dom_computed" macro.
* function.h (dom_computed): Delete macro.
(n_bbs_in_dom_tree): Delete macro.

Signed-off-by: David Malcolm 
---
 gcc/dominance.cc | 70 +---
 gcc/function.h   |  3 ---
 2 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/gcc/dominance.cc b/gcc/dominance.cc
index 0357210ed27f..528b38caa9db 100644
--- a/gcc/dominance.cc
+++ b/gcc/dominance.cc
@@ -672,7 +672,7 @@ compute_dom_fast_query (enum cdi_direction dir)
 
   gcc_checking_assert (dom_info_available_p (dir));
 
-  if (dom_computed[dir_index] == DOM_OK)
+  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
 return;
 
   FOR_ALL_BB_FN (bb, cfun)
@@ -681,7 +681,7 @@ compute_dom_fast_query (enum cdi_direction dir)
assign_dfs_numbers (bb->dom[dir_index], &num);
 }
 
-  dom_computed[dir_index] = DOM_OK;
+  cfun->cfg->x_dom_computed[dir_index] = DOM_OK;
 }
 
 /* Analogous to the previous function but compute the data for reducible
@@ -697,7 +697,7 @@ compute_dom_fast_query_in_region (enum cdi_direction dir,
 
   gcc_checking_assert (dom_info_available_p (dir));
 
-  if (dom_computed[dir_index] == DOM_OK)
+  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
 return;
 
   /* Assign dfs numbers for region nodes except for entry and exit nodes.  */
@@ -708,7 +708,7 @@ compute_dom_fast_query_in_region (enum cdi_direction dir,
assign_dfs_numbers (bb->dom[dir_index], &num);
 }
 
-  dom_computed[dir_index] = DOM_OK;
+  cfun->cfg->x_dom_computed[dir_index] = DOM_OK;
 }
 
 /* The main entry point into this module.  DIR is set depending on whether
@@ -721,7 +721,7 @@ calculate_dominance_info (cdi_direction dir, bool 
compute_fast_query)
 {
   unsigned int dir_index = dom_convert_dir_to_idx (dir);
 
-  if (dom_computed[dir_index] == DOM_OK)
+  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
 {
   checking_verify_dominators (dir);
   return;
@@ -730,14 +730,14 @@ calculate_dominance_info (cdi_direction dir, bool 
compute_fast_query)
   timevar_push (TV_DOMINANCE);
   if (!dom_info_available_p (dir))
 {
-  gcc_assert (!n_bbs_in_dom_tree[dir_index]);
+  gcc_assert (!cfun->cfg->x_n_bbs_in_dom_tree[dir_index]);
 
   basic_block b;
   FOR_ALL_BB_FN (b, cfun)
{
  b->dom[dir_index] = et_new_tree (b);
}
-  n_bbs_in_dom_tree[dir_index] = n_basic_blocks_for_fn (cfun);
+  cfun->cfg->x_n_bbs_in_dom_tree[dir_index] = n_basic_blocks_for_fn (cfun);
 
   dom_info di (cfun, dir);
   di.calc_dfs_tree ();
@@ -749,7 +749,7 @@ calculate_dominance_info (cdi_direction dir, bool 
compute_fast_query)
et_set_father (b->dom[dir_index], d->dom[dir_index]);
}
 
-  dom_computed[dir_index] = DOM_NO_FAST_QUERY;
+  cfun->cfg->x_dom_computed[dir_index] = DOM_NO_FAST_QUERY;
 }
   else
 checking_verify_dominators (dir);
@@ -772,7 +772,7 @@ calculate_dominance_info_for_region (cdi_direction dir,
   basic_block bb;
   unsigned int i;
 
-  if (dom_computed[dir_index] == DOM_OK)
+  if (cfun->cfg->x_dom_computed[dir_index] == DOM_OK)
 return;
 
   timevar_push (TV_DOMINANCE);
@@ -791,7 +791,7 @@ calculate_dominance_info_for_region (cdi_direction dir,
 if (basic_block d = di.get_idom (bb))
   et_set_father (bb->dom[dir_index], d->dom[dir_index]);
 
-  dom_computed[dir_index] = DOM_NO_FAST_QUERY;
+  cfun->cfg->x_dom_computed[dir_index] = DOM_NO_FAST_QUERY;
   compute_dom_fast_query_in_region (dir, region);
 
   timevar_pop (TV_DOMINANCE);
@@ -858,7 +858,7 @@ get_immediate_dominator (enum cdi_direction dir, 
basic_block bb)
   unsigned int dir_index = dom_convert_dir_to_idx (dir);
   struct et_node *node = bb->dom[dir_index];
 
-  gcc_checking_assert (dom_computed[dir_index]);
+  gcc_checking_assert (cfun->cfg->x_dom_computed[dir_index]);
 
   if (!node->father)
   

RE: [PATCH] aarch64: Add fix_truncv4sfv4hi2 pattern [PR113882]

2024-06-17 Thread Pengxuan Zheng (QUIC)
> Pengxuan Zheng  writes:
> > This patch adds the fix_truncv4sfv4hi2 (V4SF->V4HI) pattern which is
> > implemented using fix_truncv4sfv4si2 (V4SF->V4SI) and then truncv4siv4hi2
> (V4SI->V4HI).
> >
> > PR target/113882
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-simd.md (fix_truncv4sfv4hi2): New pattern.
> 
> Could we handle this by extending the target-independent code instead?
> Richard mentioned in comment 1 that the current set of intermediate
> conversions is hard-coded, but it didn't sound like he was implying that the
> set shouldn't change.

Yes, Richard. I checked the target-independent code. In fact, SLP already
handles this type of intermediate conversion. However, the logic is guarded by
"!flag_trapping_math". Therefore, if we pass -fno-trapping-math, SLP actually
generates the right vectorized code. Also, it looks like the check for
"!flag_trapping_math" was added intentionally in r14-2085-g77a50c772771f6 to
fix some PRs. So, I'm not sure what we should do here. Thoughts?

  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
  && (code == FLOAT_EXPR ||
  (code == FIX_TRUNC_EXPR && !flag_trapping_math)))

Thanks,
Pengxuan
> 
> Thanks,
> Richard
> 
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/fix_trunc2.c: New test.
> >
> > Signed-off-by: Pengxuan Zheng 
> > ---
> >  gcc/config/aarch64/aarch64-simd.md| 13 +
> >  gcc/testsuite/gcc.target/aarch64/fix_trunc2.c | 14 ++
> >  2 files changed, 27 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> > b/gcc/config/aarch64/aarch64-simd.md
> > index 868f4486218..096f7b56a27 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -3032,6 +3032,19 @@ (define_expand
> "2"
> >"TARGET_SIMD"
> >{})
> >
> > +
> > +(define_expand "fix_truncv4sfv4hi2"
> > +  [(match_operand:V4HI 0 "register_operand")
> > +   (match_operand:V4SF 1 "register_operand")]
> > +  "TARGET_SIMD"
> > +  {
> > +rtx tmp = gen_reg_rtx (V4SImode);
> > +emit_insn (gen_fix_truncv4sfv4si2 (tmp, operands[1]));
> > +emit_insn (gen_truncv4siv4hi2 (operands[0], tmp));
> > +DONE;
> > +  }
> > +)
> > +
> >  (define_expand "ftrunc2"
> >[(set (match_operand:VHSDF 0 "register_operand")
> > (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] diff
> > --git a/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > b/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > new file mode 100644
> > index 000..57cc00913a3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/fix_trunc2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2" } */
> > +
> > +void
> > +f (short *__restrict a, float *__restrict b) {
> > +  a[0] = b[0];
> > +  a[1] = b[1];
> > +  a[2] = b[2];
> > +  a[3] = b[3];
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {fcvtzs\tv[0-9]+.4s, v[0-9]+.4s}
> > +1 } } */
> > +/* { dg-final { scan-assembler-times {xtn\tv[0-9]+.4h, v[0-9]+.4s} 1
> > +} } */


[RFC v3] RISC-V: Promote Zaamo/Zalrsc to a when using an old binutils

2024-06-17 Thread Patrick O'Neill
Binutils 2.42 and before don't support Zaamo/Zalrsc. Promote Zaamo/Zalrsc to
'a' in the -march string when assembling.

This change respects Zaamo/Zalrsc when generating code.

Testcases that check for the default isa string will fail with the old binutils
since zaamo/zalrsc aren't emitted anymore. All other Zaamo/Zalrsc testcases
pass.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_subset_list::to_string): Add toggle to promote Zaamo/Zalrsc
extensions to 'a'.
(riscv_arch_str): Ditto.
(riscv_expand_arch): Ditto.
(riscv_expand_arch_from_cpu): Ditto.
(riscv_expand_arch_upgrade_exts): New function. Wrapper around
riscv_expand_arch to preserve the function signature.
(riscv_expand_arch_no_upgrade_exts): Ditto
(riscv_expand_arch_from_cpu_upgrade_exts): New function. Wrapper around
riscv_expand_arch_from_cpu to preserve the function signature.
(riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
* config/riscv/riscv-protos.h (riscv_arch_str): Add toggle to function
prototype.
* config/riscv/riscv-subset.h: Ditto.
* config/riscv/riscv-target-attr.cc (riscv_process_target_attr):
* config/riscv/riscv.cc (riscv_emit_attribute):
(riscv_declare_function_name):
* config/riscv/riscv.h (riscv_expand_arch): Remove.
(riscv_expand_arch_from_cpu): Ditto.
(riscv_expand_arch_upgrade_exts): Add toggle wrapper functions.
(riscv_expand_arch_no_upgrade_exts): Ditto.
(riscv_expand_arch_from_cpu_upgrade_exts): Ditto.
(riscv_expand_arch_from_cpu_no_upgrade_exts): Ditto.
(EXTRA_SPEC_FUNCTIONS): Ditto.
(OPTION_DEFAULT_SPECS): Use non-upgraded march string when invoking the
compiler.
(ASM_SPEC): Use upgraded march string when invoking the assembler.

Signed-off-by: Patrick O'Neill 
---
v3 ChangeLog:
Rebased on non-promoting patch.
Wrap all Zaamo/Zalrsc upgrade code in #ifndef to prevent compiler
warnings about unused/potentially undefined variables.
Silence unused parameter warning with a voidcast.
---
RFC since I'm not sure if this upgrade behavior is more trouble than
it's worth - this is a pretty invasive change. Happy to iterate further
or just drop these changes.
---
 gcc/common/config/riscv/riscv-common.cc | 111 +---
 gcc/config/riscv/riscv-protos.h |   3 +-
 gcc/config/riscv/riscv-subset.h |   2 +-
 gcc/config/riscv/riscv-target-attr.cc   |   4 +-
 gcc/config/riscv/riscv.cc   |   7 +-
 gcc/config/riscv/riscv.h|  46 ++
 6 files changed, 137 insertions(+), 36 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 1dc1d9904c7..05c26f73b73 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -907,7 +907,7 @@ riscv_subset_list::add (const char *subset, bool implied_p)
VERSION_P to determine append version info or not.  */

 std::string
-riscv_subset_list::to_string (bool version_p) const
+riscv_subset_list::to_string (bool version_p, bool upgrade_exts) const
 {
   std::ostringstream oss;
   oss << "rv" << m_xlen;
@@ -916,10 +916,17 @@ riscv_subset_list::to_string (bool version_p) const
   riscv_subset_t *subset;

   bool skip_zifencei = false;
-  bool skip_zaamo_zalrsc = false;
   bool skip_zicsr = false;
   bool i2p0 = false;

+#ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
+  bool upgrade_zaamo_zalrsc = false;
+  bool has_a_ext = false;
+  bool insert_a_ext = false;
+  bool inserted_a_ext = false;
+  riscv_subset_t *a_subset;
+#endif
+
   /* For RISC-V ISA version 2.2 or earlier version, zicsr and zifencei is
  included in the base ISA.  */
   if (riscv_isa_spec == ISA_SPEC_CLASS_2P2)
@@ -945,8 +952,33 @@ riscv_subset_list::to_string (bool version_p) const
   skip_zifencei = true;
 #endif
 #ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
-  /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc.  */
-  skip_zaamo_zalrsc = true;
+  /* Upgrade Zaamo/Zalrsc extensions to 'a' since binutils 2.42 and earlier
+ don't recognize zaamo/zalrsc.  */
+  upgrade_zaamo_zalrsc = upgrade_exts;
+  if (upgrade_zaamo_zalrsc)
+{
+  for (subset = m_head; subset != NULL; subset = subset->next)
+   {
+ if (subset->name == "a")
+   has_a_ext = true;
+ if (subset->name == "zaamo" || subset->name == "zalrsc")
+   insert_a_ext = true;
+   }
+  if (insert_a_ext && !has_a_ext)
+   {
+ unsigned int major_version = 0, minor_version = 0;
+ get_default_version ("a", &major_version, &minor_version);
+ a_subset = new riscv_subset_t ();
+ a_subset->name = "a";
+ a_subset->implied_p = false;
+ a_subset->major_version = major_version;
+ a_subset->minor_version = minor_version;
+   }
+}
+#else
+  /* Silence unused parameter warning when HAV

[COMMITTED] aarch64: Add testcase for PR97405

2024-06-17 Thread Andrew Pinski
This aarch64 sve specific code was fixed by r15-917-gc9842f99042454
which added a riscv specific testcase so adding an aarch64 one to test
the fix does not regress is a good idea.

Committed as obvious after testing the testcase for aarch64-linux-gnu.

PR tree-optimization/97405

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pr97405-1.c: New test.

Signed-off-by: Andrew Pinski 
---
 gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c | 13 +
 1 file changed, 13 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c
new file mode 100644
index 000..5efa32c9928
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97405-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+sve -O2" }
+/* PR tree-optimization/97405 */
+#include "arm_sve.h"
+
+void
+a (svuint8x3_t b, unsigned char *p, int c) {
+  if (c)
+svst1_u8(svptrue_pat_b8(SV_VL16), p, svget3_u8(b, 1));
+  else
+svst1_u8(svwhilelt_b8(6, 6), p, svget3_u8(b, 1));
+}
+
-- 
2.43.0



Re: [PATCH V3 2/2] RISC-V: Move mode assertion out of conditional branch in emit_insn

2024-06-17 Thread Jeff Law




On 6/17/24 12:33 PM, Edwin Lu wrote:

When emitting insns, we have an early assertion to ensure the input
operand's mode and the expanded operand's mode are the same; however, it
does not perform this check if the pattern does not have an explicit
machine mode specifying the operand. In this scenario, it will always
assume that mode = Pmode to correctly satisfy the
maybe_legitimize_operand check, however, there may be problems when
working in 32 bit environments.

Make the assert unconditional and replace it with an internal error for
more descriptive logging

gcc/ChangeLog:

* config/riscv/riscv-v.cc: Move assert out of conditional block

OK.

Jeff



Re: [PATCH V3 1/2] RISC-V: Fix vwsll combine on rv32 targets

2024-06-17 Thread Jeff Law




On 6/17/24 12:33 PM, Edwin Lu wrote:

On rv32 targets, vwsll_zext1_scalar_ would trigger an ice in
maybe_legitimize_instruction when zero extending a uint32 to uint64 due
to a mismatch between the input operand's mode (DI) and the expanded insn
operand's mode (Pmode == SI). Ensure that mode of the operands match

Tested on rv32/64 gcv newlib. Letting CI perform additional testing

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Fix mode mismatch

OK
jeff




Re: [PATCH] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]

2024-06-17 Thread Peter Bergner
On 6/16/24 9:40 PM, Kewen.Lin wrote:
> on 2024/6/17 10:31, Peter Bergner wrote:
>> On 6/16/24 9:10 PM, Kewen.Lin wrote:
>>> on 2024/6/15 01:05, Peter Bergner wrote:
 That said, the --with-cpu=power5 build without fortran did bootstrap and
 regtest with no regressions, so the build did test that code path and
 exposed no problems.
>>>
>>> OK, nice!  Thanks!
>>
>> I assume this means you're "OK" with the updated patch, correct?
> 
> Yes, OK for trunk, thanks!

Thanks.  We will need backports back to GCC 11, as this has been broken ever
since ROP protection was first added.  I'll let things burn in on trunk for a couple
of days so Bill's CI builders have a chance to test it on all of our
configs.  





>> Do you want to take a stab at writing that or do you want me to do that?
> 
> Either is fine for me, then let me give it a shot.

Sounds good, thanks.  That will allow me to handle the other ROP issues
I came across, which are reported in PR114759.

Peter




Re: [PATCH] rs6000: ROP - Do not disable shrink-wrapping for leaf functions [PR114759]

2024-06-17 Thread Segher Boessenkool
Hi!

On Mon, Jun 17, 2024 at 05:26:39PM -0500, Peter Bergner wrote:
> While auditing our ROP code generation for some test cases I wrote, I noticed
> a few issues which I'm tracking in PR114759.  The first issue I noticed is we
> disable shrink-wrapping when using -mrop-protect, even in the cases where we
> never emit the ROP instructions because they're not needed.

Please don't call this "ROP instructions".  -mrop-protect tries to make
it much harder to successfully do exploits in a style called "return-
oriented programming", starting from a stack overwrite normally.  It
does this by hashing the return address together with the stack pointer
value and with the previous hash value (so the whole call stack hashed),
and checking that before returning.

"ROP insns" are the instructions used in such exploits, not what you
mean here :-)

The instructions are called "hash*", so maybe call them "hash insns"
or "ROP protect hash insns"?

> The problem is
> we disable shrink-wrapping too early, before we know whether we will need to
> emit the ROP instructions or not.  The fix is to delay disabling shrink
> wrapping until we've decided whether we will or won't be emitting the ROP
> instructions.

>   * config/rs6000/rs6000.cc (rs6000_override_options_after_change): Move
>   the disabling of shrink-wrapping from here
>   * config/rs6000/rs6000-logue.cc (rs6000_stack_info): ...to here.

Hrm.  Can you do it in some particular caller of rs6000_stack_info,
instead?  The rs6000_stack_info function itself is not supposed to
change any state whatsoever.

> --- a/gcc/config/rs6000/rs6000-logue.cc
> +++ b/gcc/config/rs6000/rs6000-logue.cc
> @@ -720,7 +720,11 @@ rs6000_stack_info (void)
>&& info->calls_p
>&& DEFAULT_ABI == ABI_ELFv2
>&& rs6000_rop_protect)
> -info->rop_hash_size = 8;
> +{
> +  /* If we are inserting ROP-protect instructions, disable shrink wrap.  
> */
> +  flag_shrink_wrap = 0;
> +  info->rop_hash_size = 8;
> +}

The comment should say *why*!  The fact that we do is clear from the
code itself already.  But why do we want this?

> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -3427,10 +3427,6 @@ rs6000_override_options_after_change (void)
>  }
>else if (!OPTION_SET_P (flag_cunroll_grow_size))
>  flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
> -
> -  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
> -  if (rs6000_rop_protect)
> -flag_shrink_wrap = 0;
>  }

(Yes, I know the original code didn't say either, but let's try to make
things better :-) )

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr114759-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect 
> -fdump-rtl-pro_and_epilogue" } */
> +/* { dg-require-effective-target rop_ok } */

Do you want rop_ok while you are *forcing* it to be okay anyway?  Why?


Segher


Re: [PATCH] rs6000: Shrink rs6000_init_generated_builtins size [PR115324]

2024-06-17 Thread Segher Boessenkool
Hi!

Thanks for posting this again.  Much easier to find that way :-)

On Mon, Jun 17, 2024 at 07:15:48PM +0200, Jakub Jelinek wrote:
> While my r15-1001-g4cf2de9b5268224 PCH PIE power fix change decreased the
> .data section sizes (219792 -> 189336), it increased the size of already
> huge rs6000_init_generated_builtins generated function, from 218328
> to 228668 bytes.  That is because there are thousands of array references
> to global arrays and we keep constructing the addresses of the arrays
> again and again.

Less than 5%, for some perspective ;-)

> Ideally some optimization would figure out we have a single function which
> has
> 461   rs6000_overload_info
>1257   rs6000_builtin_info_fntype
>1768   rs6000_builtin_decls
>2548   rs6000_instance_info_fntype
> array references and that maybe it might be a good idea to just preload
> the addresses of those arrays into some register if it decreases code size
> and doesn't slow things down.
> The function actually is called just once and is huge, so code size is even
> more important than speed, which is dominated by all the GC allocations
> anyway.

Yup.

> Until that is done, here is a slightly cleaner version of the hack, which
> makes the function noipa (so that LTO doesn't undo it) for GCC 8.1+ and
> passes the 4 arrays as arguments to the function from the caller.
> This decreases the function size from 228668 bytes to 207572 bytes.
> 
> Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

> 2024-06-17  Jakub Jelinek  
> 
>   PR target/115324
>   * config/rs6000/rs6000-gen-builtins.cc (write_decls): Change
>   declaration of rs6000_init_generated_builtins from no arguments
>   to 4 pointer arguments.
>   (write_init_bif_table): Change rs6000_builtin_info_fntype to
>   builtin_info_fntype and rs6000_builtin_decls to builtin_decls.
>   (write_init_ovld_table): Change rs6000_instance_info_fntype to
>   instance_info_fntype, rs6000_builtin_decls to builtin_decls and
>   rs6000_overload_info to overload_info.
>   (write_init_file): Add __noipa__ attribute to
>   rs6000_init_generated_builtins for GCC 8.1+ and change the function
>   from no arguments to 4 pointer arguments.  Change rs6000_builtin_decls
>   to builtin_decls.
>   * config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Adjust
>   rs6000_init_generated_builtins caller.

It would have been much easier to review if you had done the renaming in
a separate patch :-)  You typically notice such things when writing the
changelog is much harder than expected, and this is the True Value of
changelogs!

Seen from the other side, when reviewing a patch I like to start with
the changelog (after the commit message), it should tell everything
there is to know, and then if something in the actual patch surprises
me, something is not ideal, or wrong even.

> +  /* The reason to pass pointers to the function instead of accessing
> + the rs6000_{{builtin,instance}_info_fntype,overload_info,builtin_decls}
> + arrays directly is to decrease size of the already large function and
> + noipa prevents the compiler with LTO to undo that optimization.  */

Some of these array names no longer have the rs6000_ prefix now.  Oh
wait, you already took that into account?  I'm not saying anything :-)

The patch is fine for trunk, thank you!  If you want backports those
are okay, too (but I don't think you want any?  Or does this work
without the previous patches as well?)


Segher


[pushed] wwwdocs: backends: Adjust SimulAVR link

2024-06-17 Thread Gerald Pfeifer
The original link gives a "301 Moved Permanently", easily fixed by 
appending a slash.

Pushed.

Gerald

---
 htdocs/backends.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/htdocs/backends.html b/htdocs/backends.html
index 1f7c85d7..d86783a6 100644
--- a/htdocs/backends.html
+++ b/htdocs/backends.html
@@ -128,8 +128,8 @@ xtensa | C
   https://github.com/sprintersb/atest?tab=readme-ov-file#running-the-avr-gcc-testsuite-using-the-avrtest-simulator";
 >README: Running the avr-gcc Testsuite using the avrtest Simulator
 
-SimulAVR at https://www.nongnu.org/simulavr";
-  >https://www.nongnu.org/simulavr
+SimulAVR at https://www.nongnu.org/simulavr/";
+  >https://www.nongnu.org/simulavr/
 
 
 
-- 
2.45.2


Re: [wwwdocs,pushed] backends.html - Update weblinks to AVR simulators

2024-06-17 Thread Gerald Pfeifer
On Sat, 15 Jun 2024, Georg-Johann Lay wrote:
> Applied this one:

Cool.

> +SimulAVR at https://www.nongnu.org/simulavr";

This one gives a http response of "301 Moved Permanently" redirecting to 
https://www.nongnu.org/simulavr/ . I'll fix this in a minute.

On a related note, though, can we update the references to the simulators 
from (exemplary)

   +avrtest at
   +  https://github.com/sprintersb/atest";
   +>https://github.com/sprintersb/atest

to

   +https://github.com/sprintersb/atest";>avrtest


Thanks,
Gerald


[C PATCH, v4] Fix for redeclared enumerator initialized with different type [PR115109]

2024-06-17 Thread Martin Uecker


This is a new version of the patch.  This adds the -fno-short-enums flag 
to the tests. I will commit it if the CI for arm does not complain this time.

Bootstrapped and regression tested on x86_64.


c23: Fix for redeclared enumerator initialized with different type 
[PR115109]

c23 specifies that the type of a redeclared enumerator is the one of the
previous declaration.  Convert initializers with different type accordingly
and emit an error when the value does not fit.

2024-06-01 Martin Uecker  

PR c/115109

gcc/c/
* c-decl.cc (build_enumerator): When redeclaring an
enumerator convert value to previous type.  For redeclared
enumerators use underlying type for computing the next value.

gcc/testsuite/
* gcc.dg/pr115109.c: New test.
* gcc.dg/c23-tag-enum-6.c: New test.
* gcc.dg/c23-tag-enum-7.c: New test.

commit c8a0ec5150299689e6e36b0044ea811b82d90b2f
Author: Martin Uecker 
Date:   Sat May 18 22:00:04 2024 +0200

c23: Fix for redeclared enumerator initialized with different type 
[PR115109]

c23 specifies that the type of a redeclared enumerator is the one of the
previous declaration.  Convert initializers with different type accordingly
and emit an error when the value does not fit.

2024-06-01 Martin Uecker  

PR c/115109

gcc/c/
* c-decl.cc (build_enumerator): When redeclaring an
enumerator convert value to previous type.  For redeclared
enumerators use underlying type for computing the next value.

gcc/testsuite/
* gcc.dg/pr115109.c: New test.
* gcc.dg/c23-tag-enum-6.c: New test.
* gcc.dg/c23-tag-enum-7.c: New test.

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index 6c09eb73128..01326570e2b 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -10277,6 +10277,7 @@ build_enumerator (location_t decl_loc, location_t loc,
  struct c_enum_contents *the_enum, tree name, tree value)
 {
   tree decl;
+  tree old_decl;
 
   /* Validate and default VALUE.  */
 
@@ -10336,6 +10337,23 @@ build_enumerator (location_t decl_loc, location_t loc,
 definition.  */
   value = convert (the_enum->enum_type, value);
 }
+  else if (flag_isoc23
+  && (old_decl = lookup_name_in_scope (name, current_scope))
+  && old_decl != error_mark_node
+  && TREE_TYPE (old_decl)
+  && TREE_TYPE (TREE_TYPE (old_decl))
+  && TREE_CODE (old_decl) == CONST_DECL)
+{
+  /* Enumeration constants in a redeclaration have the previous type.  */
+  tree previous_type = TREE_TYPE (DECL_INITIAL (old_decl));
+  if (!int_fits_type_p (value, previous_type))
+   {
+ error_at (loc, "value of redeclared enumerator outside the range "
+"of %qT", previous_type);
+ locate_old_decl (old_decl);
+   }
+  value = convert (previous_type, value);
+}
   else
 {
   /* Even though the underlying type of an enum is unspecified, the
@@ -10402,9 +10420,14 @@ build_enumerator (location_t decl_loc, location_t loc,
 false);
 }
   else
-the_enum->enum_next_value
-  = build_binary_op (EXPR_LOC_OR_LOC (value, input_location),
-PLUS_EXPR, value, integer_one_node, false);
+{
+  /* In a redeclaration the type can already be the enumeral type.  */
+  if (TREE_CODE (TREE_TYPE (value)) == ENUMERAL_TYPE)
+   value = convert (ENUM_UNDERLYING_TYPE (TREE_TYPE (value)), value);
+  the_enum->enum_next_value
+   = build_binary_op (EXPR_LOC_OR_LOC (value, input_location),
+  PLUS_EXPR, value, integer_one_node, false);
+}
   the_enum->enum_overflow = tree_int_cst_lt (the_enum->enum_next_value, value);
   if (the_enum->enum_overflow
   && !ENUM_FIXED_UNDERLYING_TYPE_P (the_enum->enum_type))
diff --git a/gcc/testsuite/gcc.dg/c23-tag-enum-6.c 
b/gcc/testsuite/gcc.dg/c23-tag-enum-6.c
new file mode 100644
index 000..29aef7ee3fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c23-tag-enum-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c23 -fno-short-enums" } */
+
+#include <limits.h>
+
+enum E : int { a = 1, b = 2 };
+enum E : int { b = _Generic(a, enum E: 2), a = 1 };
+
+enum H { x = 1 };
+enum H { x = 2UL + UINT_MAX }; /* { dg-error "outside the range" } */
+
+enum K : int { z = 1 };
+enum K : int { z = 2UL + UINT_MAX };   /* { dg-error "outside the range" } */
+
+enum F { A = 0, B = UINT_MAX };
+enum F { B = UINT_MAX, A };/* { dg-error "outside the range" } */
+
+enum G : unsigned int { C = 0, D = UINT_MAX };
+enum G : unsigned int { D = UINT_MAX, C }; /* { dg-error "overflow" } */
+
diff --git a/gcc/testsuite/gcc.dg/c23-tag-enum-7.c 
b/gcc/testsuite/gcc.dg/c23-tag-enum-7.c
new file mode 100644
index 000..d4c787c8f71
--- /dev/n

Re: [c-family] Add minimal support for __bf16 to -fdump-ada-spec

2024-06-17 Thread Andrew Pinski
On Mon, Jun 17, 2024 at 2:29 PM Eric Botcazou  wrote:
>
> Tested on x86-64/Linux, applied on the mainline.
>
>
> 2024-06-17  Eric Botcazou  
>
> c-family/
> * c-ada-spec.cc (is_float16): New predicate.
> (dump_ada_node) : Call it.

Hmm, is_float16 seems to me like it would be _Float16 rather than __bf16.
Those two are two different formats; both could be supported on a
target (both aarch64 and x86_64 support both at the same time).
Also for __bf16, I think comparing against the format being
arm_bfloat_half_format would be a better choice rather than depending
on the name.

Thanks,
Andrew Pinski

>
> --
> Eric Botcazou


[c-family] Add minimal support for __bf16 to -fdump-ada-spec

2024-06-17 Thread Eric Botcazou
Tested on x86-64/Linux, applied on the mainline.


2024-06-17  Eric Botcazou  

c-family/
* c-ada-spec.cc (is_float16): New predicate.
(dump_ada_node) : Call it.

-- 
Eric Botcazou
diff --git a/gcc/c-family/c-ada-spec.cc b/gcc/c-family/c-ada-spec.cc
index a41e93aeafb..e1b1b2a4b73 100644
--- a/gcc/c-family/c-ada-spec.cc
+++ b/gcc/c-family/c-ada-spec.cc
@@ -2077,6 +2077,22 @@ dump_ada_enum_type (pretty_printer *pp, tree node, tree type, int spc)
 }
 }
 
+/* Return true if NODE is the __bf16 type.  */
+
+static bool
+is_float16 (tree node)
+{
+  if (!TYPE_NAME (node) || TREE_CODE (TYPE_NAME (node)) != TYPE_DECL)
+return false;
+
+  tree name = DECL_NAME (TYPE_NAME (node));
+
+  if (IDENTIFIER_POINTER (name) [0] != '_')
+return false;
+
+  return id_equal (name, "__bf16");
+}
+
 /* Return true if NODE is the _Float32/_Float32x type.  */
 
 static bool
@@ -2210,7 +2226,12 @@ dump_ada_node (pretty_printer *pp, tree node, tree type, int spc,
   break;
 
 case REAL_TYPE:
-  if (is_float32 (node))
+  if (is_float16 (node))
+	{
+	  pp_string (pp, "Short_Float");
+	  break;
+	}
+  else if (is_float32 (node))
 	{
 	  pp_string (pp, "Float");
 	  break;


Re: [PATCH] xtensa: constantsynth: Reforge to fix some non-fatal issues

2024-06-17 Thread Max Filippov
Hi Suwa-san,

On Mon, Jun 17, 2024 at 04:17:15PM +0900, Takayuki 'January June' Suwa wrote:
> The previous constant synthesis logic had some issues that were non-fatal
> but worth considering:
> 
> - It didn't work with DFmode literals, because those were cast to SImode
>   rather than SFmode when splitting into two natural-width words by
>   split_double().
> 
> - It didn't work with large literals when TARGET_AUTO_LITPOOLS was enabled,
>   because those were relaxed MOVI immediates rather than references to literal
>   pool entries.
> 
> - It didn't take into account that when literals with the same RTL
>   representation are pooled multiple times within a function, those entries
>   are shared (especially important when optimizing for size).
> 
> This patch addresses the above issues by making appropriate tweaks to the
> constant synthesis logic.
> 
> gcc/ChangeLog:
> 
>   * config/xtensa/xtensa-protos.h (xtensa_constantsynth):
>   Change the second argument from HOST_WIDE_INT to rtx.
>   * config/xtensa/xtensa.cc (#include):
>   Add "context.h" and "pass_manager.h".
>   (machine_function): Add a new hash_map field "litpool_usage".
>   (xtensa_constantsynth): Make "src" (the second operand) accept
>   RTX literal instead of its value, and treat both bare and pooled
>   SI/SFmode literals equally by bit-exact canonicalization into
>   CONST_INT RTX internally.  And then, make avoid synthesis if
>   such multiple identical canonicalized literals are found in same
>   function when optimizing for size.  Finally, for literals where
>   synthesis is not possible or has been avoided, re-emit "move"
>   RTXes with canonicalized ones to increase the chances of sharing
>   literal pool entries.
>   * config/xtensa/xtensa.md (split patterns for constant synthesis):
>   Change to simply invoke xtensa_constantsynth() as mentioned above,
>   and add new patterns for when TARGET_AUTO_LITPOOLS is enabled.
> ---
>  gcc/config/xtensa/xtensa-protos.h |  2 +-
>  gcc/config/xtensa/xtensa.cc   | 75 ---
>  gcc/config/xtensa/xtensa.md   | 56 ++-
>  3 files changed, 103 insertions(+), 30 deletions(-)

This series introduced a few ICE regressions:

+FAIL: gcc.dg/atomic/c11-atomic-exec-2.c   -Os  (internal compiler error: 
Segmentation fault)
+FAIL: gcc.dg/atomic/c11-atomic-exec-3.c   -Os  (internal compiler error: 
Segmentation fault)
+FAIL: gcc.dg/atomic/c11-atomic-exec-4.c   -Os  (internal compiler error: 
Segmentation fault)
+FAIL: gcc.dg/torture/vec-cvt-1.c   -Os  (internal compiler error: Segmentation 
fault)
+FAIL: c-c++-common/torture/complex-sign-mixed-add.c   -Os  (internal compiler 
error: Segmentation fault)
+FAIL: c-c++-common/torture/complex-sign-mixed-div.c   -Os  (internal compiler 
error: Segmentation fault)
+FAIL: c-c++-common/torture/complex-sign-mixed-sub.c   -Os  (internal compiler 
error: Segmentation fault)
+FAIL: gfortran.dg/bind-c-contiguous-1.f90   -Os  (internal compiler error: 
Segmentation fault)
+FAIL: gfortran.dg/bind-c-contiguous-4.f90   -Os  (internal compiler error: 
Segmentation fault)
+FAIL: gfortran.dg/minlocval_4.f90   -Os  (internal compiler error: 
Segmentation fault)

they all have a backtrace like this:

/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-4.c:
 In function 'test_main_long_double_postinc':
/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-4.c:73:1:
 internal compiler error: Segmentation fault
/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-4.c:97:1:
 note: in expansion of macro 'TEST_FUNCS'   
   
0xf0493f crash_signal
/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/toplev.cc:319
0x7fcc65b98d5f ???
./signal/../sysdeps/unix/sysv/linux/x86_64/sigaction.c:0
0x98cd63 lookup_page_table_entry
/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/ggc-page.cc:630 


0x98cd63 ggc_set_mark(void const*)
/home/jcmvbkbc/ws/tensilica/gcc/gcc/gcc/ggc-page.cc:1553
0x12b31bd gt_ggc_mx_hash_map_rtx_int_(void*)
./gt-xtensa.h:39
0xc19207 gt_ggc_mx_function(void*)  



/home/jcmvbkbc/ws/tensilica/gcc/builds/gcc-15-1382-g448482d3d5c2-xtensa-call0-le/gcc/gtype-desc.cc:1696

 
0xc19207 gt_ggc_mx_function(void*)

/home/jcmvbkbc/ws/tensilica/gcc/builds/gcc-15-1382-g448482d3d5c2-xtensa-call0-le/gcc/gtype-desc.cc:1680
  

Re: [PATCH] middle-end/114189 - drop uses of vcond{,u,eq}_optab

2024-06-17 Thread Kewen.Lin
Hi Richi,

on 2024/6/14 18:31, Richard Biener wrote:
> The following retires vcond{,u,eq} optabs by stopping to use them
> from the middle-end.  Targets instead (should) implement vcond_mask
> and vec_cmp{,u,eq} optabs.  The PR this change refers to lists
> possibly affected targets - those implementing these patterns,
> and in particular it lists mips, sparc and ia64 as targets that
> most definitely will regress while others might simply remove
> their vcond{,u,eq} patterns.
> 
> I'd appreciate testing, I do not expect fallout for x86 or arm/aarch64.
> I know riscv doesn't implement any of the legacy optabs.  But less
> maintained vector targets might need adjustments.

Thanks for making this change, this patch can be bootstrapped on ppc64{,le}
but both have one failure on gcc/testsuite/gcc.target/powerpc/pr66144-3.c,
by looking into it, I found it just exposed one oversight in the current
rs6000 vcond_mask support (the condition mask location is wrong), so I think
this change is fine for rs6000 port, I'll also test SPEC2017 for this (with
rs6000 vcond_mask change) soon.

BR,
Kewen

> 
> I want to get rid of those optabs for GCC 15.  If I don't hear from
> you I will assume your target is fine.
> 
> Thanks,
> Richard.
> 
>   PR middle-end/114189
>   * optabs-query.h (get_vcond_icode): Always return CODE_FOR_nothing.
>   (get_vcond_eq_icode): Likewise.
> ---
>  gcc/optabs-query.h | 13 -
>  1 file changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 0cb2c21ba85..31fbce80175 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -112,14 +112,9 @@ get_vec_cmp_eq_icode (machine_mode vmode, machine_mode 
> mask_mode)
> mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE.  
> */
>  
>  inline enum insn_code
> -get_vcond_icode (machine_mode vmode, machine_mode cmode, bool uns)
> +get_vcond_icode (machine_mode, machine_mode, bool)
>  {
> -  enum insn_code icode = CODE_FOR_nothing;
> -  if (uns)
> -icode = convert_optab_handler (vcondu_optab, vmode, cmode);
> -  else
> -icode = convert_optab_handler (vcond_optab, vmode, cmode);
> -  return icode;
> +  return CODE_FOR_nothing;
>  }
>  
>  /* Return insn code for a conditional operator with a mask mode
> @@ -135,9 +130,9 @@ get_vcond_mask_icode (machine_mode vmode, machine_mode 
> mmode)
> mode CMODE (only EQ/NE), resulting in a value of mode VMODE.  */
>  
>  inline enum insn_code
> -get_vcond_eq_icode (machine_mode vmode, machine_mode cmode)
> +get_vcond_eq_icode (machine_mode, machine_mode)
>  {
> -  return convert_optab_handler (vcondeq_optab, vmode, cmode);
> +  return CODE_FOR_nothing;
>  }
>  
>  /* Enumerates the possible extraction_insn operations.  */



Re: [PATCH] diagnostics: Fix add_misspelling_candidates [PR115440]

2024-06-17 Thread Joseph Myers
On Mon, 17 Jun 2024, Jakub Jelinek wrote:

> 2024-06-17  Jakub Jelinek  
> 
>   PR driver/115440
>   * opts-common.cc (add_misspelling_candidates): If opt1 is non-NULL,
>   add a space and opt1 to the alternative suggestion text.
> 
>   * g++.dg/cpp1z/pr115440.C: New test.

OK.

-- 
Joseph S. Myers
josmy...@redhat.com



[committed] c: Implement C2Y alignof on incomplete arrays

2024-06-17 Thread Joseph Myers
C2Y has adopted support for alignof applied to incomplete array types
(N3273).  Add this support to GCC.  As the relevant checks are in
c-family code that doesn't have access to functions such as
pedwarn_c23, this remains a hard error for older versions and isn't
handled by -Wc23-c2y-compat, although preferably it would work like
pedwarn_c23 (pedwarn-if-pedantic for older versions, warning with
-Wc23-c2y-compat in C2Y mode).

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/c-family/
* c-common.cc (c_sizeof_or_alignof_type): Allow alignof on an
incomplete array type for C2Y.

gcc/testsuite/
* gcc.dg/c23-align-10.c, gcc.dg/c2y-align-1.c,
gcc.dg/c2y-align-2.c: New tests.

diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 24335deeb58..7d752acd430 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -3972,7 +3972,9 @@ c_sizeof_or_alignof_type (location_t loc,
   value = size_one_node;
 }
   else if (!COMPLETE_TYPE_P (type)
-  && (!c_dialect_cxx () || is_sizeof || type_code != ARRAY_TYPE))
+  && ((!c_dialect_cxx () && !flag_isoc2y)
+  || is_sizeof
+  || type_code != ARRAY_TYPE))
 {
   if (complain)
error_at (loc, "invalid application of %qs to incomplete type %qT",
diff --git a/gcc/testsuite/gcc.dg/c23-align-10.c 
b/gcc/testsuite/gcc.dg/c23-align-10.c
new file mode 100644
index 000..bd6b9c268c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c23-align-10.c
@@ -0,0 +1,6 @@
+/* Test C2Y alignof on an incomplete array type: not allowed in C23.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c23 -pedantic-errors" } */
+
+int a = alignof(int[]); /* { dg-error "incomplete" } */
+int b = alignof(int[][1]); /* { dg-error "incomplete" } */
diff --git a/gcc/testsuite/gcc.dg/c2y-align-1.c 
b/gcc/testsuite/gcc.dg/c2y-align-1.c
new file mode 100644
index 000..3f9ab18c518
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c2y-align-1.c
@@ -0,0 +1,6 @@
+/* Test C2Y alignof on an incomplete array type.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c2y -pedantic-errors" } */
+
+int a = alignof(int[]);
+int b = alignof(int[][1]);
diff --git a/gcc/testsuite/gcc.dg/c2y-align-2.c 
b/gcc/testsuite/gcc.dg/c2y-align-2.c
new file mode 100644
index 000..b7b87150413
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c2y-align-2.c
@@ -0,0 +1,8 @@
+/* Test C2Y alignof on an incomplete array type: still not allowed for other
+   incomplete types.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c2y -pedantic-errors" } */
+
+int a = alignof(void); /* { dg-error "void" } */
+struct s;
+int b = alignof(struct s); /* { dg-error "incomplete" } */

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [Patch, Fortran, 96418] Fix Test coarray_alloc_comp_4.f08 ICEs

2024-06-17 Thread Harald Anlauf

Hi Andre,

Am 17.06.24 um 09:51 schrieb Andre Vehreschild:

Regarding your question on the coarray-tests that are not in the
coarray-directory: These test in most cases test only one method of
implementing coarrays. I.e., they are either testing just -fcoarray=single or
-fcoarray=lib -lcaf_single, which are two different approaches. The tests in
the coarray-directory test all available methods to implement coarrays.  Pushing


ah, that explains it.  I only looked at some of the test sources,
but did not think of looking at caf.exp ...


all coarray-tests into the coarray-directory will fail a lot of them, because
the behavior of -fcoarray=single and -fcoarray=lib -lcaf_single is different in
some corner cases. That's why the coarray-tests in the main gfortran-dir are
separate.

I do understand why it may be confusing, but I don't see an easy solution. Does
this answer your question?


Indeed it does!

Thanks,
Harald



Re: [PATCH 2/3] Enabled LRA for ia64.

2024-06-17 Thread Frank Scheiner

On 17.06.24 20:53, Jonathan Wakely wrote:

On Mon, 17 Jun 2024 at 19:03, Joseph Myers  wrote:


On Fri, 14 Jun 2024, Jonathan Wakely wrote:


Both, ideally. The libstdc++ test should definitely be fixed because
it fails with released versions of glibc already in the wild. But
glibc should also be fixed because it's a standards conformance issue.


The __ctx macro used in various sys/ucontext.h headers prepends __ in
standards conformance modes (the point being to avoid breaking the API
outside such modes when we fixed the namespace issues).

#ifdef __USE_MISC
# define __ctx(fld) fld
#else
# define __ctx(fld) __ ## fld
#endif

(bits/sigcontext.h didn't get any such fixes as it's not included at all
in standards conformance modes, only if __USE_MISC.)


I see, thanks. So it's not a problem in C, only in C++ due to G++
defining _GNU_SOURCE.

Let's just change the libstdc++ tests then.


Great, I did test that patched in the same way as in [1] on Friday. It
makes the three failing tests pass:

```
# make check-target-libstdc++-v3
RUNTESTFLAGS="conformance.exp=17_intro/names*\ experimental/names.cc"

Test run by root on Fri Jun 14 16:04:26 2024
Native configuration is ia64-t2-linux-gnu

=== libstdc++ tests ===

Schedule of variations:
unix

Running target unix
Running
/dev/shm/gcc-15-lra/src.gcc.ia64-toolchain-3.240529.123346.921189/gcc/libstdc++-v3/testsuite/libstdc++-dg/conformance.exp
...
PASS: 17_intro/names.cc  -std=gnu++17 (test for excess errors)
PASS: 17_intro/names_pstl.cc  -std=gnu++17 (test for excess errors)
PASS: experimental/names.cc  -std=gnu++17 (test for excess errors)

=== libstdc++ Summary ===

# of expected passes3
```

[1]:
https://gcc.gnu.org/git/?p=gcc.git;a=patch;h=cf5f7791056b3ed993bc8024be767a86157514a9

You most likely want the workaround as separate patch on this list, as
the failures were happening for both the non-LRA and LRA case, right?

Cheers,
Frank


Re: [committed] testsuite: Add -Wno-psabi to vshuf-mem.C test

2024-06-17 Thread Jakub Jelinek
On Mon, Jun 17, 2024 at 09:09:37PM +0200, Andreas Krebbel wrote:
> On 6/14/24 20:03, Jakub Jelinek wrote:
> > Also wonder about the
> > // { dg-additional-options "-march=z14" { target s390*-*-* } }
> > line, doesn't that mean the test will FAIL on all pre-z14 HW?
> > Shouldn't it use some z14_runtime or similar effective target, or
> > check in main (in that case copied over to g++.target/s390) whether
> > z14 instructions can be actually used at runtime?
> 
> Oh right. I'll remove that line and replicate the testcase in the arch
> specific test dir.

Though, looking around some more, perhaps
// { dg-additional-options "-march=z14" { target s390_vxe } }
might be all that is needed, even in current dir.

Jakub



Re: [PATCH] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]

2024-06-17 Thread Peter Bergner
On 6/16/24 9:10 PM, Kewen.Lin wrote:
> on 2024/6/15 01:05, Peter Bergner wrote:
>> That said, the --with-cpu=power5 build without fortran did bootstrap and
>> regtest with no regressions, so the build did test that code path and
>> exposed no problems.
> 
> OK, nice!  Thanks!

I assume this means you're "OK" with the updated patch, correct?




>> Currently, TARGET_ALTIVEC_ABI is defined as:
>>
>>   #define TARGET_ALTIVEC_ABI rs6000_altivec_abi
>>
>> Would it make sense to redefine it to:
>>
>>   #define TARGET_ALTIVEC_ABI (TARGET_ALTIVEC && rs6000_altivec_abi)
>>
>> ...or add some code in rs6000 option handling to disable rs6000_altivec_abi
>> when TARGET_ALTIVEC is false?  or do we care enough to even change it? 
>> :-)
> 
> Assuming the current code is robust enough (perfectly guarded by some altivec
> related condition like this altivec register saving slot), there may not be
> any actual errors, but considering not surprising people, I'm inclined to add
> some option handlings for it, like unsetting rs6000_altivec_abi if
> !TARGET_ALTIVEC and give some warning if it's explicitly specified, what do
> you think?

I like it, since if Altivec is disabled, having TARGET_ALTIVEC_ABI enabled 
makes no
sense to me.  That is orthogonal to this bug though, so should be a separate 
patch.
Do you want to take a stab at writing that or do you want me to do that?


Peter




Re: [C PATCH, v3] Fix for redeclared enumerator initialized with different type [PR115109]

2024-06-17 Thread Joseph Myers
On Sat, 15 Jun 2024, Martin Uecker wrote:

> The patch fails on arm because the tests make assumptions
> about enums that are not true everywhere. Should we just 
> limit the tests to x86?

For compilation tests, using -fno-short-enums should work.  That won't 
work for link / execute tests, but in those cases you can use { target { ! 
short_enums } }.  (If there are other issues beyond a short-enums default, 
other effective-targets may be needed.)

-- 
Joseph S. Myers
josmy...@redhat.com

Re: [PATCH 2/3] Enabled LRA for ia64.

2024-06-17 Thread Jonathan Wakely
On Mon, 17 Jun 2024 at 19:03, Joseph Myers  wrote:
>
> On Fri, 14 Jun 2024, Jonathan Wakely wrote:
>
> > Both, ideally. The libstdc++ test should definitely be fixed because
> > it fails with released versions of glibc already in the wild. But
> > glibc should also be fixed because it's a standards conformance issue.
>
> The __ctx macro used in various sys/ucontext.h headers prepends __ in
> standards conformance modes (the point being to avoid breaking the API
> outside such modes when we fixed the namespace issues).
>
> #ifdef __USE_MISC
> # define __ctx(fld) fld
> #else
> # define __ctx(fld) __ ## fld
> #endif
>
> (bits/sigcontext.h didn't get any such fixes as it's not included at all
> in standards conformance modes, only if __USE_MISC.)

I see, thanks. So it's not a problem in C, only in C++ due to G++
defining _GNU_SOURCE.

Let's just change the libstdc++ tests then.



[PATCH V3 2/2] RISC-V: Move mode assertion out of conditional branch in emit_insn

2024-06-17 Thread Edwin Lu
When emitting insns, we have an early assertion to ensure the input
operand's mode and the expanded operand's mode are the same; however, it
does not perform this check if the pattern does not have an explicit
machine mode specifying the operand. In this scenario, it will always
assume that mode = Pmode to correctly satisfy the
maybe_legitimize_operand check, however, there may be problems when
working in 32 bit environments.

Make the assert unconditional and replace it with an internal error for
more descriptive logging.

gcc/ChangeLog:

* config/riscv/riscv-v.cc: Move assert out of conditional block

Signed-off-by: Edwin Lu 
Co-authored-by: Robin Dapp 
---
V2: change assert to internal error

V3: No change
---
 gcc/config/riscv/riscv-v.cc | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 8911f5783c8..5306711c1b7 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -50,6 +50,7 @@
 #include "rtx-vector-builder.h"
 #include "targhooks.h"
 #include "predict.h"
+#include "errors.h"
 
 using namespace riscv_vector;
 
@@ -290,11 +291,17 @@ public:
   always Pmode.  */
if (mode == VOIDmode)
  mode = Pmode;
-   else
- /* Early assertion ensures same mode since maybe_legitimize_operand
-will check this.  */
- gcc_assert (GET_MODE (ops[opno]) == VOIDmode
- || GET_MODE (ops[opno]) == mode);
+
+   /* Early assertion ensures same mode since maybe_legitimize_operand
+  will check this.  */
+   machine_mode required_mode = GET_MODE (ops[opno]);
+   if (required_mode != VOIDmode && required_mode != mode)
+ internal_error ("expected mode %s for operand %d of "
+ "insn %s but got mode %s.\n",
+ GET_MODE_NAME (mode),
+ opno,
+ insn_data[(int) icode].name,
+ GET_MODE_NAME (required_mode));
 
add_input_operand (ops[opno], mode);
   }
@@ -346,7 +353,13 @@ public:
 else if (m_insn_flags & VXRM_RDN_P)
   add_rounding_mode_operand (VXRM_RDN);
 
-gcc_assert (insn_data[(int) icode].n_operands == m_opno);
+
+if (insn_data[(int) icode].n_operands != m_opno)
+  internal_error ("invalid number of operands for insn %s, "
+ "expected %d but got %d.\n",
+ insn_data[(int) icode].name,
+ insn_data[(int) icode].n_operands, m_opno);
+
 expand (icode, any_mem_p);
   }
 
-- 
2.34.1



[PATCH V3 1/2] RISC-V: Fix vwsll combine on rv32 targets

2024-06-17 Thread Edwin Lu
On rv32 targets, vwsll_zext1_scalar_ would trigger an ice in
maybe_legitimize_instruction when zero extending a uint32 to uint64 due
to a mismatch between the input operand's mode (DI) and the expanded insn
operand's mode (Pmode == SI). Ensure that the modes of the operands match.

Tested on rv32/64 gcv newlib. Letting CI perform additional testing

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Fix mode mismatch

Signed-off-by: Edwin Lu 
Co-authored-by: Robin Dapp 
---
V2: Remove subreg check

V3: Update _trunc_scalar splitter as well
---
 gcc/config/riscv/autovec-opt.md | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 6a2eabbd854..d7a3cfd4602 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1517,8 +1517,7 @@ (define_insn_and_split "*vwsll_zext1_scalar_"
   "&& 1"
   [(const_int 0)]
   {
-if (GET_CODE (operands[2]) == SUBREG)
-  operands[2] = SUBREG_REG (operands[2]);
+operands[2] = gen_lowpart (Pmode, operands[2]);
 insn_code icode = code_for_pred_vwsll_scalar (mode);
 riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
 DONE;
@@ -1584,8 +1583,7 @@ (define_insn_and_split "*vwsll_zext1_trunc_scalar_"
   "&& 1"
   [(const_int 0)]
   {
-if (GET_CODE (operands[2]) == SUBREG)
-  operands[2] = SUBREG_REG (operands[2]);
+operands[2] = gen_lowpart (Pmode, operands[2]);
 insn_code icode = code_for_pred_vwsll_scalar (mode);
 riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
 DONE;
-- 
2.34.1



[PATCH V3 0/2] Fix ICE with vwsll combine on 32bit targets

2024-06-17 Thread Edwin Lu
The following testcases have been failing on rv32 targets since 
r15-953-gaf4bf422a69:
FAIL: gcc.target/riscv/rvv/autovec/binop/vwsll-1.c (internal compiler
error: in maybe_legitimize_operand, at optabs.cc:8056)
FAIL: gcc.target/riscv/rvv/autovec/binop/vwsll-1.c (test for excess
errors)

Fix the bug and also robustify our emit_insn by making an assertion
check unconditional

I'm not sure if this ICE warrants its own separate testcase since it is
already being tested. I do have a minimal testcase on hand if we would
like to add one.

V2: Remove subreg condition and change assert to internal error

V3: Update the _trunc_scalar splitter as well

Edwin Lu (2):
  RISC-V: Fix vwsll combine on rv32 targets
  RISC-V: Move mode assertion out of conditional branch in emit_insn

 gcc/config/riscv/autovec-opt.md |  6 ++
 gcc/config/riscv/riscv-v.cc | 25 +++--
 2 files changed, 21 insertions(+), 10 deletions(-)

-- 
2.34.1



[PATCH] c++: ICE with generic lambda and pack expansion [PR115425]

2024-06-17 Thread Marek Polacek
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In r13-272 we hardened the *_PACK_EXPANSION and *_ARGUMENT_PACK macros.
That trips up here because make_pack_expansion returns error_mark_node
and we access that with PACK_EXPANSION_LOCAL_P.

PR c++/115425

gcc/cp/ChangeLog:

* pt.cc (tsubst_pack_expansion): Return error_mark_node if
make_pack_expansion doesn't work out.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/lambda-generic12.C: New test.
---
 gcc/cp/pt.cc  |  2 ++
 gcc/testsuite/g++.dg/cpp2a/lambda-generic12.C | 25 +++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/lambda-generic12.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 607753ae6b7..e676372f75b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -13775,6 +13775,8 @@ tsubst_pack_expansion (tree t, tree args, 
tsubst_flags_t complain,
   else
result = tsubst (pattern, args, complain, in_decl);
   result = make_pack_expansion (result, complain);
+  if (result == error_mark_node)
+   return error_mark_node;
   PACK_EXPANSION_LOCAL_P (result) = PACK_EXPANSION_LOCAL_P (t);
   PACK_EXPANSION_SIZEOF_P (result) = PACK_EXPANSION_SIZEOF_P (t);
   if (PACK_EXPANSION_AUTO_P (t))
diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-generic12.C 
b/gcc/testsuite/g++.dg/cpp2a/lambda-generic12.C
new file mode 100644
index 000..219529c7c32
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/lambda-generic12.C
@@ -0,0 +1,25 @@
+// PR c++/115425
+// { dg-do compile { target c++20 } }
+
+using size_t = decltype(sizeof(0));
+
+template 
+struct X {};
+
+template
+void foo(X);
+
+template
+struct S;
+
+template
+auto test() {
+  constexpr static auto x = foo>(); // { dg-error "no 
matching function" }
+  return [](X) {
+(typename S::type{}, ...);
+  }(X<__integer_pack (0)...>{});
+}
+
+int main() {
+  test();
+}

base-commit: b63c7d92012f92e0517190cf263d29bbef8a06bf
-- 
2.45.1



Re: [PATCH 30/52 v2] pdp11: Remove macro {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE

2024-06-17 Thread Paul Koning
Thanks Kewen.

Given that background, the patch is OK.

paul

> On Jun 16, 2024, at 10:01 PM, Kewen.Lin  wrote:
> 
> Hi Paul,
> 
> on 2024/6/14 23:20, Paul Koning wrote:
>> Ok, I understand better now.  But if those macros are supposed to be 
>> replaced by hook functions, could you make that replacement part of the 
>> proposed patch?
> 
> The default implementation of the introduced hook mode_for_floating_type
> returns SFmode for float and DFmode for double or long double, which matches
> what pdp11 port requires, so there is no need to add its own hook 
> implementation.
> This patch series only re-define this hook macro with the customized hook
> implementation for those ports which need something beyond the default.
> 
> BR,
> Kewen
> 
>> 
>>  paul
>> 
>>> On Jun 13, 2024, at 11:22 PM, Kewen.Lin  wrote:
>>> 
>>> Hi Paul,
>>> 
>>> on 2024/6/14 04:07, Paul Koning wrote:
>>>> What is the effect of this change?  The original code intended to have
>>>> "float" mean a 32 bit value, and "double" a 64 bit value.  There aren't
>>>> any larger floats, so I defined the long double size as 64 also.  Is the
>>>> right answer not to define it?
>>> 
>>> Since sub-patch 09/52 will poison {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE, 
>>> target code building will fail
>>> if it still has these macros.  As I'd like to squash these target changes 
>>> onto 09/52, so I didn't note
>>> the background/context here, sorry about that.
>>> 
>>>>
>>>> That part I understand, but why does the patch also remove FLOAT_TYPE_SIZE
>>>> and DOUBLE_TYPE_SIZE without explanation and without mention in the
>>>> changelog?
>>> 
>>> Oops, thanks for catching!  I just noticed this sub-patch has inconsistent 
>>> subject & changelog, I should
>>> have noticed this as it has a quite different subject from the others. :(  
>>> With your finding, I just
>>> re-visited all the other sub-patches, luckily they are consistent.
>>> 
>>> The below is the updated revision, hope it looks good to you.  Thanks again.
>>> 
>>> BR,
>>> Kewen
>>> -
>>> 
>>> Subject: [PATCH] pdp11: Remove macro {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE
>>> 
>>> This is to remove macros {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE
>>> defines in pdp11 port, as we want to replace these macros
>>> with hook mode_for_floating_type and poison them.
>>> 
>>> gcc/ChangeLog:
>>> 
>>>   * config/pdp11/pdp11.h (FLOAT_TYPE_SIZE): Remove.
>>>   (DOUBLE_TYPE_SIZE): Likewise.
>>>   (LONG_DOUBLE_TYPE_SIZE): Likewise.
>>> ---
>>> gcc/config/pdp11/pdp11.h | 11 ---
>>> 1 file changed, 11 deletions(-)
>>> 
>>> diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
>>> index 2446fea0b58..6c8e045bc57 100644
>>> --- a/gcc/config/pdp11/pdp11.h
>>> +++ b/gcc/config/pdp11/pdp11.h
>>> @@ -71,17 +71,6 @@ along with GCC; see the file COPYING3.  If not see
>>> #define LONG_TYPE_SIZE 32
>>> #define LONG_LONG_TYPE_SIZE 64
>>> 
>>> -/* In earlier versions, FLOAT_TYPE_SIZE was selectable as 32 or 64,
>>> -   but that conflicts with Fortran language rules.  Since there is no
>>> -   obvious reason why we should have that feature -- other targets
>>> -   generally don't have float and double the same size -- I've removed
>>> -   it.  Note that it continues to be true (for now) that arithmetic is
>>> -   always done with 64-bit values, i.e., the FPU is always in "double"
>>> -   mode.  */
>>> -#define FLOAT_TYPE_SIZE 32
>>> -#define DOUBLE_TYPE_SIZE   64
>>> -#define LONG_DOUBLE_TYPE_SIZE  64
>>> -
>>> /* machine types from ansi */
>>> #define SIZE_TYPE "short unsigned int" /* definition of size_t */
>>> #define WCHAR_TYPE "short int" /* or long int */
>>> --
>>> 2.43.0
>>> 
>>> 
>> 
> 



Re: [PATCH 2/3] Enabled LRA for ia64.

2024-06-17 Thread Joseph Myers
On Fri, 14 Jun 2024, Jonathan Wakely wrote:

> Both, ideally. The libstdc++ test should definitely be fixed because
> it fails with released versions of glibc already in the wild. But
> glibc should also be fixed because it's a standards conformance issue.

The __ctx macro used in various sys/ucontext.h headers prepends __ in 
standards conformance modes (the point being to avoid breaking the API 
outside such modes when we fixed the namespace issues).

#ifdef __USE_MISC
# define __ctx(fld) fld
#else
# define __ctx(fld) __ ## fld
#endif

(bits/sigcontext.h didn't get any such fixes as it's not included at all 
in standards conformance modes, only if __USE_MISC.)

-- 
Joseph S. Myers
josmy...@redhat.com



Re: [PATCH] rs6000, altivec-2-runnable.c update the require-effective-target

2024-06-17 Thread Peter Bergner
On 6/14/24 1:37 PM, Carl Love wrote:
> Per the additional feedback after patch: 
> 
>   commit c892525813c94b018464d5a4edc17f79186606b7
>   Author: Carl Love 
>   Date:   Tue Jun 11 14:01:16 2024 -0400
> 
>   rs6000, altivec-2-runnable.c should be a runnable test
> 
>   The test case has "dg-do compile" set not "dg-do run" for a runnable
>   test.  This patch changes the dg-do command argument to run.
> 
>   gcc/testsuite/ChangeLog:
>   * gcc.target/powerpc/altivec-2-runnable.c: Change dg-do
>   argument to run.

Test case altivec-1-runnable.c seems to have the same issue, in that it
is currently a dg-do compile test case rather than the intended dg-do run.
Can you have a look at changing that to dg-do run too?  My guess is that
this one will want something similar to some other altivec test cases, ala:

/* { dg-do run { target vmx_hw } } */
/* { dg-do compile { target { ! vmx_hw } } } */
/* { dg-require-effective-target powerpc_altivec_ok } */
/* { dg-options "-O2 -maltivec -mabi=altivec" } */


That said, I don't like not having a -mdejagnu-cpu=... here.
I think for our server cpus, this is fine, but on an embedded system
with an old ISA default for -mcpu=... (so we'd be doing a dg-do compile),
just adding -maltivec to that default may not make much sense for that
default and probably should be an error.  Maybe something like:

/* { dg-do run { target vmx_hw } } */
/* { dg-do compile { target { ! vmx_hw } } } */
/* { dg-require-effective-target powerpc_altivec_ok } */
/* { dg-options "-O2 -mdejagnu-cpu=power7" } */

...makes more sense?   Ke Wen & Segher, thoughts on that?
Ke Wen, should powerpc_altivec_ok be powerpc_altivec here???

Peter




Re: [pushed 2/3] libcpp: move label_text to its own header

2024-06-17 Thread Bert Wesarg
Hi,

On Thu, Jun 6, 2024 at 7:05 PM Andrew Pinski  wrote:
>
> On Thu, Jun 6, 2024 at 9:00 AM David Malcolm  wrote:
> >
> > On Thu, 2024-06-06 at 08:40 -0700, Andrew Pinski wrote:
> > > On Thu, Jun 6, 2024 at 6:02 AM Bert Wesarg
> > >  wrote:
> > > >
> > > > Dear David,
> > > >
> > > > On Tue, May 28, 2024 at 10:07 PM David Malcolm
> > > >  wrote:
> > > > >
> > > > > No functional change intended.
> > > > >
> > > > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
> > > > > Pushed to trunk as r15-874-g9bda2c4c81b668.
> > > > >
> > > > > libcpp/ChangeLog:
> > > > > * Makefile.in (TAGS_SOURCES): Add include/label-text.h.
> > > > > * include/label-text.h: New file.
> > > > > * include/rich-location.h: Include "label-text.h".
> > > > > (class label_text): Move to label-text.h.
> > > > >
> > > > > Signed-off-by: David Malcolm 
> > > > > ---
> > > > >  libcpp/Makefile.in |   2 +-
> > > > >  libcpp/include/label-text.h| 102
> > > > > +
> > > > >  libcpp/include/rich-location.h |  79 +
> > > > >  3 files changed, 105 insertions(+), 78 deletions(-)
> > > > >  create mode 100644 libcpp/include/label-text.h
> > > > >
> > > > > diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in
> > > > > index ebbca3fb..7e47153264c0 100644
> > > > > --- a/libcpp/Makefile.in
> > > > > +++ b/libcpp/Makefile.in
> > > > > @@ -271,7 +271,7 @@ ETAGS = @ETAGS@
> > > > >
> > > > >  TAGS_SOURCES = $(libcpp_a_SOURCES) internal.h system.h ucnid.h \
> > > > >  include/cpplib.h include/line-map.h include/mkdeps.h
> > > > > include/symtab.h \
> > > > > -include/rich-location.h
> > > > > +include/rich-location.h include/label-text.h
> > > >
> > > > this does not seem to be enough that the new header will be
> > > > installed.
> > > > > I get compile errors when compiling a plug-in with this patch:
> > > >
> > > > In file included from
> > > > /home/bitten/opt/gcc-15-20240602/lib/gcc/x86_64-pc-linux-
> > > > gnu/15.0.0/plugin/include/diagnostic.h:24,
> > > > from
> > > > /home/bitten/builds/oCyPvWN6/1/perftools/cicd/scorep/src/build-gcc-
> > > > plugin/../src/adapters/compiler/gcc-
> > > > plugin/scorep_plugin_inst_descriptor.cpp:43:
> > > > /home/bitten/opt/gcc-15-20240602/lib/gcc/x86_64-pc-linux-
> > > > gnu/15.0.0/plugin/include/rich-location.h:25:10:
> > > > fatal error: label-text.h: No such file or directory
> > > > 25 | #include "label-text.h"
> > > > > ^~
> > > > compilation terminated.
> > >
> > > I have a fix which I am testing.
> >
> > Likewise (and sorry about the breakage)
>
> Committed as r15-1076-g6e6471806d886b .

Thanks. I can confirm, that my external plugin builds again.

Bert

>
> >
> > Dave
> >


Re: [PATCH] c-family: Fix -Warray-compare warning ICE [PR115290]

2024-06-17 Thread Marek Polacek
On Mon, Jun 17, 2024 at 07:09:03PM +0200, Jakub Jelinek wrote:
> Hi!
> 
> The warning code uses %D to print the ARRAY_REF first operands.
> That works in the most common case where those operands are decls, but
> as can be seen on the following testcase, they can be other expressions
> with array type.
> Just changing %D to %E isn't enough, because then the diagnostics can
> suggest something like
> note: use '&(x) != 0 ? (int (*)[32])&a : (int (*)[32])&b[0] == &(y) != 0 ? 
> (int (*)[32])&a : (int (*)[32])&b[0]' to compare the addresses
> which is a bad suggestion, the %E printing doesn't know that the
> warning code will want to add & before it and [0] after it.
> So, the following patch adds ()s around the operand as well, but does
> that only for non-decls, for decls keeps it as &arr[0] like before.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
> and release branches?

Ok, thanks.
 
> 2024-06-17  Jakub Jelinek  
> 
>   PR c/115290
>   * c-warn.cc (do_warn_array_compare): Use %E rather than %D for
>   printing op0 and op1; if those operands aren't decls, also print
>   parens around them.
> 
>   * c-c++-common/Warray-compare-3.c: New test.
> 
> --- gcc/c-family/c-warn.cc.jj 2024-06-04 13:19:03.371609456 +0200
> +++ gcc/c-family/c-warn.cc2024-06-17 15:07:09.005737065 +0200
> @@ -3832,11 +3832,16 @@ do_warn_array_compare (location_t locati
>/* C doesn't allow +arr.  */
>if (c_dialect_cxx ())
>   inform (location, "use unary %<+%> which decays operands to pointers "
> - "or %<&%D[0] %s &%D[0]%> to compare the addresses",
> - op0, op_symbol_code (code), op1);
> + "or %<&%s%E%s[0] %s &%s%E%s[0]%> to compare the addresses",
> + DECL_P (op0) ? "" : "(", op0, DECL_P (op0) ? "" : ")",
> + op_symbol_code (code),
> + DECL_P (op1) ? "" : "(", op1, DECL_P (op1) ? "" : ")");
>else
> - inform (location, "use %<&%D[0] %s &%D[0]%> to compare the addresses",
> - op0, op_symbol_code (code), op1);
> + inform (location,
> + "use %<&%s%E%s[0] %s &%s%E%s[0]%> to compare the addresses",
> + DECL_P (op0) ? "" : "(", op0, DECL_P (op0) ? "" : ")",
> + op_symbol_code (code),
> + DECL_P (op1) ? "" : "(", op1, DECL_P (op1) ? "" : ")");
>  }
>  }
>  
> --- gcc/testsuite/c-c++-common/Warray-compare-3.c.jj  2024-06-17 
> 15:13:57.098422635 +0200
> +++ gcc/testsuite/c-c++-common/Warray-compare-3.c 2024-06-17 
> 15:13:24.339849049 +0200
> @@ -0,0 +1,13 @@
> +/* PR c/115290 */
> +/* { dg-do compile } */
> +/* { dg-options "-Warray-compare" } */
> +
> +int a[32][32], b[32][32];
> +
> +int
> +foo (int x, int y)
> +{
> +  return (x ? a : b) == (y ? a : b); /* { dg-warning "comparison between two 
> arrays" } */
> +/* { dg-message "use '&\\\(\[^\n\r]*\\\)\\\[0\\\] == 
> &\\\(\[^\n\r]*\\\)\\\[0\\\]' to compare the addresses" "" { target c } .-1 } 
> */
> +/* { dg-message "use unary '\\\+' which decays operands to pointers or 
> '&\\\(\[^\n\r]*\\\)\\\[0\\\] == &\\\(\[^\n\r]*\\\)\\\[0\\\]' to compare the 
> addresses" "" { target c++ } .-2 } */
> +}
> 
>   Jakub
> 

Marek



[PATCH] rs6000: Shrink rs6000_init_generated_builtins size [PR115324]

2024-06-17 Thread Jakub Jelinek
Hi!

While my r15-1001-g4cf2de9b5268224 PCH PIE power fix change decreased the
.data section sizes (219792 -> 189336), it increased the size of already
huge rs6000_init_generated_builtins generated function, from 218328
to 228668 bytes.  That is because there are thousands of array references
to global arrays and we keep constructing the addresses of the arrays
again and again.

Ideally some optimization would figure out we have a single function which
has
461   rs6000_overload_info
   1257   rs6000_builtin_info_fntype
   1768   rs6000_builtin_decls
   2548   rs6000_instance_info_fntype
array references and that maybe it might be a good idea to just preload
the addresses of those arrays into some register if it decreases code size
and doesn't slow things down.
The function actually is called just once and is huge, so code size is even
more important than speed, which is dominated by all the GC allocations
anyway.

Until that is done, here is a slightly cleaner version of the hack, which
makes the function noipa (so that LTO doesn't undo it) for GCC 8.1+ and
passes the 4 arrays as arguments to the function from the caller.
This decreases the function size from 228668 bytes to 207572 bytes.

Bootstrapped/regtested on powerpc64le-linux, ok for trunk?

2024-06-17  Jakub Jelinek  

PR target/115324
* config/rs6000/rs6000-gen-builtins.cc (write_decls): Change
declaration of rs6000_init_generated_builtins from no arguments
to 4 pointer arguments.
(write_init_bif_table): Change rs6000_builtin_info_fntype to
builtin_info_fntype and rs6000_builtin_decls to builtin_decls.
(write_init_ovld_table): Change rs6000_instance_info_fntype to
instance_info_fntype, rs6000_builtin_decls to builtin_decls and
rs6000_overload_info to overload_info.
(write_init_file): Add __noipa__ attribute to
rs6000_init_generated_builtins for GCC 8.1+ and change the function
from no arguments to 4 pointer arguments.  Change rs6000_builtin_decls
to builtin_decls.
* config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Adjust
rs6000_init_generated_builtins caller.

--- gcc/config/rs6000/rs6000-gen-builtins.cc.jj 2024-06-03 23:11:02.662631144 
+0200
+++ gcc/config/rs6000/rs6000-gen-builtins.cc2024-06-03 23:38:31.727620920 
+0200
@@ -2376,7 +2376,10 @@ write_decls (void)
   "rs6000_instance_info_fntype[RS6000_INST_MAX];\n");
   fprintf (header_file, "extern ovldrecord rs6000_overload_info[];\n\n");
 
-  fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n");
+  fprintf (header_file,
+  "extern void rs6000_init_generated_builtins (tree *, tree *,\n");
+  fprintf (header_file,
+  "\t\t\t\t\tovldrecord *, tree *);\n\n");
   fprintf (header_file,
   "extern bool rs6000_builtin_is_supported (rs6000_gen_builtins);\n");
   fprintf (header_file,
@@ -2651,7 +2654,7 @@ write_init_bif_table (void)
   for (int i = 0; i <= curr_bif; i++)
 {
   fprintf (init_file,
-  "  rs6000_builtin_info_fntype[RS6000_BIF_%s]"
+  "  builtin_info_fntype[RS6000_BIF_%s]"
   "\n= %s;\n",
   bifs[i].idname, bifs[i].fndecl);
 
@@ -2678,7 +2681,7 @@ write_init_bif_table (void)
}
 
   fprintf (init_file,
-  "  rs6000_builtin_decls[(int)RS6000_BIF_%s] = t\n",
+  "  builtin_decls[(int)RS6000_BIF_%s] = t\n",
   bifs[i].idname);
   fprintf (init_file,
   "= add_builtin_function (\"%s\",\n",
@@ -2719,7 +2722,7 @@ write_init_bif_table (void)
  fprintf (init_file, "}\n");
  fprintf (init_file, "  else\n");
  fprintf (init_file, "{\n");
- fprintf (init_file, "  rs6000_builtin_decls"
+ fprintf (init_file, "  builtin_decls"
   "[(int)RS6000_BIF_%s] = NULL_TREE;\n", bifs[i].idname);
  fprintf (init_file, "}\n");
}
@@ -2740,7 +2743,7 @@ write_init_ovld_table (void)
   for (int i = 0; i <= curr_ovld; i++)
 {
   fprintf (init_file,
-  "  rs6000_instance_info_fntype[RS6000_INST_%s]"
+  "  instance_info_fntype[RS6000_INST_%s]"
   "\n= %s;\n",
   ovlds[i].ovld_id_name, ovlds[i].fndecl);
 
@@ -2772,7 +2775,7 @@ write_init_ovld_table (void)
}
 
  fprintf (init_file,
-  "  rs6000_builtin_decls[(int)RS6000_OVLD_%s] = t\n",
+  "  builtin_decls[(int)RS6000_OVLD_%s] = t\n",
   stanza->stanza_id);
  fprintf (init_file,
   "= add_builtin_function (\"%s\",\n",
@@ -2793,7 +2796,7 @@ write_init_ovld_table (void)
  fprintf (init_file, "\n");
 
  fprintf (init_file,
-  "  rs6000_overload_info[RS6000_OVLD_%s - base]"
+  "  overload_info[RS6000_OVLD_%s - base]"
   ".first_instance\n",
  

Re: [PATCH] middle-end/114189 - drop uses of vcond{,u,eq}_optab

2024-06-17 Thread Stefan Schulze Frielinghaus
On Mon, Jun 17, 2024 at 08:16:34AM +0200, Richard Biener wrote:
> On Mon, 17 Jun 2024, Kewen.Lin wrote:
> 
> > Hi Richi,
> > 
> > on 2024/6/14 18:31, Richard Biener wrote:
> > > The following retires vcond{,u,eq} optabs by stopping to use them
> > > from the middle-end.  Targets instead (should) implement vcond_mask
> > > and vec_cmp{,u,eq} optabs.  The PR this change refers to lists
> > > possibly affected targets - those implementing these patterns,
> > > and in particular it lists mips, sparc and ia64 as targets that
> > > most definitely will regress while others might simply remove
> > > their vcond{,u,eq} patterns.
> > > 
> > > I'd appreciate testing, I do not expect fallout for x86 or arm/aarch64.
> > > I know riscv doesn't implement any of the legacy optabs.  But less
> > > maintained vector targets might need adjustments.
> > 
> > Thanks for making this change, this patch can be bootstrapped on ppc64{,le}
> > but both have one failure on gcc/testsuite/gcc.target/powerpc/pr66144-3.c,
> > by looking into it, I found it just exposed one oversight in the current
> > rs6000 vcond_mask support (the condition mask location is wrong), so I think
> > this change is fine for rs6000 port, I'll also test SPEC2017 for this (with
> > rs6000 vcond_mask change) soon.
> 
> Btw, for those targets where the patch works out fine it would be nice
> to delete their vcond{,u,eq} expanders (and double-check that doesn't
> cause issues on its own).
> 
> Can target maintainers note whether their targets support all condition
> codes for their vector comparisons (including FP variants)?  And 
> whether they choose to implement all condition codes in vec_cmp
> and adjust with inversion / operand swapping for not supported cases?

On s390 we support all comparison operations with inverse / operand
swapping via s390_expand_vec_compare.  However, we still have some
failures for which I opened PR115519.  Currently it is unclear to me
what precisely is missing and will have a further look.  vcond_mask
expander is also implemented for all modes.

Cheers,
Stefan

> 
> Thanks,
> Richard.
> 
> > BR,
> > Kewen
> > 
> > > 
> > > I want to get rid of those optabs for GCC 15.  If I don't hear from
> > > you I will assume your target is fine.
> > > 
> > > Thanks,
> > > Richard.
> > > 
> > >   PR middle-end/114189
> > >   * optabs-query.h (get_vcond_icode): Always return CODE_FOR_nothing.
> > >   (get_vcond_eq_icode): Likewise.
> > > ---
> > >  gcc/optabs-query.h | 13 -
> > >  1 file changed, 4 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> > > index 0cb2c21ba85..31fbce80175 100644
> > > --- a/gcc/optabs-query.h
> > > +++ b/gcc/optabs-query.h
> > > @@ -112,14 +112,9 @@ get_vec_cmp_eq_icode (machine_mode vmode, 
> > > machine_mode mask_mode)
> > > mode CMODE, unsigned if UNS is true, resulting in a value of mode 
> > > VMODE.  */
> > >  
> > >  inline enum insn_code
> > > -get_vcond_icode (machine_mode vmode, machine_mode cmode, bool uns)
> > > +get_vcond_icode (machine_mode, machine_mode, bool)
> > >  {
> > > -  enum insn_code icode = CODE_FOR_nothing;
> > > -  if (uns)
> > > -icode = convert_optab_handler (vcondu_optab, vmode, cmode);
> > > -  else
> > > -icode = convert_optab_handler (vcond_optab, vmode, cmode);
> > > -  return icode;
> > > +  return CODE_FOR_nothing;
> > >  }
> > >  
> > >  /* Return insn code for a conditional operator with a mask mode
> > > @@ -135,9 +130,9 @@ get_vcond_mask_icode (machine_mode vmode, 
> > > machine_mode mmode)
> > > mode CMODE (only EQ/NE), resulting in a value of mode VMODE.  */
> > >  
> > >  inline enum insn_code
> > > -get_vcond_eq_icode (machine_mode vmode, machine_mode cmode)
> > > +get_vcond_eq_icode (machine_mode, machine_mode)
> > >  {
> > > -  return convert_optab_handler (vcondeq_optab, vmode, cmode);
> > > +  return CODE_FOR_nothing;
> > >  }
> > >  
> > >  /* Enumerates the possible extraction_insn operations.  */
> > 
> > 
> 
> -- 
> Richard Biener 
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Re: [PATCH 30/52 v2] pdp11: Remove macro {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE

2024-06-17 Thread Kewen.Lin
Hi Paul,

on 2024/6/14 23:20, Paul Koning wrote:
> Ok, I understand better now.  But if those macros are supposed to be replaced 
> by hook functions, could you make that replacement part of the proposed patch?

The default implementation of the introduced hook mode_for_floating_type
returns SFmode for float and DFmode for double or long double, which matches
what pdp11 port requires, so there is no need to add its own hook 
implementation.
This patch series only re-defines this hook macro with the customized hook
implementation for those ports which need something beyond the default.

BR,
Kewen

> 
>   paul
> 
>> On Jun 13, 2024, at 11:22 PM, Kewen.Lin  wrote:
>>
>> Hi Paul,
>>
>> on 2024/6/14 04:07, Paul Koning wrote:
>>> What is the effect of this change?  The original code intended to have 
>>> "float" mean a 32 bit value, and "double" a 64 bit value.  There aren't any 
>>> larger floats, so I defined the long double size as 64 also.  Is the right 
>>> answer not to define it?
>>
>> Since sub-patch 09/52 will poison {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE, 
>> target code building will fail
>> if it still has these macros.  As I'd like to squash these target changes 
>> onto 09/52, so I didn't note
>> the background/context here, sorry about that.
>>
>>>
>>> That part I understand, but why does the patch also remove FLOAT_TYPE_SIZE 
>>> and DOUBLE_TYPE_SIZE without explanation and without mention in the 
>>> changelog?
>>
>> Oops, thanks for catching!  I just noticed this sub-patch has inconsistent 
>> subject & changelog, I should
>> have noticed this as it has a quite different subject from the others. :(  
>> With your finding, I just
>> re-visited all the other sub-patches, luckily they are consistent.
>>
>> The below is the updated revision, hope it looks good to you.  Thanks again.
>>
>> BR,
>> Kewen
>> -
>>
>> Subject: [PATCH] pdp11: Remove macro {FLOAT,DOUBLE,LONG_DOUBLE}_TYPE_SIZE
>>
>> This is to remove macros {FLOAT,{,LONG_}DOUBLE}_TYPE_SIZE
>> defines in pdp11 port, as we want to replace these macros
>> with hook mode_for_floating_type and poison them.
>>
>> gcc/ChangeLog:
>>
>>* config/pdp11/pdp11.h (FLOAT_TYPE_SIZE): Remove.
>>(DOUBLE_TYPE_SIZE): Likewise.
>>(LONG_DOUBLE_TYPE_SIZE): Likewise.
>> ---
>> gcc/config/pdp11/pdp11.h | 11 ---
>> 1 file changed, 11 deletions(-)
>>
>> diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
>> index 2446fea0b58..6c8e045bc57 100644
>> --- a/gcc/config/pdp11/pdp11.h
>> +++ b/gcc/config/pdp11/pdp11.h
>> @@ -71,17 +71,6 @@ along with GCC; see the file COPYING3.  If not see
>> #define LONG_TYPE_SIZE 32
>> #define LONG_LONG_TYPE_SIZE64
>>
>> -/* In earlier versions, FLOAT_TYPE_SIZE was selectable as 32 or 64,
>> -   but that conflicts with Fortran language rules.  Since there is no
>> -   obvious reason why we should have that feature -- other targets
>> -   generally don't have float and double the same size -- I've removed
>> -   it.  Note that it continues to be true (for now) that arithmetic is
>> -   always done with 64-bit values, i.e., the FPU is always in "double"
>> -   mode.  */
>> -#define FLOAT_TYPE_SIZE32
>> -#define DOUBLE_TYPE_SIZE   64
>> -#define LONG_DOUBLE_TYPE_SIZE  64
>> -
>> /* machine types from ansi */
>> #define SIZE_TYPE "short unsigned int" /* definition of size_t */
>> #define WCHAR_TYPE "short int" /* or long int */
>> --
>> 2.43.0
>>
>>
> 



[PATCH] c-family: Fix -Warray-compare warning ICE [PR115290]

2024-06-17 Thread Jakub Jelinek
Hi!

The warning code uses %D to print the ARRAY_REF first operands.
That works in the most common case where those operands are decls, but
as can be seen on the following testcase, they can be other expressions
with array type.
Just changing %D to %E isn't enough, because then the diagnostics can
suggest something like
note: use '&(x) != 0 ? (int (*)[32])&a : (int (*)[32])&b[0] == &(y) != 0 ? (int 
(*)[32])&a : (int (*)[32])&b[0]' to compare the addresses
which is a bad suggestion, the %E printing doesn't know that the
warning code will want to add & before it and [0] after it.
So, the following patch adds ()s around the operand as well, but does
that only for non-decls, for decls keeps it as &arr[0] like before.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk
and release branches?

2024-06-17  Jakub Jelinek  

PR c/115290
* c-warn.cc (do_warn_array_compare): Use %E rather than %D for
printing op0 and op1; if those operands aren't decls, also print
parens around them.

* c-c++-common/Warray-compare-3.c: New test.

--- gcc/c-family/c-warn.cc.jj   2024-06-04 13:19:03.371609456 +0200
+++ gcc/c-family/c-warn.cc  2024-06-17 15:07:09.005737065 +0200
@@ -3832,11 +3832,16 @@ do_warn_array_compare (location_t locati
   /* C doesn't allow +arr.  */
   if (c_dialect_cxx ())
inform (location, "use unary %<+%> which decays operands to pointers "
-   "or %<&%D[0] %s &%D[0]%> to compare the addresses",
-   op0, op_symbol_code (code), op1);
+   "or %<&%s%E%s[0] %s &%s%E%s[0]%> to compare the addresses",
+   DECL_P (op0) ? "" : "(", op0, DECL_P (op0) ? "" : ")",
+   op_symbol_code (code),
+   DECL_P (op1) ? "" : "(", op1, DECL_P (op1) ? "" : ")");
   else
-   inform (location, "use %<&%D[0] %s &%D[0]%> to compare the addresses",
-   op0, op_symbol_code (code), op1);
+   inform (location,
+   "use %<&%s%E%s[0] %s &%s%E%s[0]%> to compare the addresses",
+   DECL_P (op0) ? "" : "(", op0, DECL_P (op0) ? "" : ")",
+   op_symbol_code (code),
+   DECL_P (op1) ? "" : "(", op1, DECL_P (op1) ? "" : ")");
 }
 }
 
--- gcc/testsuite/c-c++-common/Warray-compare-3.c.jj2024-06-17 
15:13:57.098422635 +0200
+++ gcc/testsuite/c-c++-common/Warray-compare-3.c   2024-06-17 
15:13:24.339849049 +0200
@@ -0,0 +1,13 @@
+/* PR c/115290 */
+/* { dg-do compile } */
+/* { dg-options "-Warray-compare" } */
+
+int a[32][32], b[32][32];
+
+int
+foo (int x, int y)
+{
+  return (x ? a : b) == (y ? a : b); /* { dg-warning "comparison between two 
arrays" } */
+/* { dg-message "use '&\\\(\[^\n\r]*\\\)\\\[0\\\] == 
&\\\(\[^\n\r]*\\\)\\\[0\\\]' to compare the addresses" "" { target c } .-1 } */
+/* { dg-message "use unary '\\\+' which decays operands to pointers or 
'&\\\(\[^\n\r]*\\\)\\\[0\\\] == &\\\(\[^\n\r]*\\\)\\\[0\\\]' to compare the 
addresses" "" { target c++ } .-2 } */
+}

Jakub



[committed] c++: Fix up floating point conversion rank comparison for _Float32 and float if float/double are same size [PR115511]

2024-06-17 Thread Jakub Jelinek
Hi!

On AVR and SH with some options sizeof (float) == sizeof (double) and
the 2 types have the same set of values.
http://eel.is/c++draft/conv.rank#2.2 for this says that double still
has bigger rank than float and http://eel.is/c++draft/conv.rank#2.5
says that extended type with the same set of values as more than one
standard floating point type shall have the same rank as double.
I've implemented the latter rule as
   if (cnt > 1 && mv2 == long_double_type_node)
 return -2;
with the _Float64/double/long double case having same mode case (various
targets with -mlong-double-64) in mind.
But never thought there are actually targets where float and double
are the same, that needs handling too, if cnt > 1 (that is the extended
type mv1 has same set of values as 2 or 3 of float/double/long double)
and mv2 is float, we need to return 2, because mv1 in that case should
have same rank as double and double has bigger rank than float.

Bootstrapped/regtested on x86_64-linux and i686-linux and checked with
a cross-compiler to avr-none on the testcase, which previously ICEd because
the function returned _Float32 and float have the same rank, just different
subrank and for _Float32 vs. double also returned they have the same rank.
Committed to trunk as obvious, will backport to 14/13 soon.

2024-06-17  Jakub Jelinek  

PR target/111343
PR c++/115511
* typeck.cc (cp_compare_floating_point_conversion_ranks): If an
extended floating point type mv1 has same set of values as more
than one standard floating point type and mv2 is float, return 2.

* g++.dg/cpp23/ext-floating18.C: New test.

--- gcc/cp/typeck.cc.jj 2024-06-04 13:19:03.755604346 +0200
+++ gcc/cp/typeck.cc2024-06-17 10:32:02.063088961 +0200
@@ -393,6 +393,9 @@ cp_compare_floating_point_conversion_ran
  has higher rank.  */
   if (cnt > 1 && mv2 == long_double_type_node)
 return -2;
+  /* And similarly if t2 is float, t2 has lower rank.  */
+  if (cnt > 1 && mv2 == float_type_node)
+return 2;
   /* Otherwise, they have equal rank, but extended types
  (other than std::bfloat16_t) have higher subrank.
  std::bfloat16_t shouldn't have equal rank to any standard
--- gcc/testsuite/g++.dg/cpp23/ext-floating18.C.jj  2024-06-17 
18:39:01.740020581 +0200
+++ gcc/testsuite/g++.dg/cpp23/ext-floating18.C 2024-06-17 18:47:19.152779782 
+0200
@@ -0,0 +1,26 @@
+// P1467R9 - Extended floating-point types and standard names.
+// { dg-do compile { target c++23 } }
+// { dg-options "" }
+// { dg-add-options float32 }
+
+constexpr int foo (float) { return 1; }
+constexpr int foo (double) { return 2; }
+constexpr int foo (long double) { return 3; }
+
+#ifdef __STDCPP_FLOAT32_T__
+#if __FLT_MAX_EXP__ == __FLT32_MAX_EXP__ \
+&& __FLT_MAX_DIG__ == __FLT32_MAX_DIG__
+#if __FLT_MAX_EXP__ == __DBL_MAX_EXP__ \
+&& __FLT_MAX_DIG__ == __DBL_MAX_DIG__
+static_assert (foo (1.0f32) == 2);
+#else
+static_assert (foo (1.0f32) == 1);
+#endif
+#endif
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+#if __DBL_MAX_EXP__ == __FLT64_MAX_EXP__ \
+&& __DBL_MAX_DIG__ == __FLT64_MAX_DIG__
+static_assert (foo (1.0f64) == 2);
+#endif
+#endif

Jakub



Re: [PATCH] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]

2024-06-17 Thread Kewen.Lin
on 2024/6/15 01:05, Peter Bergner wrote:
> On 6/13/24 10:26 PM, Peter Bergner wrote:
>> On 6/13/24 9:26 PM, Kewen.Lin wrote:
> I understand this is just copied from the if arm, but if I read this 
> right, it can be
> simplified as:

 Ok, I'll retest with that simplification.
>>
>> So I retested a normal powerpc64le-linux build (ie, we default to Power8
>> with Altivec) and it bootstrapped and regtested with no regressions.
>> I then attempted a --with-cpu=power5 build to test the non-altivec path,
>> but both the unpatched and patched builds died building libgfortran with
>> the following error: "error: ‘_Float128’ is not supported on this target".
>> I believe that is related to PR113652.  I'll kick off the build again,
>> this time disabling Fortran and seeing if the build completes.
> 
> My bad for calling the --with-cpu=power5 bootstrap build on ELFv2 a "bug".
> It's not, since ELFv2 mandates a cpu with at least ISA 2.07 (eg. Power8)
> support and some of the libgfortran code was written assuming that, so what
> I was trying to do was really not supported (ie, luser error).
> 
> That said, the --with-cpu=power5 build without fortran did bootstrap and
> regtest with no regressions, so the build did test that code path and
> exposed no problems.

OK, nice!  Thanks!

> 
> 
> 
 That's what I expected too! :-)  However, I was surprised to learn that 
 -mno-altivec
 does *not* disable TARGET_ALTIVEC_ABI.  I had to explicitly use the -mabi= 
 option to
 expose the bug.
>>>
>>> oh, it's surprising, I learn something today! :) I guess it's not 
>>> intentional but just no
>>> one noticed it, as it seems nonsense to have altivec ABI extension but not 
>>> using any altivec
>>> features.
> 
> Currently, TARGET_ALTIVEC_ABI is defined as:
> 
>   #define TARGET_ALTIVEC_ABI rs6000_altivec_abi
> 
> Would it make sense to redefine it to:
> 
>   #define TARGET_ALTIVEC_ABI (TARGET_ALTIVEC && rs6000_altivec_abi)
> 
> ...or add some code in rs6000 option handling to disable rs6000_altivec_abi
> when TARGET_ALTIVEC is false?  or do we care enough to even change it? :-)

Assuming the current code is robust enough (perfectly guarded by some altivec 
related
condition like this altivec register saving slot), there may not be any actual 
errors,
but considering not surprising people, I'm inclined to add some option 
handlings for
it, like unsetting rs6000_altivec_abi if !TARGET_ALTIVEC and give some warning 
if it's
explicitly specified, what do you think?

BR,
Kewen



Re: [PATCH] RISC-V: Add configure check for Zaamo/Zalrsc assembler support

2024-06-17 Thread Patrick O'Neill



On 6/13/24 13:02, Jeff Law wrote:



On 6/12/24 5:20 PM, Patrick O'Neill wrote:

Binutils 2.42 and before don't support Zaamo/Zalrsc. Add a configure
check to prevent emitting Zaamo/Zalrsc in the arch string when the
assembler does not support it.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
  (riscv_subset_list::to_string): Skip zaamo/zalrsc when not
  supported by the assembler.
* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Add zaamo/zalrsc assembler check.

OK.

It looks like you've got some unexpected diff fragments in configure -- 
all the LARGE_OFF_T stuff.  They look OK to me, but something like 
that is usually a sign of different autoconf versions.   I wouldn't 
lose any sleep if you left them as-is or removed those hunks before 
committing.


jeff


Removed the hunks and committed.
Sent the committed version to the list for the archiver.

I'll rebase the promotion RFC [1] on top and resolve the warning that 
Andreas Schwab noticed.


Patrick

[1]: 
https://patchwork.sourceware.org/project/gcc/patch/20240613233059.1451117-1-patr...@rivosinc.com/


[Committed] RISC-V: Add configure check for Zaamo/Zalrsc assembler support

2024-06-17 Thread Patrick O'Neill
Binutils 2.42 and before don't support Zaamo/Zalrsc. Add a configure
check to prevent emitting Zaamo/Zalrsc in the arch string when the
assembler does not support it.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_subset_list::to_string): Skip zaamo/zalrsc when not
supported by the assembler.
* config.in: Regenerate.
* configure: Regenerate.
* configure.ac: Add zaamo/zalrsc assembler check.

Signed-off-by: Patrick O'Neill 
Acked-by: Palmer Dabbelt  # RISC-V
Reviewed-by: Palmer Dabbelt  # RISC-V
---
Tested using newlib rv64gc with binutils tip-of-tree and 2.42.

This results in calls being emitted when compiling for _zaamo_zalrsc
when the assembler does not support these extensions.

> cat amo.c
void foo (int* bar, int* baz)
{
  __atomic_add_fetch(bar, baz, __ATOMIC_RELAXED);
}
> gcc -march=rv64id_zaamo_zalrsc -O3 amo.c
results in:
foo:
sext.w  a1,a1
li  a2,0
tail__atomic_fetch_add_4

As a result there are some testsuite failures on zalrsc specific
testcases and when using an old version of binutils on non-a targets.
Not a cause for concern imo but worth calling out.
Also testcases that check for the default isa string will fail with
the old binutils since zaamo/zalrsc aren't emitted anymore.
---
 gcc/common/config/riscv/riscv-common.cc | 11 +
 gcc/config.in   |  6 +
 gcc/configure   | 31 +
 gcc/configure.ac|  5 
 4 files changed, 53 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 78dfd6b1470..1dc1d9904c7 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -916,6 +916,7 @@ riscv_subset_list::to_string (bool version_p) const
   riscv_subset_t *subset;
 
   bool skip_zifencei = false;
+  bool skip_zaamo_zalrsc = false;
   bool skip_zicsr = false;
   bool i2p0 = false;
 
@@ -943,6 +944,10 @@ riscv_subset_list::to_string (bool version_p) const
  a mistake in that binutils 2.35 supports zicsr but not zifencei.  */
   skip_zifencei = true;
 #endif
+#ifndef HAVE_AS_MARCH_ZAAMO_ZALRSC
+  /* Skip since binutils 2.42 and earlier don't recognize zaamo/zalrsc.  */
+  skip_zaamo_zalrsc = true;
+#endif
 
   for (subset = m_head; subset != NULL; subset = subset->next)
 {
@@ -954,6 +959,12 @@ riscv_subset_list::to_string (bool version_p) const
  subset->name == "zicsr")
continue;
 
+  if (skip_zaamo_zalrsc && subset->name == "zaamo")
+   continue;
+
+  if (skip_zaamo_zalrsc && subset->name == "zalrsc")
+   continue;
+
   /* For !version_p, we only separate extension with underline for
 multi-letter extension.  */
   if (!first &&
diff --git a/gcc/config.in b/gcc/config.in
index e41b6dc97cd..acab3c0f126 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -629,6 +629,12 @@
 #endif
 
 
+/* Define if the assembler understands -march=rv*_zaamo_zalrsc. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MARCH_ZAAMO_ZALRSC
+#endif
+
+
 /* Define if the assembler understands -march=rv*_zifencei. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MARCH_ZIFENCEI
diff --git a/gcc/configure b/gcc/configure
index 94970e24051..9dc0b65dfaa 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -30820,6 +30820,37 @@ if test $gcc_cv_as_riscv_march_zifencei = yes; then
 
 $as_echo "#define HAVE_AS_MARCH_ZIFENCEI 1" >>confdefs.h
 
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
-march=rv32i_zaamo_zalrsc support" >&5
+$as_echo_n "checking assembler for -march=rv32i_zaamo_zalrsc support... " >&6; 
}
+if ${gcc_cv_as_riscv_march_zaamo_zalrsc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_riscv_march_zaamo_zalrsc=no
+  if test x$gcc_cv_as != x; then
+$as_echo '' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags -march=rv32i_zaamo_zalrsc -o 
conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   gcc_cv_as_riscv_march_zaamo_zalrsc=yes
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_riscv_march_zaamo_zalrsc" >&5
+$as_echo "$gcc_cv_as_riscv_march_zaamo_zalrsc" >&6; }
+if test $gcc_cv_as_riscv_march_zaamo_zalrsc = yes; then
+
+$as_echo "#define HAVE_AS_MARCH_ZAAMO_ZALRSC 1" >>confdefs.h
+
 fi
 
 ;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 35475cf5aae..b2243e9954a 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5452,6 +5452,11 @@ configured with --enable-newlib-nano-formatted-io.])
   [-march=rv32i_zifencei2p0],,,
   [AC_DEFINE(HAVE_AS_MARCH_ZIFENCEI, 1,
 [

[PATCH] diagnostics: Fix add_misspelling_candidates [PR115440]

2024-06-17 Thread Jakub Jelinek
Hi!

The option_map array for most entries contains just non-NULL opt0
{ "-Wno-", NULL, "-W", false, true },
{ "-fno-", NULL, "-f", false, true },
{ "-gno-", NULL, "-g", false, true },
{ "-mno-", NULL, "-m", false, true },
{ "--debug=", NULL, "-g", false, false },
{ "--machine-", NULL, "-m", true, false },
{ "--machine-no-", NULL, "-m", false, true },
{ "--machine=", NULL, "-m", false, false },
{ "--machine=no-", NULL, "-m", false, true },
{ "--machine", "", "-m", false, false },
{ "--machine", "no-", "-m", false, true },
{ "--optimize=", NULL, "-O", false, false },
{ "--std=", NULL, "-std=", false, false },
{ "--std", "", "-std=", false, false },
{ "--warn-", NULL, "-W", true, false },
{ "--warn-no-", NULL, "-W", false, true },
{ "--", NULL, "-f", true, false },
{ "--no-", NULL, "-f", false, true }
and so add_misspelling_candidates works correctly for it, but 3 out of
these,
{ "--machine", "", "-m", false, false },
{ "--machine", "no-", "-m", false, true },
and
{ "--std", "", "-std=", false, false },
use non-NULL opt1.  That says that
--machine foo
should map to
-mfoo
and
--machine no-foo
should map to
-mno-foo
and
--std c++17
should map to
-std=c++17
add_misspelling_candidates was not handling this, so it happily
registered say
--stdc++17
or
--machineavx512
(twice) as spelling alternatives, when those options aren't recognized.
Instead we support
--std c++17
or
--machine avx512
--machine no-avx512

The following patch fixes that.  On this particular testcase, we no longer
suggest anything, even when among the suggestion is say that
--std c++17
or
-std=c++17
etc.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-06-17  Jakub Jelinek  

PR driver/115440
* opts-common.cc (add_misspelling_candidates): If opt1 is non-NULL,
add a space and opt1 to the alternative suggestion text.

* g++.dg/cpp1z/pr115440.C: New test.

--- gcc/opts-common.cc.jj   2024-06-14 19:44:34.434236887 +0200
+++ gcc/opts-common.cc  2024-06-17 10:58:14.351178400 +0200
@@ -524,6 +524,7 @@ add_misspelling_candidates (auto_vecsafe_push (alternative);
}
 }
--- gcc/testsuite/g++.dg/cpp1z/pr115440.C.jj2024-06-17 10:55:28.607380969 
+0200
+++ gcc/testsuite/g++.dg/cpp1z/pr115440.C   2024-06-17 11:04:38.334075632 
+0200
@@ -0,0 +1,8 @@
+// PR driver/115440
+// { dg-do compile { target c++17_only } }
+// { dg-options "--c++17" }
+
+int i;
+
+// { dg-bogus "unrecognized command-line option '--c\\\+\\\+17'; did you mean 
'--stdc\\\+\\\+17'" "" { target *-*-* } 0 }
+// { dg-error "unrecognized command-line option '--c\\\+\\\+17'" "" { target 
*-*-* } 0 }

Jakub



Re: Patch ping

2024-06-17 Thread Segher Boessenkool
On Mon, Jun 17, 2024 at 03:26:52PM +0200, Jakub Jelinek wrote:
> I'd like to ping the
> https://gcc.gnu.org/pipermail/gcc-patches/2024-June/653573.html
> patch.  While the committed and backported patch fixed PCH on PIE
> cc1/cc1plus etc. on PowerPC, it grew up the size of the
> rs6000_init_generated_builtins function quite a lot.
> The above patch decreases it back, to even less than the size of
> the function before my fix.

A patch in the middle of a thread.  I missed it, sorry.  Please send
patches as separate threads?


Segher


Ping^2 [PATCHv5] Optab: add isfinite_optab for __builtin_isfinite

2024-06-17 Thread HAO CHEN GUI
Hi,
  Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652991.html

Thanks
Gui Haochen

在 2024/6/3 10:37, HAO CHEN GUI 写道:
> Hi,
>   All issues were addressed. Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652991.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/5/29 14:36, HAO CHEN GUI 写道:
>> Hi,
>>   This patch adds an optab for __builtin_isfinite. The finite check can be
>> implemented on rs6000 by a single instruction. It needs an optab to be
>> expanded to the certain sequence of instructions.
>>
>>   The subsequent patches will implement the expand on rs6000.
>>
>>   Compared to previous version, the main change is to specify return
>> value of the optab should be either 0 or 1.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652864.html
>>
>>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
>> regressions. Is this OK for trunk?
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> optab: Add isfinite_optab for isfinite builtin
>>
>> gcc/
>>  * builtins.cc (interclass_mathfn_icode): Set optab to isfinite_optab
>>  for isfinite builtin.
>>  * optabs.def (isfinite_optab): New.
>>  * doc/md.texi (isfinite): Document.
>>
>>
>> patch.diff
>> diff --git a/gcc/builtins.cc b/gcc/builtins.cc
>> index f8d94c4b435..53e9d210541 100644
>> --- a/gcc/builtins.cc
>> +++ b/gcc/builtins.cc
>> @@ -2459,8 +2459,10 @@ interclass_mathfn_icode (tree arg, tree fndecl)
>>errno_set = true; builtin_optab = ilogb_optab; break;
>>  CASE_FLT_FN (BUILT_IN_ISINF):
>>builtin_optab = isinf_optab; break;
>> -case BUILT_IN_ISNORMAL:
>>  case BUILT_IN_ISFINITE:
>> +  builtin_optab = isfinite_optab;
>> +  break;
>> +case BUILT_IN_ISNORMAL:
>>  CASE_FLT_FN (BUILT_IN_FINITE):
>>  case BUILT_IN_FINITED32:
>>  case BUILT_IN_FINITED64:
>> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> index 5730bda80dc..3eb4216141e 100644
>> --- a/gcc/doc/md.texi
>> +++ b/gcc/doc/md.texi
>> @@ -8557,6 +8557,12 @@ operand 2, greater than operand 2 or is unordered 
>> with operand 2.
>>
>>  This pattern is not allowed to @code{FAIL}.
>>
>> +@cindex @code{isfinite@var{m}2} instruction pattern
>> +@item @samp{isfinite@var{m}2}
>> +Return 1 if operand 1 is a finite floating point number and 0
>> +otherwise.  @var{m} is a scalar floating point mode.  Operand 0
>> +has mode @code{SImode}, and operand 1 has mode @var{m}.
>> +
>>  @end table
>>
>>  @end ifset
>> diff --git a/gcc/optabs.def b/gcc/optabs.def
>> index ad14f9328b9..dcd77315c2a 100644
>> --- a/gcc/optabs.def
>> +++ b/gcc/optabs.def
>> @@ -352,6 +352,7 @@ OPTAB_D (fmod_optab, "fmod$a3")
>>  OPTAB_D (hypot_optab, "hypot$a3")
>>  OPTAB_D (ilogb_optab, "ilogb$a2")
>>  OPTAB_D (isinf_optab, "isinf$a2")
>> +OPTAB_D (isfinite_optab, "isfinite$a2")
>>  OPTAB_D (issignaling_optab, "issignaling$a2")
>>  OPTAB_D (ldexp_optab, "ldexp$a3")
>>  OPTAB_D (log10_optab, "log10$a2")


Ping^2 [PATCH-1v3, rs6000] Implement optab_isinf for SFDF and IEEE128

2024-06-17 Thread HAO CHEN GUI
Hi,
   Gently ping the series of patches.
 [PATCH-1v3, rs6000] Implement optab_isinf for SFDF and IEEE128
 https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652593.html
 [PATCH-2v3, rs6000] Implement optab_isfinite for SFDF and IEEE128
 https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652594.html
 [PATCH-3v3, rs6000] Implement optab_isnormal for SFDF and IEEE128
 https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652595.html

Thanks
Gui Haochen

在 2024/6/3 10:40, HAO CHEN GUI 写道:
> Hi,
>   Gently ping the series of patches.
> [PATCH-1v3, rs6000] Implement optab_isinf for SFDF and IEEE128
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652593.html
> [PATCH-2v3, rs6000] Implement optab_isfinite for SFDF and IEEE128
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652594.html
> [PATCH-3v3, rs6000] Implement optab_isnormal for SFDF and IEEE128
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652595.html
> 
> Thanks
> Gui Haochen
> 
> 在 2024/5/24 14:02, HAO CHEN GUI 写道:
>> Hi,
>>   This patch implemented optab_isinf for SFDF and IEEE128 by test
>> data class instructions.
>>
>>   Compared with previous version, the main change is to narrow
>> down the predicate for float operand according to reviewer's advice.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652128.html
>>
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no
>> regressions. Is it OK for trunk?
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> rs6000: Implement optab_isinf for SFDF and IEEE128
>>
>> gcc/
>>  PR target/97786
>>  * config/rs6000/vsx.md (isinf2 for SFDF): New expand.
>>  (isinf2 for IEEE128): New expand.
>>
>> gcc/testsuite/
>>  PR target/97786
>>  * gcc.target/powerpc/pr97786-1.c: New test.
>>  * gcc.target/powerpc/pr97786-2.c: New test.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>> index f135fa079bd..08cce11da60 100644
>> --- a/gcc/config/rs6000/vsx.md
>> +++ b/gcc/config/rs6000/vsx.md
>> @@ -5313,6 +5313,24 @@ (define_expand "xststdcp"
>>operands[4] = CONST0_RTX (SImode);
>>  })
>>
>> +(define_expand "isinf2"
>> +  [(use (match_operand:SI 0 "gpc_reg_operand"))
>> +   (use (match_operand:SFDF 1 "vsx_register_operand"))]
>> +  "TARGET_HARD_FLOAT && TARGET_P9_VECTOR"
>> +{
>> +  emit_insn (gen_xststdcp (operands[0], operands[1], GEN_INT (0x30)));
>> +  DONE;
>> +})
>> +
>> +(define_expand "isinf2"
>> +  [(use (match_operand:SI 0 "gpc_reg_operand"))
>> +   (use (match_operand:IEEE128 1 "vsx_register_operand"))]
>> +  "TARGET_HARD_FLOAT && TARGET_P9_VECTOR"
>> +{
>> +  emit_insn (gen_xststdcqp_ (operands[0], operands[1], GEN_INT 
>> (0x30)));
>> +  DONE;
>> +})
>> +
>>  ;; The VSX Scalar Test Negative Quad-Precision
>>  (define_expand "xststdcnegqp_"
>>[(set (match_dup 2)
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-1.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr97786-1.c
>> new file mode 100644
>> index 000..c1c4f64ee8b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr97786-1.c
>> @@ -0,0 +1,22 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
>> +
>> +int test1 (double x)
>> +{
>> +  return __builtin_isinf (x);
>> +}
>> +
>> +int test2 (float x)
>> +{
>> +  return __builtin_isinf (x);
>> +}
>> +
>> +int test3 (float x)
>> +{
>> +  return __builtin_isinff (x);
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\mfcmp} } } */
>> +/* { dg-final { scan-assembler-times {\mxststdcsp\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mxststdcdp\M} 1 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr97786-2.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr97786-2.c
>> new file mode 100644
>> index 000..ed305e8572e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr97786-2.c
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target ppc_float128_hw } */
>> +/* { dg-require-effective-target powerpc_vsx } */
>> +/* { dg-options "-O2 -mdejagnu-cpu=power9 -mabi=ieeelongdouble -Wno-psabi" 
>> } */
>> +
>> +int test1 (long double x)
>> +{
>> +  return __builtin_isinf (x);
>> +}
>> +
>> +int test2 (long double x)
>> +{
>> +  return __builtin_isinfl (x);
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\mxscmpuqp\M} } } */
>> +/* { dg-final { scan-assembler-times {\mxststdcqp\M} 2 } } */


Re: [PATCH] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]

2024-06-17 Thread Kewen.Lin
on 2024/6/17 10:31, Peter Bergner wrote:
> On 6/16/24 9:10 PM, Kewen.Lin wrote:
>> on 2024/6/15 01:05, Peter Bergner wrote:
>>> That said, the --with-cpu=power5 build without fortran did bootstrap and
>>> regtest with no regressions, so the build did test that code path and
>>> exposed no problems.
>>
>> OK, nice!  Thanks!
> 
> I assume this means you're "OK" with the updated patch, correct?

Yes, OK for trunk, thanks!

>>> Currently, TARGET_ALTIVEC_ABI is defined as:
>>>
>>>   #define TARGET_ALTIVEC_ABI rs6000_altivec_abi
>>>
>>> Would it make sense to redefine it to:
>>>
>>>   #define TARGET_ALTIVEC_ABI (TARGET_ALTIVEC && rs6000_altivec_abi)
>>>
>>> ...or add some code in rs6000 option handling to disable rs6000_altivec_abi
>>> when TARGET_ALTIVEC is false?  or do we care enough to even change it? 
>>> :-)
>>
>> Assuming the current code is robust enough (perfectly guarded by some 
>> altivec related
>> condition like this altivec register saving slot), there may not be any actual 
>> errors,
>> but considering not surprising people, I'm inclined to add some option 
>> handlings for
>> it, like unsetting rs6000_altivec_abi if !TARGET_ALTIVEC and give some 
>> warning if it's
>> explicitly specified, what do you think?
> 
> I like it, since if Altivec is disabled, having TARGET_ALTIVEC_ABI enabled 
> makes no
> sense to me.  That is orthogonal to this bug though, so should be a separate 
> patch.

Yes.

> Do you want to take a stab at writing that or do you want me to do that?

Either is fine for me, then let me give it a shot.

BR,
Kewen



Re: [PATCH] middle-end/114189 - drop uses of vcond{,u,eq}_optab

2024-06-17 Thread Kewen.Lin
on 2024/6/17 14:16, Richard Biener wrote:
> On Mon, 17 Jun 2024, Kewen.Lin wrote:
> 
>> Hi Richi,
>>
>> on 2024/6/14 18:31, Richard Biener wrote:
>>> The following retires vcond{,u,eq} optabs by stopping to use them
>>> from the middle-end.  Targets instead (should) implement vcond_mask
>>> and vec_cmp{,u,eq} optabs.  The PR this change refers to lists
>>> possibly affected targets - those implementing these patterns,
>>> and in particular it lists mips, sparc and ia64 as targets that
>>> most definitely will regress while others might simply remove
>>> their vcond{,u,eq} patterns.
>>>
>>> I'd appreciate testing, I do not expect fallout for x86 or arm/aarch64.
>>> I know riscv doesn't implement any of the legacy optabs.  But less
>>> maintained vector targets might need adjustments.
>>
>> Thanks for making this change, this patch can be bootstrapped on ppc64{,le}
>> but both have one failure on gcc/testsuite/gcc.target/powerpc/pr66144-3.c,
>> by looking into it, I found it just exposed one oversight in the current
>> rs6000 vcond_mask support (the condition mask location is wrong), so I think
>> this change is fine for rs6000 port, I'll also test SPEC2017 for this (with
>> rs6000 vcond_mask change) soon.
> 
> Btw, for those targets where the patch works out fine it would be nice
> to delete their vcond{,u,eq} expanders (and double-check that doesn't
> cause issues on its own).

OK, will do, thanks for reminding!

> 
> Can target maintainers note whether their targets support all condition
> codes for their vector comparisons (including FP variants)?  And 

On Power, hardware only supports EQ and GT for vector INT (well ISA 3.0 supports
NE for b/h/w), while EQ, GT & GE for vector FP.  But vec_cmp optab supports
{EQ,NE,LT,LE,GT,GE} for signed, {EQ,NE,LTU,LEU,GTU,GEU} for unsigned, and
{EQ,NE,LT,LE,GT,GE,UNORDERED,ORDERED,UNEQ,LTGT,UNGE,UNGT,UNLT,UNLE} for fp.

> whether they choose to implement all condition codes in vec_cmp
> and adjust with inversion / operand swapping for not supported cases?

Yes for rs6000 port, some rely on define_insn_and_split.

BR,
Kewen



[PATCH v1 7/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 8

2024-06-17 Thread pan2 . li
From: Pan Li 

After the middle-end supports form 8 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector mode, add
more test cases to cover form 8.

Form 8:
  #define DEF_VEC_SAT_U_ADD_FMT_8(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = x > (T)(x + y) ? -1 : (x + y);\
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-29.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-30.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-31.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-32.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 16 
 .../rvv/autovec/binop/vec_sat_u_add-29.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-30.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-31.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-32.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-29.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-30.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-31.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-32.c  | 75 +++
 9 files changed, 395 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-29.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-30.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-31.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-32.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 46fae4555be..443f88261ba 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -101,6 +101,19 @@ vec_sat_u_add_##T##_fmt_7 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_8(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_8 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  out[i] = x > (T)(x + y) ? -1 : (x + y);\
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -122,6 +135,9 @@ vec_sat_u_add_##T##_fmt_7 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_7(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_7(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_8(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_8(out, op_1, op_2, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   

[PATCH v1 5/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 6

2024-06-17 Thread pan2 . li
From: Pan Li 

After the middle-end supports form 6 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector mode, add
more test cases to cover form 6.

Form 6:
  #define DEF_VEC_SAT_U_ADD_FMT_6(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_6 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = x <= (T)(x + y) ? (x + y) : -1;   \
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 16 
 .../rvv/autovec/binop/vec_sat_u_add-21.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-22.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-23.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-24.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-21.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-22.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-23.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-24.c  | 75 +++
 9 files changed, 395 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 1f2ee31577d..0f08822cbeb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -75,6 +75,19 @@ vec_sat_u_add_##T##_fmt_5 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_6(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_6 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  out[i] = x <= (T)(x + y) ? (x + y) : -1;   \
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -90,6 +103,9 @@ vec_sat_u_add_##T##_fmt_5 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_5(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_5(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_6(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_6(out, op_1, op_2, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)  

[PATCH v1 6/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 7

2024-06-17 Thread pan2 . li
From: Pan Li 

After the middle-end supports form 7 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector mode, add
more test cases to cover form 7.

Form 7:
  #define DEF_VEC_SAT_U_ADD_FMT_7(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_7 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = (T)(x + y) < x ? -1 : (x + y);\
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 16 
 .../rvv/autovec/binop/vec_sat_u_add-25.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-26.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-27.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-28.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-25.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-26.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-27.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-28.c  | 75 +++
 9 files changed, 395 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 0f08822cbeb..46fae4555be 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -88,6 +88,19 @@ vec_sat_u_add_##T##_fmt_6 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_7(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_7 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  out[i] = (T)(x + y) < x ? -1 : (x + y);\
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -106,6 +119,9 @@ vec_sat_u_add_##T##_fmt_6 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_6(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_6(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_7(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_7(out, op_1, op_2, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed) 

[PATCH v1 4/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 5

2024-06-17 Thread pan2 . li
From: Pan Li 

After the middle-end supports form 5 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector mode, add
more test cases to cover form 5.

Form 5:
  #define DEF_VEC_SAT_U_ADD_FMT_5(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_5 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T ret; \
out[i] = __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1;  \
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 17 +
 .../rvv/autovec/binop/vec_sat_u_add-17.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-18.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-19.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-20.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-17.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-18.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-19.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-20.c  | 75 +++
 9 files changed, 396 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index e00769e35b6..1f2ee31577d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -61,6 +61,20 @@ vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_5(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_5 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  T ret; \
+  out[i] = __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1;  \
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -73,6 +87,9 @@ vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_4(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_4(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_5(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_5(out, op_1, op_2, N)
+

[PATCH v1 3/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 4

2024-06-17 Thread pan2 . li
From: Pan Li 

Now that the middle-end supports form 4 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector modes, add
more test cases to cover form 4.

Form 4:
  #define DEF_VEC_SAT_U_ADD_FMT_4(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T ret; \
out[i] = __builtin_add_overflow (x, y, &ret) ? -1 : ret;   \
  }\
  }

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: New test.

Passed the rv64gcv regression tests.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 17 +
 .../rvv/autovec/binop/vec_sat_u_add-13.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-14.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-15.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-16.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-13.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-14.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-15.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-16.c  | 75 +++
 9 files changed, 396 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 76f393fffbd..e00769e35b6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -47,6 +47,20 @@ vec_sat_u_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_4(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_4 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  T ret; \
+  out[i] = __builtin_add_overflow (x, y, &ret) ? -1 : ret;   \
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
@@ -56,6 +70,9 @@ vec_sat_u_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned 
limit) \
 #define RUN_VEC_SAT_U_ADD_FMT_3(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_3(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_4(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_4(out, op_1, op_2, N)
+

[PATCH v1 1/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 2

2024-06-17 Thread pan2 . li
From: Pan Li 

Now that the middle-end supports form 2 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector modes, add
more test cases to cover form 2.

Form 2:
  #define DEF_VEC_SAT_U_ADD_FMT_2(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = (T)(x + y) >= x ? (x + y) : -1;   \
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-5.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-6.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-7.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-8.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 16 
 .../riscv/rvv/autovec/binop/vec_sat_u_add-5.c | 19 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-6.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-7.c | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-8.c | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-5.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-6.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-7.c   | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-8.c   | 75 +++
 9 files changed, 395 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-8.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 450f0fbbc72..57b1bce4bd2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -19,9 +19,25 @@ vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_2(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  out[i] = (T)(x + y) >= x ? (x + y) : -1;   \
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_2(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_2(out, op_1, op_2, N)
+
 
/**/
 /* Saturation Sub (Unsigned and Signed)   
*/
 
/**/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c 
b/gcc/testsuite/gcc.target/risc

[PATCH v1 2/7] RISC-V: Add testcases for unsigned .SAT_ADD vector form 3

2024-06-17 Thread pan2 . li
From: Pan Li 

Now that the middle-end supports form 3 of unsigned SAT_ADD and
the RISC-V backend implements .SAT_ADD for vector modes, add
more test cases to cover form 3.

Form 3:
  #define DEF_VEC_SAT_U_ADD_FMT_3(T)   \
  void __attribute__((noinline))   \
  vec_sat_u_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
  {\
unsigned i;\
for (i = 0; i < limit; i++)\
  {\
T x = op_1[i]; \
T y = op_2[i]; \
T ret; \
T overflow = __builtin_add_overflow (x, y, &ret);  \
out[i] = (T)(-overflow) | ret; \
  }\
  }

Passed the rv64gcv regression tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h: Add helper
macro for testing.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-9.c: New test.

Signed-off-by: Pan Li 
---
 .../riscv/rvv/autovec/binop/vec_sat_arith.h   | 18 +
 .../rvv/autovec/binop/vec_sat_u_add-10.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-11.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-12.c  | 20 +
 .../riscv/rvv/autovec/binop/vec_sat_u_add-9.c | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-run-10.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-11.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-12.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-9.c   | 75 +++
 9 files changed, 397 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-9.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
index 57b1bce4bd2..76f393fffbd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_arith.h
@@ -32,12 +32,30 @@ vec_sat_u_add_##T##_fmt_2 (T *out, T *op_1, T *op_2, 
unsigned limit) \
 }\
 }
 
+#define DEF_VEC_SAT_U_ADD_FMT_3(T)   \
+void __attribute__((noinline))   \
+vec_sat_u_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
+{\
+  unsigned i;\
+  for (i = 0; i < limit; i++)\
+{\
+  T x = op_1[i]; \
+  T y = op_2[i]; \
+  T ret; \
+  T overflow = __builtin_add_overflow (x, y, &ret);  \
+  out[i] = (T)(-overflow) | ret; \
+}\
+}
+
 #define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
 
 #define RUN_VEC_SAT_U_ADD_FMT_2(T, out, op_1, op_2, N) \
   vec_sat_u_add_##T##_fmt_2(out, op_1, op_2, N)
 
+#define RUN_VEC_SAT_U_ADD_FMT_3(T, out, op_1, op_2, N) \

Re: [PATCH v3] aarch64: Add vector popcount besides QImode [PR113859]

2024-06-17 Thread Andrew Pinski
On Mon, Jun 17, 2024, 5:59 AM Tamar Christina 
wrote:

> Hi,
>
> > -Original Message-
> > From: Pengxuan Zheng 
> > Sent: Friday, June 14, 2024 12:57 AM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Pengxuan Zheng 
> > Subject: [PATCH v3] aarch64: Add vector popcount besides QImode
> [PR113859]
> >
> > This patch improves GCC’s vectorization of __builtin_popcount for
> aarch64 target
> > by adding popcount patterns for vector modes besides QImode, i.e.,
> HImode,
> > SImode and DImode.
> >
> > With this patch, we now generate the following for V8HI:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >
> > For V4HI, we generate:
> >   cnt v1.8b, v.8b
> >   uaddlp  v2.4h, v1.8b
> >
> > For V4SI, we generate:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >   uaddlp  v3.4s, v2.8h
> >
> > For V2SI, we generate:
> >   cnt v1.8b, v.8b
> >   uaddlp  v2.4h, v1.8b
> >   uaddlp  v3.2s, v2.4h
> >
> > For V2DI, we generate:
> >   cnt v1.16b, v.16b
> >   uaddlp  v2.8h, v1.16b
> >   uaddlp  v3.4s, v2.8h
> >   uaddlp  v4.2d, v3.4s
>
> Nice patch!  We can do better for these sequences though. Would you
> instead consider using udot with a 0 accumulator and a multiplicand of 1?
>
> Essentially
> movi v0.16b, #0
> movi v1.16b, #1
> cnt v3.16b, v2.16b
> udot  v0.4s, v3.16b, v1.16b
>
> this has 1 instruction less on the critical path so should be half the
> latency of the uaddlp variants.
>

Of course that can only be done if the udot is enabled. But yes I agree
that is better.


> For the DI case you'll still need a final uaddlp.
>
> Cheers,
> Tamar
>
> >
> >   PR target/113859
> >
> > gcc/ChangeLog:
> >
> >   * config/aarch64/aarch64-simd.md (aarch64_addlp):
> > Rename to...
> >   (@aarch64_addlp): ... This.
> >   (popcount2): New define_expand.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/aarch64/popcnt-vec.c: New test.
> >
> > Signed-off-by: Pengxuan Zheng 
> > ---
> >  gcc/config/aarch64/aarch64-simd.md| 28 +++-
> >  gcc/testsuite/gcc.target/aarch64/popcnt-vec.c | 69 +++
> >  2 files changed, 96 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-
> > simd.md
> > index 0bb39091a38..ee73e13534b 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -3461,7 +3461,7 @@ (define_insn
> > "*aarch64_addlv_ze"
> >[(set_attr "type" "neon_reduc_add")]
> >  )
> >
> > -(define_expand "aarch64_addlp"
> > +(define_expand "@aarch64_addlp"
> >[(set (match_operand: 0 "register_operand")
> >   (plus:
> > (vec_select:
> > @@ -3517,6 +3517,32 @@ (define_insn "popcount2"
> >[(set_attr "type" "neon_cnt")]
> >  )
> >
> > +(define_expand "popcount2"
> > +  [(set (match_operand:VDQHSD 0 "register_operand")
> > +(popcount:VDQHSD (match_operand:VDQHSD 1 "register_operand")))]
> > +  "TARGET_SIMD"
> > +  {
> > +/* Generate a byte popcount. */
> > +machine_mode mode =  == 64 ? V8QImode : V16QImode;
> > +rtx tmp = gen_reg_rtx (mode);
> > +auto icode = optab_handler (popcount_optab, mode);
> > +emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
> > +
> > +/* Use a sequence of UADDLPs to accumulate the counts. Each step
> doubles
> > +   the element size and halves the number of elements. */
> > +do
> > +  {
> > +auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE
> (tmp));
> > +mode = insn_data[icode].operand[0].mode;
> > +rtx dest = mode == mode ? operands[0] : gen_reg_rtx
> (mode);
> > +emit_insn (GEN_FCN (icode) (dest, tmp));
> > +tmp = dest;
> > +  }
> > +while (mode != mode);
> > +DONE;
> > +  }
> > +)
> > +
> >  ;; 'across lanes' max and min ops.
> >
> >  ;; Template for outputting a scalar, so we can create __builtins which
> can be
> > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > new file mode 100644
> > index 000..0c4926d7ca8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> > @@ -0,0 +1,69 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fno-vect-cost-model" } */
> > +
> > +/* This function should produce cnt v.16b. */
> > +void
> > +bar (unsigned char *__restrict b, unsigned char *__restrict d)
> > +{
> > +  for (int i = 0; i < 1024; i++)
> > +d[i] = __builtin_popcount (b[i]);
> > +}
> > +
> > +/* This function should produce cnt v.16b and uaddlp (Add Long
> Pairwise). */
> > +void
> > +bar1 (unsigned short *__restrict b, unsigned short *__restrict d)
> > +{
> > +  for (int i = 0; i < 1024; i++)
> > +d[i] = __builtin_popcount (b[i]);
> > +}
> > +
> > +/* This function should produce cnt v.16b and 2 uaddlp (Add Long
> Pairwise). */
> > +void
> > +bar2 (unsigned int *__restrict b, unsigned int *__restrict d)

[PATCH][v2] Enhance if-conversion for automatic arrays

2024-06-17 Thread Richard Biener
Automatic arrays that are not address-taken should not be subject to
store data races.  This applies to the result array of OMP SIMD
in-branch lowered functions, which for the testcase otherwise prevents
vectorization with SSE and, for AVX and AVX512, leaves a spurious
.MASK_STORE to the stack surviving.

This inefficiency was noted in PR111793.

I've introduced ref_can_have_store_data_races, commonizing uses
of flag_store_data_races in if-conversion, cselim and store motion.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

PR tree-optimization/111793
* tree-ssa-alias.h (ref_can_have_store_data_races): Declare.
* tree-ssa-alias.cc (ref_can_have_store_data_races): New
function.
* tree-if-conv.cc (ifcvt_memrefs_wont_trap): Use
ref_can_have_store_data_races to allow more unconditional
stores.
* tree-ssa-loop-im.cc (execute_sm): Likewise.
* tree-ssa-phiopt.cc (cond_store_replacement): Likewise.

* gcc.dg/vect/vect-simd-clone-21.c: New testcase.
---
 .../gcc.dg/vect/vect-simd-clone-21.c  | 16 
 gcc/tree-if-conv.cc   | 11 +--
 gcc/tree-ssa-alias.cc | 19 +++
 gcc/tree-ssa-alias.h  |  2 ++
 gcc/tree-ssa-loop-im.cc   |  2 +-
 gcc/tree-ssa-phiopt.cc|  4 +---
 6 files changed, 44 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-clone-21.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-21.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-21.c
new file mode 100644
index 000..49c52fb59bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-21.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+
+#pragma omp declare simd simdlen(4) inbranch
+__attribute__((noinline)) int
+foo (int a, int b)
+{
+  return a + b;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target 
i?86-*-* x86_64-*-* } } } */
+/* if-conversion shouldn't need to resort to masked stores for the result
+   array created by OMP lowering since that's automatic and does not have
+   its address taken.  */
+/* { dg-final { scan-tree-dump-not "MASK_STORE" "vect" } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index c4c3ed41a44..57992b6deca 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -936,12 +936,11 @@ ifcvt_memrefs_wont_trap (gimple *stmt, 
vec drs)
 
   /* an unconditionaly write won't trap if the base is written
  to unconditionally.  */
-  if (base_master_dr
- && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
-   return flag_store_data_races;
-  /* or the base is known to be not readonly.  */
-  else if (base_object_writable (DR_REF (a)))
-   return flag_store_data_races;
+  if ((base_master_dr
+  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
+ /* or the base is known to be not readonly.  */
+ || base_object_writable (DR_REF (a)))
+   return !ref_can_have_store_data_races (base);
 }
 
   return false;
diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index 1a91d63a31e..fab048b0b59 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -3704,6 +3704,25 @@ stmt_kills_ref_p (gimple *stmt, tree ref)
   return stmt_kills_ref_p (stmt, &r);
 }
 
+/* Return whether REF can be subject to store data races.  */
+
+bool
+ref_can_have_store_data_races (tree ref)
+{
+  /* With -fallow-store-data-races do not care about them.  */
+  if (flag_store_data_races)
+return false;
+
+  tree base = get_base_address (ref);
+  if (auto_var_p (base)
+  && ! may_be_aliased (base))
+/* Automatic variables not aliased are not subject to
+   data races.  */
+return false;
+
+  return true;
+}
+
 
 /* Walk the virtual use-def chain of VUSE until hitting the virtual operand
TARGET or a statement clobbering the memory reference REF in which
diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 5cd64e72295..5834533ae9c 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -144,6 +144,8 @@ extern bool call_may_clobber_ref_p (gcall *, tree, bool = 
true);
 extern bool call_may_clobber_ref_p_1 (gcall *, ao_ref *, bool = true);
 extern bool stmt_kills_ref_p (gimple *, tree);
 extern bool stmt_kills_ref_p (gimple *, ao_ref *);
+extern bool ref_can_have_store_data_races (tree);
+
 enum translate_flags
   { TR_TRANSLATE, TR_VALUEIZE_AND_DISAMBIGUATE, TR_DISAMBIGUATE };
 extern tree get_continuation_for_phi (gimple *, ao_ref *, bool,
diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index f3fda2bd7ce..3acbd886a0d 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -2298,7 +2298,7 @@ execute_sm (class loop *loop, im_mem_ref *ref,
   bool always_stored = ref_always_access

[PATCH] tree-optimization/115493 - fix wrong code with SLP induction cond reduction

2024-06-17 Thread Richard Biener
The following fixes a bad final value being used when doing single-lane
SLP integer induction cond reduction vectorization.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

PR tree-optimization/115493
* tree-vect-loop.cc (vect_create_epilog_for_reduction): Use
the first scalar result.
---
 gcc/tree-vect-loop.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d9a2ad69484..7c79e9da106 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6843,8 +6843,8 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
 with the original initial value, unless induc_val is
 the same as initial_def already.  */
  tree zcompare = make_ssa_name (boolean_type_node);
- epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, new_temp,
-induc_val);
+ epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR,
+scalar_results[0], induc_val);
  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
  tree initial_def = reduc_info->reduc_initial_values[0];
  tree tmp = make_ssa_name (new_scalar_dest);
-- 
2.35.3


Patch ping

2024-06-17 Thread Jakub Jelinek
Hi!

I'd like to ping the
https://gcc.gnu.org/pipermail/gcc-patches/2024-June/653573.html
patch.  While the committed and backported patch fixed PCH on PIE
cc1/cc1plus etc. on PowerPC, it increased the size of the
rs6000_init_generated_builtins function quite a lot.
The above patch decreases it back, to even less than the size of
the function before my fix.

Jakub



[to-be-committed][RISC-V] Handle zero_extract destination for single bit insertions

2024-06-17 Thread Jeff Law
Combine will use zero_extract destinations for certain bitfield 
insertions.  If the bitfield is a single bit constant, then we can use 
bset/bclr.


In this case we are only dealing with word_mode objects, so we don't 
have to worry about the SI->DI extension issues for TARGET_64BIT.


The testcase was derived from 502.gcc in spec from the RAU team.


An earlier version of this (TARGET_64BIT only) went through Ventana's CI 
system.  This version has gone through mine after generalizing it to 
handle rv32 as well.  I'll wait for pre-commit CI to render its verdict 
before moving forward.


Jeff


diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 311f0d373c0..c6bd55c53f9 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -654,6 +654,18 @@ (define_split
  (any_or:DI (ashift:DI (const_int 1) (match_dup 1))
(match_dup 3)))])
 
+;; Yet another form of a bset/bclr that can be created by combine.
+(define_insn "*bsetclr_zero_extract"
+  [(set (zero_extract:X (match_operand:X 0 "register_operand" "+r")
+   (const_int 1)
+   (zero_extend:X (match_operand:QI 1 "register_operand" 
"r")))
+   (match_operand 2 "immediate_operand" "n"))]
+  "TARGET_ZBS
+   && (operands[2] == CONST0_RTX (mode)
+   || operands[2] == CONST1_RTX (mode))"
+  { return operands[2] == CONST0_RTX (mode) ? "bclr\t%0,%0,%1" : 
"bset\t%0,%0,%1"; }
+  [(set_attr "type" "bitmanip")])
+
 (define_insn "*bclr"
   [(set (match_operand:X 0 "register_operand" "=r")
(and:X (rotate:X (const_int -2)
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext-3.c 
b/gcc/testsuite/gcc.target/riscv/zbs-zext-3.c
new file mode 100644
index 000..0239014e06b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext-3.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64d" { target { rv64 } } } 
*/
+/* { dg-options "-march=rv32gc_zba_zbb_zbs -mabi=ilp32" { target { rv32 } } } 
*/
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+/* We need to adjust the constant so this works for rv32 and rv64.  */
+#if __riscv_xlen == 32
+#define ONE 1U
+#else
+#define ONE 1ULL
+#endif
+
+void add_to_hard_reg_set(long long *a, unsigned int count) {
+  int i = 0;
+  while(i++ < count)
+*a |= (1U << i);
+}
+
+void remove_from_hard_reg_set(long long *a, unsigned int count) {
+  int i = 0;
+  while(i++ < count)
+*a &= ~(ONE << i);
+}
+
+
+/* { dg-final { scan-assembler-not "and\t" } } */
+/* { dg-final { scan-assembler-not "andn\t" } } */


[pushed] doc: Mark up __cxa_atexit as @code.

2024-06-17 Thread Gerald Pfeifer
Pushed. (The diff is a bit larger due to line breaks.)

Gerald

gcc:
* doc/install.texi (Configuration): Mark up __cxa_atexit as @code.
---
 gcc/doc/install.texi | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 298031dc2de..1774a010889 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1779,12 +1779,12 @@ Produce code conforming to version 20191213.
 In the absence of this configuration option the default version is 20191213.
 
 @item --enable-__cxa_atexit
-Define if you want to use __cxa_atexit, rather than atexit, to
+Define if you want to use @code{__cxa_atexit}, rather than atexit, to
 register C++ destructors for local statics and global objects.
 This is essential for fully standards-compliant handling of
-destructors, but requires __cxa_atexit in libc.  This option is currently
-only available on systems with GNU libc.  When enabled, this will cause
-@option{-fuse-cxa-atexit} to be passed by default.
+destructors, but requires @code{__cxa_atexit} in libc.  This option is
+currently only available on systems with GNU libc.  When enabled, this
+will cause @option{-fuse-cxa-atexit} to be passed by default.
 
 @item --enable-gnu-indirect-function
 Define if you want to enable the @code{ifunc} attribute.  This option is
-- 
2.45.2


Ping^2 [PATCHv5] Optab: add isnormal_optab for __builtin_isnormal

2024-06-17 Thread HAO CHEN GUI
Hi,
  Gently ping it.
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653001.html

Thanks
Gui Haochen

在 2024/6/3 10:37, HAO CHEN GUI 写道:
> Hi,
>   All issues were addressed. Gently ping it.
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653001.html
> 
> Thanks
> Gui Haochen
> 
> 
> 在 2024/5/29 14:36, HAO CHEN GUI 写道:
>> Hi,
>>   This patch adds an optab for __builtin_isnormal. The normal check can be
>> implemented on rs6000 by a single instruction. It needs an optab to be
>> expanded to the certain sequence of instructions.
>>
>>   The subsequent patches will implement the expand on rs6000.
>>
>>   Compared to previous version, the main change is to specify return
>> value of the optab should be either 0 or 1.
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/652865.html
>>
>>   Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
>> regressions. Is this OK for trunk?
>>
>> Thanks
>> Gui Haochen
>>
>> ChangeLog
>> optab: Add isnormal_optab for isnormal builtin
>>
>> gcc/
>>  * builtins.cc (interclass_mathfn_icode): Set optab to isnormal_optab
>>  for isnormal builtin.
>>  * optabs.def (isnormal_optab): New.
>>  * doc/md.texi (isnormal): Document.
>>
>>
>> patch.diff
>> diff --git a/gcc/builtins.cc b/gcc/builtins.cc
>> index 53e9d210541..89ba56abf17 100644
>> --- a/gcc/builtins.cc
>> +++ b/gcc/builtins.cc
>> @@ -2463,6 +2463,8 @@ interclass_mathfn_icode (tree arg, tree fndecl)
>>builtin_optab = isfinite_optab;
>>break;
>>  case BUILT_IN_ISNORMAL:
>> +  builtin_optab = isnormal_optab;
>> +  break;
>>  CASE_FLT_FN (BUILT_IN_FINITE):
>>  case BUILT_IN_FINITED32:
>>  case BUILT_IN_FINITED64:
>> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> index 3eb4216141e..4fd7da095fe 100644
>> --- a/gcc/doc/md.texi
>> +++ b/gcc/doc/md.texi
>> @@ -8563,6 +8563,12 @@ Return 1 if operand 1 is a finite floating point 
>> number and 0
>>  otherwise.  @var{m} is a scalar floating point mode.  Operand 0
>>  has mode @code{SImode}, and operand 1 has mode @var{m}.
>>
>> +@cindex @code{isnormal@var{m}2} instruction pattern
>> +@item @samp{isnormal@var{m}2}
>> +Return 1 if operand 1 is a normal floating point number and 0
>> +otherwise.  @var{m} is a scalar floating point mode.  Operand 0
>> +has mode @code{SImode}, and operand 1 has mode @var{m}.
>> +
>>  @end table
>>
>>  @end ifset
>> diff --git a/gcc/optabs.def b/gcc/optabs.def
>> index dcd77315c2a..3c401fc0b4c 100644
>> --- a/gcc/optabs.def
>> +++ b/gcc/optabs.def
>> @@ -353,6 +353,7 @@ OPTAB_D (hypot_optab, "hypot$a3")
>>  OPTAB_D (ilogb_optab, "ilogb$a2")
>>  OPTAB_D (isinf_optab, "isinf$a2")
>>  OPTAB_D (isfinite_optab, "isfinite$a2")
>> +OPTAB_D (isnormal_optab, "isnormal$a2")
>>  OPTAB_D (issignaling_optab, "issignaling$a2")
>>  OPTAB_D (ldexp_optab, "ldexp$a3")
>>  OPTAB_D (log10_optab, "log10$a2")


RE: [PATCH v3] aarch64: Add vector popcount besides QImode [PR113859]

2024-06-17 Thread Tamar Christina
Hi,

> -Original Message-
> From: Pengxuan Zheng 
> Sent: Friday, June 14, 2024 12:57 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Pengxuan Zheng 
> Subject: [PATCH v3] aarch64: Add vector popcount besides QImode [PR113859]
> 
> This patch improves GCC’s vectorization of __builtin_popcount for aarch64 
> target
> by adding popcount patterns for vector modes besides QImode, i.e., HImode,
> SImode and DImode.
> 
> With this patch, we now generate the following for V8HI:
>   cnt v1.16b, v.16b
>   uaddlp  v2.8h, v1.16b
> 
> For V4HI, we generate:
>   cnt v1.8b, v.8b
>   uaddlp  v2.4h, v1.8b
> 
> For V4SI, we generate:
>   cnt v1.16b, v.16b
>   uaddlp  v2.8h, v1.16b
>   uaddlp  v3.4s, v2.8h
> 
> For V2SI, we generate:
>   cnt v1.8b, v.8b
>   uaddlp  v2.4h, v1.8b
>   uaddlp  v3.2s, v2.4h
> 
> For V2DI, we generate:
>   cnt v1.16b, v.16b
>   uaddlp  v2.8h, v1.16b
>   uaddlp  v3.4s, v2.8h
>   uaddlp  v4.2d, v3.4s

Nice patch!  We can do better for these sequences though. Would you instead 
consider using udot with a 0 accumulator and a multiplicand of 1?

Essentially
movi v0.16b, #0
movi v1.16b, #1
cnt v3.16b, v2.16b
udot  v0.4s, v3.16b, v1.16b

this has 1 instruction less on the critical path so should be half the latency 
of the uaddlp variants.

For the DI case you'll still need a final uaddlp.

Cheers,
Tamar

> 
>   PR target/113859
> 
> gcc/ChangeLog:
> 
>   * config/aarch64/aarch64-simd.md (aarch64_addlp):
> Rename to...
>   (@aarch64_addlp): ... This.
>   (popcount2): New define_expand.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/popcnt-vec.c: New test.
> 
> Signed-off-by: Pengxuan Zheng 
> ---
>  gcc/config/aarch64/aarch64-simd.md| 28 +++-
>  gcc/testsuite/gcc.target/aarch64/popcnt-vec.c | 69 +++
>  2 files changed, 96 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> 
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-
> simd.md
> index 0bb39091a38..ee73e13534b 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3461,7 +3461,7 @@ (define_insn
> "*aarch64_addlv_ze"
>[(set_attr "type" "neon_reduc_add")]
>  )
> 
> -(define_expand "aarch64_addlp"
> +(define_expand "@aarch64_addlp"
>[(set (match_operand: 0 "register_operand")
>   (plus:
> (vec_select:
> @@ -3517,6 +3517,32 @@ (define_insn "popcount2"
>[(set_attr "type" "neon_cnt")]
>  )
> 
> +(define_expand "popcount<mode>2"
> +  [(set (match_operand:VDQHSD 0 "register_operand")
> +(popcount:VDQHSD (match_operand:VDQHSD 1 "register_operand")))]
> +  "TARGET_SIMD"
> +  {
> +/* Generate a byte popcount. */
> +machine_mode mode = <bitsize> == 64 ? V8QImode : V16QImode;
> +rtx tmp = gen_reg_rtx (mode);
> +auto icode = optab_handler (popcount_optab, mode);
> +emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
> +
> +/* Use a sequence of UADDLPs to accumulate the counts. Each step doubles
> +   the element size and halves the number of elements. */
> +do
> +  {
> +auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE (tmp));
> +mode = insn_data[icode].operand[0].mode;
> +rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
> +emit_insn (GEN_FCN (icode) (dest, tmp));
> +tmp = dest;
> +  }
> +while (mode != <MODE>mode);
> +DONE;
> +  }
> +)
> +
>  ;; 'across lanes' max and min ops.
> 
>  ;; Template for outputting a scalar, so we can create __builtins which can be
> diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> new file mode 100644
> index 000..0c4926d7ca8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/popcnt-vec.c
> @@ -0,0 +1,69 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-vect-cost-model" } */
> +
> +/* This function should produce cnt v.16b. */
> +void
> +bar (unsigned char *__restrict b, unsigned char *__restrict d)
> +{
> +  for (int i = 0; i < 1024; i++)
> +d[i] = __builtin_popcount (b[i]);
> +}
> +
> +/* This function should produce cnt v.16b and uaddlp (Add Long Pairwise). */
> +void
> +bar1 (unsigned short *__restrict b, unsigned short *__restrict d)
> +{
> +  for (int i = 0; i < 1024; i++)
> +d[i] = __builtin_popcount (b[i]);
> +}
> +
> +/* This function should produce cnt v.16b and 2 uaddlp (Add Long Pairwise). 
> */
> +void
> +bar2 (unsigned int *__restrict b, unsigned int *__restrict d)
> +{
> +  for (int i = 0; i < 1024; i++)
> +d[i] = __builtin_popcount (b[i]);
> +}
> +
> +/* This function should produce cnt v.16b and 3 uaddlp (Add Long Pairwise). 
> */
> +void
> +bar3 (unsigned long long *__restrict b, unsigned long long *__restrict d)
> +{
> +  for (int i = 0; i < 1024; i++)
> +d[i] = __builtin_popcountll (b[i]);
> +}
> +
> +/* SLP
> +   This function should produce cnt v.8b and uaddlp (Add Long Pairwise)

Re: [RFC PATCH] ARM: thumb1: Use LDMIA/STMIA for DI/DF loads/stores

2024-06-17 Thread Richard Earnshaw (lists)
Hi Siarhei,

On 16/06/2024 09:51, Siarhei Volkau wrote:
> If the address register is dead after load/store operation it looks
> beneficial to use LDMIA/STMIA instead of pair of LDR/STR instructions,
> at least if optimizing for size.
> 
> E.g.
>  ldr r0, [r3, #0]
>  ldr r1, [r3, #4]  @ r3 is dead after
> will be replaced by
>  ldmia r3!, {r0, r1}
> 
> also for reused reg is legal to:
>  ldr r2, [r3, #0]
>  ldr r3, [r3, #4] @ r3 reused
> will be replaced by
>  ldmia r3, {r2, r3}
> 
> However, I know little about other thumb CPUs except Cortex M0/M0+.
> 1. Is there any drawbacks if optimizing speed?
> 2. Might it be profitable for thumb2?

I like the idea behind this patch, but I think I'd try first doing this as a 
peephole2 rule to rewrite the address in this case.  That has the additional 
advantage that we then estimate the size of the instruction more accurately.  

I think it would then be easy to extend this to thumb2 as well if it looks like 
a win (perhaps only for -Os in the thumb2 case).


> 
> Regarding code size with the patch gives for v6-m/nofp:
>libgcc:  -52 bytes / -0.10%
> Newlib's libc:  -68 bytes / -0.03%
>  libm:  -96 bytes / -0.10%
> libstdc++: -140 bytes / -0.02%
> 
> Also I have questions regarding testing the patch.
> It's obscure how to do it properly, for now I compile
> for arm-none-eabi target and make check seems failing
> on any compilable test due to missing symbols from libnosys.
> I guess that arm-gnu-elf is the correct triple but it still
> advisable for proper commands to make & run the testsuite.

For testing, I'd start with something like 
gcc/testsuite/gcc.target/arm/thumb-andsi.c as a template and adapt that for 
your specific case.  Matching something like "ldmia\tr[0-7]!," should be enough.

R.

> 
> Signed-off-by: Siarhei Volkau 
> ---
>  gcc/config/arm/arm-protos.h |  2 +-
>  gcc/config/arm/arm.cc   |  7 ++-
>  gcc/config/arm/thumb1.md| 10 --
>  3 files changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 2cd560c9925..548bfbaccdc 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -254,7 +254,7 @@ extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
>  extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
>  extern void thumb1_final_prescan_insn (rtx_insn *);
>  extern void thumb2_final_prescan_insn (rtx_insn *);
> -extern const char *thumb_load_double_from_address (rtx *);
> +extern const char *thumb_load_double_from_address (rtx *, rtx_insn *);
>  extern const char *thumb_output_move_mem_multiple (int, rtx *);
>  extern const char *thumb_call_via_reg (rtx);
>  extern void thumb_expand_cpymemqi (rtx *);
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index b8c32db0a1d..73c2478ed77 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -28350,7 +28350,7 @@ thumb1_output_interwork (void)
> a computed memory address.  The computed address may involve a
> register which is overwritten by the load.  */
>  const char *
> -thumb_load_double_from_address (rtx *operands)
> +thumb_load_double_from_address (rtx *operands, rtx_insn *insn)
>  {
>rtx addr;
>rtx base;
> @@ -28368,6 +28368,11 @@ thumb_load_double_from_address (rtx *operands)
>switch (GET_CODE (addr))
>  {
>  case REG:
> +  if (find_reg_note (insn, REG_DEAD, addr))
> +return "ldmia\t%m1!, {%0, %H0}";
> +  else if (REGNO (addr) == REGNO (operands[0]) + 1)
> +return "ldmia\t%m1, {%0, %H0}";
> +
>operands[2] = adjust_address (operands[1], SImode, 4);
>  
>if (REGNO (operands[0]) == REGNO (addr))
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index d7074b43f60..8da6887b560 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -637,8 +637,11 @@
>  case 5:
>return \"stmia\\t%0, {%1, %H1}\";
>  case 6:
> -  return thumb_load_double_from_address (operands);
> +  return thumb_load_double_from_address (operands, insn);
>  case 7:
> +  if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
> +  && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
> +return \"stmia\\t%m0!, {%1, %H1}\";
>operands[2] = gen_rtx_MEM (SImode,
>plus_constant (Pmode, XEXP (operands[0], 0), 4));
>output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
> @@ -970,8 +973,11 @@
>  case 2:
>return \"stmia\\t%0, {%1, %H1}\";
>  case 3:
> -  return thumb_load_double_from_address (operands);
> +  return thumb_load_double_from_address (operands, insn);
>  case 4:
> +  if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
> +  && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
> +return \"stmia\\t%m0!, {%1, %H1}\";
>operands[2] = gen_rtx_MEM (SImode,
>plus_const

[PATCH] tree-optimization/115508 - fix ICE with SLP scheduling and extern vector

2024-06-17 Thread Richard Biener
When there's a permute after an extern vector we can run into a case
that didn't consider the scheduled node being a permute which lacks
a representative.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/115508
* tree-vect-slp.cc (vect_schedule_slp_node): Guard check on
representative.

* gcc.target/i386/pr115508.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr115508.c | 15 +++
 gcc/tree-vect-slp.cc |  1 +
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115508.c

diff --git a/gcc/testsuite/gcc.target/i386/pr115508.c 
b/gcc/testsuite/gcc.target/i386/pr115508.c
new file mode 100644
index 000..a97b2007f7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115508.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=znver1" } */
+
+typedef long long v4di __attribute__((vector_size(4 * sizeof (long long))));
+
+v4di vec_var;
+extern long long array1[];
+long long g(void)
+{
+  int total_error_4 = 0;
+  total_error_4 += array1 [0] + array1 [1] + array1 [2] + array1 [3];
+  v4di t = vec_var;
+  long long iorvar = t [1] | t [0] | t [2] | t [3];
+  return iorvar + total_error_4;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 38e7fadb679..6ef04b14dd8 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -9674,6 +9674,7 @@ vect_schedule_slp_node (vec_info *vinfo,
  si = gsi_after_labels (vinfo->bbs[0]);
}
   else if (is_a <bb_vec_info> (vinfo)
+  && SLP_TREE_CODE (node) != VEC_PERM_EXPR
   && gimple_bb (last_stmt) != gimple_bb (stmt_info->stmt)
   && gimple_could_trap_p (stmt_info->stmt))
{
-- 
2.35.3


  1   2   >