[PATCH V3] rs6000: Load high and low part of 64bit constant independently

2023-01-03 Thread Jiufu Guo via Gcc-patches
Hi,

Compared with the previous version, this patch updates the comments only.
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608293.html

For a complicated 64-bit constant, below is one instruction sequence to
build it:
lis 9,0x800a
ori 9,9,0xabcd
sldi 9,9,32
oris 9,9,0xc167
ori 9,9,0xfa16

while we can also use below sequence to build:
lis 9,0xc167
lis 10,0x800a
ori 9,9,0xfa16
ori 10,10,0xabcd
rldimi 9,10,32,0
This sequence uses two registers to build the high and low parts first,
and then merges them.

In terms of parallelism, this sequence would be faster (of course, it uses
one more register, with potential register pressure).

The two-register (parallel) instruction sequence can be generated only
if can_create_pseudo_p () is true.  Otherwise, the one-register
sequence is generated.

Bootstrap and regtest pass on ppc64{,le}.
Is this ok for trunk?


BR,
Jeff(Jiufu)


gcc/ChangeLog:

* config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Generate
more parallel code if can_create_pseudo_p.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/parall_5insn_const.c: New test.

---
 gcc/config/rs6000/rs6000.cc   | 39 +--
 .../gcc.target/powerpc/parall_5insn_const.c   | 27 +
 2 files changed, 54 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 6ac3adcec6b..b4f03499252 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10366,19 +10366,34 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 }
   else
 {
-  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
-
-  emit_move_insn (temp, GEN_INT (sext_hwi (ud4 << 16, 32)));
-  if (ud3 != 0)
-   emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud3)));
+  if (can_create_pseudo_p ())
+   {
+ /* lis HIGH,UD4 ; ori HIGH,UD3 ;
+lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0.  */
+ rtx high = gen_reg_rtx (DImode);
+ rtx low = gen_reg_rtx (DImode);
+ HOST_WIDE_INT num = (ud2 << 16) | ud1;
+ rs6000_emit_set_long_const (low, sext_hwi (num, 32));
+ num = (ud4 << 16) | ud3;
+ rs6000_emit_set_long_const (high, sext_hwi (num, 32));
+ emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
+  GEN_INT (0xffffffff)));
+   }
+  else
+   {
+ /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
+oris DEST,UD2 ; ori DEST,UD1.  */
+ emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
+ if (ud3 != 0)
+   emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
 
-  emit_move_insn (ud2 != 0 || ud1 != 0 ? temp : dest,
- gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)));
-  if (ud2 != 0)
-   emit_move_insn (ud1 != 0 ? temp : dest,
-   gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
-  if (ud1 != 0)
-   emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
+ emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+ if (ud2 != 0)
+   emit_move_insn (dest,
+   gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
+ if (ud1 != 0)
+   emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+   }
 }
 }
 
diff --git a/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c 
b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
new file mode 100644
index 00000000000..e3a9a7264cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/parall_5insn_const.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mno-prefixed -save-temps" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+/* { dg-final { scan-assembler-times {\mlis\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mori\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */
+
+void __attribute__ ((noinline)) foo (unsigned long long *a)
+{
+  /* 2 lis + 2 ori + 1 rldimi for each constant.  */
+  *a++ = 0x800aabcdc167fa16ULL;
+  *a++ = 0x7543a876867f616ULL;
+}
+
+long long A[] = {0x800aabcdc167fa16ULL, 0x7543a876867f616ULL};
+int
+main ()
+{
+  long long res[2];
+
+  foo (res);
+  if (__builtin_memcmp (res, A, sizeof (res)) != 0)
+__builtin_abort ();
+
+  return 0;
+}
-- 
2.17.1



[PATCH-4, rs6000] Change ilp32 target check for some scalar-extract-sig and scalar-insert-exp test cases

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  "ilp32" is used in these test cases to make sure test cases only run on a
32-bit environment. Unfortunately, these cases also run with
"-m32/-mpowerpc64" which causes unexpected errors. This patch changes the
target check to skip if "has_arch_ppc64" is set. So the test cases won't run
when arch_ppc64 has been set.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-sig-2.c: Replace ilp32 check
with dg-skip-if has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-2.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-5.c: Likewise.

patch.diff
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
index 39ee74c94dc..148b5fbd9fa 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
index efd69725905..956c1183beb 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
index f85966a6fdf..9a7949fb89a 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "" { has_arch_ppc64 } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */



[PATCH-3, rs6000] Change mode and insn condition for scalar insert exp instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the mode of exponent to GPR in scalar insert exp
pattern, as the exponent can be put into a 32-bit register. Also the
condition check is changed from TARGET_64BIT to TARGET_POWERPC64.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_insert_exp): Replace bif-pattern from xsiexpdp
to xsiexpdp_di.
(__builtin_vsx_scalar_insert_exp_dp): Replace bif-pattern from
xsiexpdpf to xsiexpdpf_di.
* config/rs6000/vsx.md (xsiexpdp): Rename to...
(xsiexpdp_<mode>): ..., set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.
(xsiexpdpf): Rename to...
(xsiexpdpf_<mode>): ..., set the mode of second operand to GPR and
replace TARGET_64BIT with TARGET_POWERPC64.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-insert-exp-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-insert-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-12.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-13.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-3.c: Likewise.
* gcc.target/powerpc/bfp/scalar-insert-exp-4.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 25647b7bdd2..b1b5002d7d9 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2854,10 +2854,10 @@

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
 unsigned long long);
-VSIEDP xsiexpdp {}
+VSIEDP xsiexpdp_di {}

   const double __builtin_vsx_scalar_insert_exp_dp (double, unsigned long long);
-VSIEDPF xsiexpdpf {}
+VSIEDPF xsiexpdpf_di {}

   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
 XL_LEN_R xl_len_r {}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27e03a4cf6c..3376090cc6f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5137,22 +5137,22 @@ (define_insn "xsiexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Insert Exponent Double-Precision
-(define_insn "xsiexpdp"
+(define_insn "xsiexpdp_<mode>"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DI 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
-(define_insn "xsiexpdpf"
+(define_insn "xsiexpdpf_<mode>"
   [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
(unspec:DF [(match_operand:DF 1 "register_operand" "r")
-   (match_operand:DI 2 "register_operand" "r")]
+   (match_operand:GPR 2 "register_operand" "r")]
 UNSPEC_VSX_SIEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsiexpdp %x0,%1,%2"
   [(set_attr "type" "fpsimple")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
index d8243258a67..88d77564158 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-0.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
index 8260b107178..2f219ddc83a 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-12.c
index 384fc9cc675..9eade34d9ad 100644
--- 

[PATCH-2, rs6000] Change mode and insn condition for scalar extract sig instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the return type of __builtin_vsx_scalar_extract_sig
from const signed long to const signed long long, so that it can be called
with "-m32/-mpowerpc64" option. The bif needs TARGET_POWERPC64 instead of
TARGET_64BIT. So the condition check in the expander is changed.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2023-01-03  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_sig): Set return type to const signed
long long.
* config/rs6000/vsx.md (xsxsigdp): Replace TARGET_64BIT with
TARGET_POWERPC64.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-sig-0.c: Replace lp64 check
with has_arch_ppc64.
* gcc.target/powerpc/bfp/scalar-extract-sig-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-sig-6.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index a8f1d3f1b3d..25647b7bdd2 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2849,7 +2849,7 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_sig (double);
+  const signed long long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

   const double __builtin_vsx_scalar_insert_exp (unsigned long long, \
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 229c26c3a61..27e03a4cf6c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5111,7 +5111,7 @@ (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXSIG))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR && TARGET_POWERPC64"
   "xsxsigdp %0,%x1"
   [(set_attr "type" "integer")])

diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
index 637080652b7..d22f7d1b274 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-0.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
index f12eed3d9d5..64747d73a51 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
index c85072da138..561be53fb9b 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-6.c
@@ -1,7 +1,7 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target p9vector_hw } */
 /* { dg-options "-mdejagnu-cpu=power9" } */
+/* { dg-require-effective-target has_arch_ppc64 } */

 /* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>


[PATCH-1, rs6000] Change mode and insn condition for scalar extract exp instruction

2023-01-03 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch changes the return type of __builtin_vsx_scalar_extract_exp
from const signed long to const signed int, as the exponent can be put in
a signed int. It is also in line with the external interface definition of
the bif. The mode of exponent operand in "xsxexpdp" is changed to GPR mode
and TARGET_64BIT check is removed, as the instruction can be executed on
a 32-bit environment.

  The test cases are modified according to the changes of expand pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

Gui Haochen

ChangeLog
2022-12-23  Haochen Gui  

gcc/
* config/rs6000/rs6000-builtins.def
(__builtin_vsx_scalar_extract_exp): Set return type to const signed
int and set its bif-pattern to xsxexpdp_si, move it from power9-64 to
power9 catalog.
* config/rs6000/vsx.md (xsxexpdp): Rename to ...
(xsxexpdp_<mode>): ..., set mode of operand 0 to GPR and remove
TARGET_64BIT check.
* doc/extend.texi (scalar_extract_exp): Remove 64-bit environment
requirement when it has a 64-bit argument.

gcc/testsuite/
* gcc.target/powerpc/bfp/scalar-extract-exp-0.c: Remove lp64 check.
* gcc.target/powerpc/bfp/scalar-extract-exp-1.c: Likewise.
* gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Deleted as the case is
invalid.
* gcc.target/powerpc/bfp/scalar-extract-exp-6.c: Remove lp64 check.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index f76f54793d7..a8f1d3f1b3d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2833,6 +2833,8 @@
   const signed int __builtin_dtstsfi_ov_td (const int<6>, _Decimal128);
 TSTSFI_OV_TD dfptstsfi_unordered_td {}

+  const signed int  __builtin_vsx_scalar_extract_exp (double);
+VSEEDP xsxexpdp_si {}

 [power9-64]
   void __builtin_altivec_xst_len_r (vsc, void *, long);
@@ -2847,9 +2849,6 @@
   pure vsc __builtin_vsx_lxvl (const void *, signed long);
 LXVL lxvl {}

-  const signed long __builtin_vsx_scalar_extract_exp (double);
-VSEEDP xsxexpdp {}
-
   const signed long __builtin_vsx_scalar_extract_sig (double);
 VSESDP xsxsigdp {}

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 992fbc983be..229c26c3a61 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5089,11 +5089,11 @@ (define_insn "xsxexpqp_"
   [(set_attr "type" "vecmove")])

 ;; VSX Scalar Extract Exponent Double-Precision
-(define_insn "xsxexpdp"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-   (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
+(define_insn "xsxexpdp_<mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")]
 UNSPEC_VSX_SXEXPDP))]
-  "TARGET_P9_VECTOR && TARGET_64BIT"
+  "TARGET_P9_VECTOR"
   "xsxexpdp %0,%x1"
   [(set_attr "type" "integer")])

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d3812fa55b0..7c087967234 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19598,7 +19598,10 @@ bool scalar_test_neg (double source);
 bool scalar_test_neg (__ieee128 source);
 @end smallexample

-The @code{scalar_extract_exp} and @code{scalar_extract_sig}
+The @code{scalar_extract_exp} with a 64-bit source argument
+function requires an environment supporting ISA 3.0 or later.
+The @code{scalar_extract_exp} with a 128-bit source argument
+and @code{scalar_extract_sig}
 functions require a 64-bit environment supporting ISA 3.0 or later.
 The @code{scalar_extract_exp} and @code{scalar_extract_sig} built-in
 functions return the significand and the biased exponent value
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
index 35bf1b240f3..d971833748e 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-0.c
@@ -1,9 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power9" } */

-/* This test should succeed only on 64-bit configurations.  */
 #include <altivec.h>

 unsigned int
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c 
b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
index 9737762c1d4..1cb438f9b70 100644
--- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-1.c
@@ -1,9 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-require-effective-target lp64 } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */

-/* This test should succeed only on 64-bit 

Re: [PATCH V4 2/2] rs6000: use li;x?oris to build constant

2023-01-03 Thread Jiufu Guo via Gcc-patches
Hi,

I would like to have a ping on this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608292.html


BR,
Jeff (Jiufu)


Jiufu Guo  writes:

> Hi,
>
> For constant C:
> If '(c & 0xFFFFFFFF8000FFFFULL) == 0xFFFFFFFF00000000' or say:
> 32(1) || 1(0) || 15(x) || 16(0), we could use "lis; xoris" to build.
>
> Here N(M) means N contiguous bits of value M; x for M means either
> 1 or 0 is acceptable; '||' means concatenation.
>
> This patch updates rs6000_emit_set_long_const to support those constants.
>
> Compared with the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/607618.html
> this patch fixes conflicts with trunk.
>
> Bootstrap and regtest pass on ppc64{,le}.
>
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
>
>   PR target/106708
>
> gcc/ChangeLog:
>
>   * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add to build
>   constants through "lis; xoris".
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/powerpc/pr106708.c: Add test function.
>
> ---
>  gcc/config/rs6000/rs6000.cc |  7 +++
>  gcc/testsuite/gcc.target/powerpc/pr106708.c | 10 +-
>  2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 8c1192a10c8..1138d5e8cd4 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -10251,6 +10251,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT 
> c)
>if (ud1 != 0)
>   emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
>  }
> +  else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
> +{
> +  /* lis; xoris */
> +  temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
> +  emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
> +  emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT 
> (0x80000000)));
> +}
>else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
>  {
>/* li; xoris */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106708.c 
> b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> index dc9ceda8367..a015c71e630 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr106708.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr106708.c
> @@ -4,7 +4,7 @@
>  /* { dg-require-effective-target has_arch_ppc64 } */
>  
>  long long arr[]
> -  = {0x7cdeab55LL, 0x98765432LL, 0xabcdLL};
> += {0x7cdeab55LL, 0x98765432LL, 0xabcdLL, 0x6543LL};
>  
>  void __attribute__ ((__noipa__)) lixoris (long long *arg)
>  {
> @@ -27,6 +27,13 @@ void __attribute__ ((__noipa__)) lisrldicl (long long *arg)
>  /* { dg-final { scan-assembler-times {\mlis .*,0xabcd\M} 1 } } */
>  /* { dg-final { scan-assembler-times {\mrldicl .*,0,32\M} 1 } } */
>  
> +void __attribute__ ((__noipa__)) lisxoris (long long *arg)
> +{
> +  *arg = 0xffffffff65430000LL;
> +}
> +/* { dg-final { scan-assembler-times {\mlis .*,0xe543\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxoris .*0x8000\M} 1 } } */
> +
>  int
>  main ()
>  {
> @@ -35,6 +42,7 @@ main ()
>lixoris (a);
>lioris (a + 1);
>lisrldicl (a + 2);
> +  lisxoris (a + 3);
>if (__builtin_memcmp (a, arr, sizeof (arr)) != 0)
>  __builtin_abort ();
>return 0;


[PATCH] Add link to gmplib.org

2023-01-03 Thread Benson Muite via Gcc-patches
Link is missing from install documentation
---
 gcc/doc/install.texi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index ccc8d15fd08..18e8709a169 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -396,7 +396,8 @@ install the libraries.
 @table @asis
 @item GNU Multiple Precision Library (GMP) version 4.3.2 (or later)

-Necessary to build GCC@.  If a GMP source distribution is found in a
+Necessary to build GCC@.  It can be downloaded from
+@uref{https://gmplib.org/}.  If a GMP source distribution is found in a
 subdirectory of your GCC sources named @file{gmp}, it will be built
 together with GCC.  Alternatively, if GMP is already installed but it
 is not in your library search path, you will have to configure with the
-- 
2.39.0


Re: [PATCH] [OpenMP] GC unused SIMD clones

2023-01-03 Thread Sandra Loosemore via Gcc-patches

On 1/2/23 03:20, Tobias Burnus wrote:

On 25.11.22 03:13, Sandra Loosemore wrote:

This patch is a followup to my not-yet-reviewed patch
[PATCH v4] OpenMP: Generate SIMD clones for functions with "declare
target"


That patch got reviewed and went into mainline on Nov 15, 2022 as
https://gcc.gnu.org/r13-4309-g309e2d95e3b930c6f15c8a5346b913158404c76d


In comments on a previous iteration of that patch, I was asked to do
something to delete unused SIMD clones to avoid code bloat; this is it.

I've implemented something like a simple mark-and-sweep algorithm.
Clones that are used are marked at the point where the call is
generated in the vectorizer.  The loop that iterates over functions to
apply the passes after IPA is modified to defer processing of unmarked
clones, and anything left over is deleted.



Jakub referred to Honza for the review, who wrote yesterday off list (to
me and Sandra):


I am really sorry for taking such a long time.  It was a busy month for me
and I was not very keen on the idea, since we had such logic
implemented many years ago but removed it to be able to determine
functions to be output early and optimize code layout.

I see that this is not possible with current organization where
vectorization is run late, so I guess it does make sense to do what you
are doing.

Patch is OK,
Honza


Thanks for the review! (And to Sandra: thanks for the patch.)

I leave it to Sandra to commit her patch and only want to update the
gcc-patches@ email. However, I think we can expect a commit tomorrow.
(Today is a holiday at her place - as new year's day fell on a Sunday.)


Yes, the patch is committed now, and also backported to og12.  Thanks, 
Honza, for the review, and Tobias, for your assistance.  I'm really glad 
to get this project done, finally.  :-)


-Sandra


[PATCH] libiberty: Handle Windows nul device in unlink-if-ordinary.c [PR108276]

2023-01-03 Thread Himal via Gcc-patches
libiberty/ChangeLog:
        * unlink-if-ordinary.c (unlink_if_ordinary):
Handle Windows nul device

---
 libiberty/unlink-if-ordinary.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libiberty/unlink-if-ordinary.c b/libiberty/unlink-if-ordinary.c
index 84328b216..ae9090e54 100644
--- a/libiberty/unlink-if-ordinary.c
+++ b/libiberty/unlink-if-ordinary.c
@@ -62,11 +62,18 @@ was made to unlink the file because it is special.
 int
 unlink_if_ordinary (const char *name)
 {
+/* MS-Windows 'stat' function (and in turn, S_ISREG)
+   reports the null device as a regular file.  */
+#ifdef _WIN32
+if (stricmp (name, "nul") == 0)
+  return 1;
+#else
   struct stat st;

   if (lstat (name, &st) == 0
   && (S_ISREG (st.st_mode) || S_ISLNK (st.st_mode)))
 return unlink (name);
+#endif

   return 1;
 }
--
2.39.0




Re: [RFC/PATCH] Remove the workaround for _Float128 precision [PR107299]

2023-01-03 Thread Michael Meissner via Gcc-patches
On Wed, Dec 21, 2022 at 09:40:24PM +0000, Joseph Myers wrote:
> On Wed, 21 Dec 2022, Segher Boessenkool wrote:
> 
> > > --- a/gcc/tree.cc
> > > +++ b/gcc/tree.cc
> > > @@ -9442,15 +9442,6 @@ build_common_tree_nodes (bool signed_char)
> > >if (!targetm.floatn_mode (n, extended).exists ())
> > >   continue;
> > >int precision = GET_MODE_PRECISION (mode);
> > > -  /* Work around the rs6000 KFmode having precision 113 not
> > > -  128.  */
> > 
> > It has precision 126 now fwiw.
> > 
> > Joseph: what do you think about this patch?  Is the workaround it
> > removes still useful in any way, do we need to do that some other way if
> > we remove this?
> 
> I think it's best for the TYPE_PRECISION, for any type with the binary128 
> format, to be 128 (not 126).
> 
> It's necessary that _Float128, _Float64x and long double all have the same 
> TYPE_PRECISION when they have the same (binary128) format, or at least 
> that TYPE_PRECISION for _Float128 >= that for long double >= that for 
> _Float64x, so that the rules in c_common_type apply properly.
> 
> How the TYPE_PRECISION compares to that of __ibm128, or of long double 
> when that's double-double, is less important.

When I did the original implementation years ago, there were various implicit
assumptions that for any one precision, there must be only one floating point
type.

I tend to agree that logically the precision should be 128, but until we go
through and fix all of these assumptions, it may be problematical.  This shows
up in the whole infrastructure of looking for a FP type with larger precision
than a given precision.  There just isn't an ordering that works and preserves
all values.

I'm coming to think that we may want 2 types of FP, one is a standard FP type
where you can convert to a larger type, and the other for various FP types
where there is no default widening conversion.

And logically there is the issue with 16-bit floats, given that we have
different versions of 16-bit float.

And if an implementation ever wanted to support both BID and DFP decimal types
at the same time, they would have similar issues.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] c++, TLS: Support cross-tu static initialization for targets without alias support [PR106435].

2023-01-03 Thread Iain Sandoe



> On 3 Jan 2023, at 22:22, Jason Merrill  wrote:
> 
> On 12/7/22 10:39, Iain Sandoe wrote:
>>  This has been tested on x86_64 and arm64 Darwin and on x86_64 linux gnu.
>>  The basic patch is live in the homebrew macOS support and so has had quite
>>  wide coverage on non-trivial codebases.
>>OK for master?
>>  Iain
>>Since this actually fixes wrong code, I wonder if we should also consider
>>  back-porting.
>>--- >8 ---
>> The description below relates to the code path when TARGET_SUPPORTS_ALIASES 
>> is
>> false; current operation is maintained for targets with alias support and any
>> new support code should be DCEd in that case.
>> --
>> Currently, cross-tu static initialisation is not supported for targets 
>> without
>> alias support.
>> The patch adds support by building a shim function in place of the alias for
>> these targets; the shim simply calls the generic initialiser.  Although this 
>> is
>> slightly less efficient than the alias, in practice (for targets that allow
>> sibcalls) the penalty is a single jump when code is optimised.
>> From the perspective of a TU referencing an extern TLS variable, there is no
>> way to determine if it requires a guarded dynamic init.  So, in the 
>> referencing
>> TU, we build a weak reference to the potential init and check at runtime if 
>> the
>> init is present before calling it.  This strategy is fine for targets that 
>> have
>> ELF semantics, but fails at link time for Mach-O (which does not permit the
>> reference to be undefined in the static link).
>> The actual initialiser call is contained in a wrapper function, and to 
>> resolve
>> the Mach-O linker issue, in the TU that is referencing the var, we now 
>> generate
>> both the wrapper _and_ a weak definition of a dummy init function.  In the 
>> case
>> that there _is_ a dynamic init (in a different TU), that version will be 
>> non-weak
>> and will override the weak dummy one.
> 
> IIUC, this isn't reliable in general; in specific, I believe that the glibc 
> dynamic loader no longer prefers strong definitions to weak ones.

Neither does Darwin’s dynamic loader; this implementation works there because 
the static linker _will_ override the weak def with a strong one.  IIUC, 
binutils ld does this too.

If we need this to work between DSOs then that potentially presents a problem 
(for Darwin the DSO is identified so that the symbol will be found in the 
library that resolved it in the static link, [but that can be defeated by 
forcing “flat linking”]), I am not sure if glibc dynamic loader would do 
something similar (although this code path is not taken on ELF targets since 
they have the symbol aliases).

> Perhaps on targets that don't allow weakrefs to be unbound,

Darwin would allow it if we were able to tell the static linker that the symbol 
is permitted to be undefined - but since we don’t know the symbol’s name 
outside the FE, that is not going to fly.

> we should unconditionally emit the init function where the variable is 
> defined, even if it does nothing, and unconditionally call it from the 
> wrapper?

OK. that seems a safer option .. I will have to look at it when I have a chance.

thanks
Iain 
> 
>> In the case that we have a trivial
>> static init (so no init in any other TU) the weak-defined dummy init will be
>> called (a single return insn for optimised code).  We mitigate the call to
>> the dummy init by reworking the wrapper code-gen path to remove the test for
>> the weak reference function (as it will always be true) since the static 
>> linker
>> will now determine the function to be called.
>> Signed-off-by: Iain Sandoe 
>>  PR c++/106435
>> gcc/c-family/ChangeLog:
>>  * c-opts.cc (c_common_post_options): Allow fextern-tls-init for targets
>>  without alias support.
>> gcc/cp/ChangeLog:
>>  * decl2.cc (get_tls_init_fn): Allow targets without alias support.
>>  (handle_tls_init): Emit aliases for single init functions where the
>>  target supports this, otherwise emit a stub function that calls the
>>  main tls init function.  (generate_tls_dummy_init): New.
>> gcc/testsuite/ChangeLog:
>>  * g++.dg/cpp0x/pr106435-b.cc: New file.
>>  * g++.dg/cpp0x/pr106435.C: New test.
>>  * g++.dg/cpp0x/pr106435.h: New file.
>> ---
>>  gcc/c-family/c-opts.cc   |  2 +-
>>  gcc/cp/decl2.cc  | 80 
>>  gcc/testsuite/g++.dg/cpp0x/pr106435-b.cc | 22 +++
>>  gcc/testsuite/g++.dg/cpp0x/pr106435.C| 24 +++
>>  gcc/testsuite/g++.dg/cpp0x/pr106435.h| 27 
>>  5 files changed, 142 insertions(+), 13 deletions(-)
>>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435-b.cc
>>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435.C
>>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435.h
>> diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
>> index 70745aa4e7c..064645f980d 100644
>> --- a/gcc/c-family/c-opts.cc
>> +++ 

gcc-11: FTBFS on hurd-i386

2023-01-03 Thread Svante Signell via Gcc-patches
Source: gcc-11
Version: 11.3.0-10
Severity: important
Tags: patch
User: debian-h...@lists.debian.org
Usertags: hurd
Affects: gcc-11, gcc-snapshot
X-Debbugs-CC: debian-h...@lists.debian.org

Hi,

gcc-11-11.3.0-10 in sid FTBFS on hurd-i386 due to failing linkage of
pthread_once (same error already fixed in gcc-12 and gcc-13):
/<>/build/i686-gnu/libstdc++-v3/include/i686-gnu/bits/gthr-
default.h:700: undefined reference to `pthread_once'
and more.

The patch at hand is: libstdc++-hurd.diff, attached here for convenience.

Thanks!

This is notably needed because in glibc 2.34, the move of pthread functions
into libc.so happened for Linux only, not GNU/Hurd.

The pthread_self() function can also always be used fine as it is on
GNU/Hurd.

libstdc++-v3/ChangeLog:

* config/os/gnu-linux/os_defines.h [!__linux__]
  (_GLIBCXX_NATIVE_THREAD_ID, _GLIBCXX_GTHREAD_USE_WEAK): Do not define.

--- a/libstdc++-v3/config/os/gnu-linux/os_defines.h
+++ b/libstdc++-v3/config/os/gnu-linux/os_defines.h
@@ -49,22 +49,24 @@
 // version dynamically in case it has changed since libstdc++ was configured.
 #define _GLIBCXX_NO_OBSOLETE_ISINF_ISNAN_DYNAMIC __GLIBC_PREREQ(2,23)
 
-#if __GLIBC_PREREQ(2, 27)
-// Since glibc 2.27 pthread_self() is usable without linking to libpthread.
-# define _GLIBCXX_NATIVE_THREAD_ID pthread_self()
-#else
+#ifdef __linux__
+# if __GLIBC_PREREQ(2, 27)
+// Since glibc 2.27 Linux' pthread_self() is usable without linking to libpthread.
+#  define _GLIBCXX_NATIVE_THREAD_ID pthread_self()
+# else
 // Before then it was in libc.so.6 but not libc.a, and always returns 0,
 // which breaks the invariant this_thread::get_id() != thread::id{}.
 // So only use it if we know the libpthread version is available.
 // Otherwise use (__gthread_t)1 as the ID of the main (and only) thread.
-# define _GLIBCXX_NATIVE_THREAD_ID \
-  (__gthread_active_p() ? __gthread_self() : (__gthread_t)1)
-#endif
+#  define _GLIBCXX_NATIVE_THREAD_ID \
+   (__gthread_active_p() ? __gthread_self() : (__gthread_t)1)
+# endif
 
-#if __GLIBC_PREREQ(2, 34)
-// Since glibc 2.34 all pthreads functions are usable without linking to
+# if __GLIBC_PREREQ(2, 34)
+// Since glibc 2.34 all Linux pthreads functions are usable without linking to
 // libpthread.
-# define _GLIBCXX_GTHREAD_USE_WEAK 0
+#  define _GLIBCXX_GTHREAD_USE_WEAK 0
+# endif
 #endif
 
 #endif


Re: [PATCH] c++, TLS: Support cross-tu static initialization for targets without alias support [PR106435].

2023-01-03 Thread Jason Merrill via Gcc-patches

On 12/7/22 10:39, Iain Sandoe wrote:

  This has been tested on x86_64 and arm64 Darwin and on x86_64 linux gnu.
  The basic patch is live in the homebrew macOS support and so has had quite
  wide coverage on non-trivial codebases.
  
  OK for master?

  Iain
  
  Since this actually fixes wrong code, I wonder if we should also consider

  back-porting.
  
  --- >8 ---


The description below relates to the code path when TARGET_SUPPORTS_ALIASES is
false; current operation is maintained for targets with alias support and any
new support code should be DCEd in that case.

--

Currently, cross-tu static initialisation is not supported for targets without
alias support.

The patch adds support by building a shim function in place of the alias for
these targets; the shim simply calls the generic initialiser.  Although this is
slightly less efficient than the alias, in practice (for targets that allow
sibcalls) the penalty is a single jump when code is optimised.

 From the perspective of a TU referencing an extern TLS variable, there is no
way to determine if it requires a guarded dynamic init.  So, in the referencing
TU, we build a weak reference to the potential init and check at runtime if the
init is present before calling it.  This strategy is fine for targets that have
ELF semantics, but fails at link time for Mach-O (which does not permit the
reference to be undefined in the static link).

The actual initialiser call is contained in a wrapper function, and to resolve
the Mach-O linker issue, in the TU that is referencing the var, we now generate
both the wrapper _and_ a weak definition of a dummy init function.  In the case
that there _is_ a dynamic init (in a different TU), that version will be
non-weak and will override the weak dummy one.


IIUC, this isn't reliable in general; specifically, I believe that the
glibc dynamic loader no longer prefers strong definitions to weak ones.


Perhaps on targets that don't allow weakrefs to be unbound, we should 
unconditionally emit the init function where the variable is defined, 
even if it does nothing, and unconditionally call it from the wrapper?



In the case that we have a trivial
static init (so no init in any other TU) the weak-defined dummy init will be
called (a single return insn for optimised code).  We mitigate the call to
the dummy init by reworking the wrapper code-gen path to remove the test for
the weak reference function (as it will always be true) since the static linker
will now determine the function to be called.

Signed-off-by: Iain Sandoe 

PR c++/106435

gcc/c-family/ChangeLog:

* c-opts.cc (c_common_post_options): Allow fextern-tls-init for targets
without alias support.

gcc/cp/ChangeLog:

* decl2.cc (get_tls_init_fn): Allow targets without alias support.
(handle_tls_init): Emit aliases for single init functions where the
target supports this, otherwise emit a stub function that calls the
main tls init function.  (generate_tls_dummy_init): New.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/pr106435-b.cc: New file.
* g++.dg/cpp0x/pr106435.C: New test.
* g++.dg/cpp0x/pr106435.h: New file.
---
  gcc/c-family/c-opts.cc   |  2 +-
  gcc/cp/decl2.cc  | 80 
  gcc/testsuite/g++.dg/cpp0x/pr106435-b.cc | 22 +++
  gcc/testsuite/g++.dg/cpp0x/pr106435.C| 24 +++
  gcc/testsuite/g++.dg/cpp0x/pr106435.h| 27 
  5 files changed, 142 insertions(+), 13 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435-b.cc
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr106435.h

diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 70745aa4e7c..064645f980d 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1070,7 +1070,7 @@ c_common_post_options (const char **pfilename)
  
if (flag_extern_tls_init)

  {
-  if (!TARGET_SUPPORTS_ALIASES || !SUPPORTS_WEAK)
+  if (!SUPPORTS_WEAK)
{
  /* Lazy TLS initialization for a variable in another TU requires
 alias and weak reference support.  */
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index f95529a5c9a..c6550c0c2fc 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -3672,9 +3672,8 @@ get_tls_init_fn (tree var)
if (!flag_extern_tls_init && DECL_EXTERNAL (var))
  return NULL_TREE;
  
-  /* If the variable is internal, or if we can't generate aliases,

- call the local init function directly.  */
-  if (!TREE_PUBLIC (var) || !TARGET_SUPPORTS_ALIASES)
+  /* If the variable is internal call the local init function directly.  */
+  if (!TREE_PUBLIC (var))
  return get_local_tls_init_fn (DECL_SOURCE_LOCATION (var));
  
tree sname = mangle_tls_init_fn (var);

@@ -3811,8 +3810,12 @@ generate_tls_wrapper (tree fn)
if (tree init_fn = get_tls_init_fn (var))
  {
tree if_stmt = 

Re: [PATCH] Darwin, crts: Provide scalb and significand as a crt [PR107631]

2023-01-03 Thread Joseph Myers
On Tue, 3 Jan 2023, Iain Sandoe wrote:

>  1. Modula-2 should not forward the builtins unless the target supports them,
> either by expansion or the relevant lib functions.  So that would need 
> some
>configury and conditional build code.

Note that such configure tests could only readily be in the library 
configure scripts, not in the compiler configure scripts - you can't do 
target compile or link tests in host-side configure.  (To a limited extent 
it's possible to grep target headers in host-side configure, though not to 
actually preprocess them since the compiler required for such 
preprocessing doesn't exist at that point.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Darwin, crts: Provide scalb and significand as a crt [PR107631]

2023-01-03 Thread Iain Sandoe
Thanks Joseph,

> On 3 Jan 2023, at 18:15, Joseph Myers  wrote:
> 
> On Sat, 31 Dec 2022, Iain Sandoe wrote:
> 
>> builtins.def unconditionally defines these builtins to be DEF_EXT_LIB_BUILTIN
>> which expands to the libcall, this is currently hard-wired to FALLBACK_P = 
>> true.
>> 
>> but, AFAIU the builtins.def descriptions:
>> 
>> FALLBACK_P should be false if the libc (or libm, I suppose, if that’s 
>> different)
>> does not have the function, perhaps that’s an underlying bug or at least an
>> oversight?
>> 
>> (or, of course, I misunderstood the intent of that param)
> 
> FALLBACK_P true means that it's the user's responsibility, if calling 
> __builtin_X, to make sure the library function X is also available in 
> cases where the call doesn't get expanded inline - that is, that the API 
> for that __builtin_X function is that it may call an underlying library 
> function X, which is expected to exist and have a compatible interface.
> 
> Information about whether a function is present in libc / libm is 
> generally only relevant when __builtin_X might expand to call Y instead of 
> X; then GCC needs to know whether Y is available.

Ah I had misunderstood the param.

Since the mechanism used by Modula-2 ‘forwards’ the builtins by using them,
that means that it always produces the libcalls, which results in link errors
when the m2 libraries are used (i.e. the user is not in control of the
use/non-use).

>> - at present, it seems that this crt might be the least invasive 
>> solution (since ‘significand*()’ are not obsolete AFAIU, we still need 
>> to provide those implementations, regardless of any substitution of 
>> scalbn*() in Modula-2).
> 
> The significand functions can be considered obsolete and were never in any 
> standard (thus glibc does not provide a version for _Float128, for 
> example).

So,  it seems that either:

 1. Modula-2 should not forward the builtins unless the target supports them,
either by expansion or the relevant lib functions.  So that would need some
   configury and conditional build code.

 2. Preferably it should not forward the obsolete/obsolescent cases:
   scalb*()
   significand*()

Additionally, if there is an actual use in the Modula-2 runtime (as opposed to
forwarding the functionality to the end user), then it should implement that
while avoiding these obsolete functions.

I will withdraw my Darwin patch, and discuss with Gaius how to resolve this in
Modula-2.

thanks
Iain


> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com



Re: [PATCH][X86_64] Separate znver4 insn reservations from older znvers

2023-01-03 Thread Jan Hubicka via Gcc-patches
> 
> On Tue, 3 Jan 2023, Jan Hubicka wrote:
> 
> > >   * gcc/common/config/i386/i386-common.cc (processor_alias_table):
> > >   Use CPU_ZNVER4 for znver4.
> > >   * config/i386/i386.md: Add znver4.md.
> > >   * config/i386/znver4.md: New.
> > OK,
> > thanks!
> 
> Honza, I'm curious what are your further plans for this, you mentioned
> merging znver4.md back in znver.md if I recall correctly?

I was looking into that over Christmas (and it was also the reason for my
first pass through, where I was asking about various differences).  There
are a number of small divergences between znver.md and znver4.md that seem
to make the merged automaton bigger than having two automatons.
So merging both meaningfully would mean modifying the znver1-3 model or
the znver4 model.  With Tejas I think we mostly verified that the areas
where the znver4 model differs from znver1-3 are correct for znver4 and
sometimes also for znver3 (for example the branching unit is already
present there but not modelled).

Splitting znver1-3 and 4 is definitely not optimal.  However, given the
time constraints and the desire to not break znver1-3, I think going with
znver4.md is a good option at least for GCC12/13.

Overall I am not sure how beneficial the model is:
since we schedule on a BB basis and model the CPU as in-order with no register
renaming, the scheduler rarely has a chance to fill most of the execution
units and de facto optimizes for a vastly different CPU than the real one.
We get a noticeable SPEC performance boost from -fschedule-insns2 but it
seems to be mostly from scheduling for latencies.
LLVM's model seems to do more than we do, but comparing both compilers
I was not really able to tell if either of them gets a noticeable benefit
from the actual model of reservation units (and not only latencies).

I would welcome thoughts/ideas/measurements on this.
Honza
> 
> Alexander


Re: [PATCH] Darwin, crts: Provide scalb and significand as a crt [PR107631]

2023-01-03 Thread Joseph Myers
On Sat, 31 Dec 2022, Iain Sandoe wrote:

> builtins.def unconditionally defines these builtins to be DEF_EXT_LIB_BUILTIN
> which expands to the libcall, this is currently hard-wired to FALLBACK_P = 
> true.
> 
> but, AFAIU the builtins.def descriptions:
> 
>  FALLBACK_P should be false if the libc (or libm, I suppose, if that’s 
> different)
>  does not have the function, perhaps that’s an underlying bug or at least an
>  oversight?
> 
>  (or, of course, I misunderstood the intent of that param)

FALLBACK_P true means that it's the user's responsibility, if calling 
__builtin_X, to make sure the library function X is also available in 
cases where the call doesn't get expanded inline - that is, that the API 
for that __builtin_X function is that it may call an underlying library 
function X, which is expected to exist and have a compatible interface.

Information about whether a function is present in libc / libm is 
generally only relevant when __builtin_X might expand to call Y instead of 
X; then GCC needs to know whether Y is available.

> - at present, it seems that this crt might be the least invasive 
> solution (since ‘significand*()’ are not obsolete AFAIU, we still need 
> to provide those implementations, regardless of any substitution of 
> scalbn*() in Modula-2).

The significand functions can be considered obsolete and were never in any 
standard (thus glibc does not provide a version for _Float128, for 
example).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] libgcc: Fix uninitialized RA signing on AArch64 [PR107678]

2023-01-03 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> Hmm, but the point of the original patch was to support code generators
> that emit DW_CFA_val_expression instead of DW_CFA_AARCH64_negate_ra_state.
> Doesn't this patch undo that?

Well it wasn't clear from the code or comments that was supported. I've
added that back in v2.

> Also, if I understood correctly, the reason we use REG_UNSAVED is to
> ensure that state from one frame isn't carried across to a parent frame,
> in cases where the parent frame lacks any signing.  That is, each frame
> should start out with a zero bit even if a child frame is unwound while
> it has a set bit.

This works fine since all registers are initialized to REG_UNSAVED every frame.

In v2 I've removed some clutter and encode the signing state in REG_UNSAVED/
REG_UNDEFINED.

Cheers,
Wilco

v2: Further cleanup, support DW_CFA_expression.

A recent change only initializes the regs.how[] during Dwarf unwinding
which resulted in an uninitialized offset used in return address signing
and random failures during unwinding.  The fix is to encode the return
address signing state in REG_UNSAVED and REG_UNDEFINED.

Passes bootstrap & regress, OK for commit?

libgcc/
PR target/107678
* unwind-dw2.c (execute_cfa_program): Use REG_UNSAVED/UNDEFINED
to encode return address signing state.
* config/aarch64/aarch64-unwind.h (aarch64_demangle_return_addr):
Check current return address signing state.
(aarch64_frob_update_context): Remove.

---
diff --git a/libgcc/config/aarch64/aarch64-unwind.h 
b/libgcc/config/aarch64/aarch64-unwind.h
index 
26db9cbd9e5c526e0c410a4fc6be2bedb7d261cf..1afc3f9d308b95bc787398263e629bab226ff1ba
 100644
--- a/libgcc/config/aarch64/aarch64-unwind.h
+++ b/libgcc/config/aarch64/aarch64-unwind.h
@@ -29,8 +29,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 
 #define MD_DEMANGLE_RETURN_ADDR(context, fs, addr) \
   aarch64_demangle_return_addr (context, fs, addr)
-#define MD_FROB_UPDATE_CONTEXT(context, fs) \
-  aarch64_frob_update_context (context, fs)
 
 static inline int
 aarch64_cie_signed_with_b_key (struct _Unwind_Context *context)
@@ -55,42 +53,27 @@ aarch64_cie_signed_with_b_key (struct _Unwind_Context 
*context)
 
 static inline void *
 aarch64_demangle_return_addr (struct _Unwind_Context *context,
- _Unwind_FrameState *fs ATTRIBUTE_UNUSED,
+ _Unwind_FrameState *fs,
  _Unwind_Word addr_word)
 {
   void *addr = (void *)addr_word;
-  if (context->flags & RA_SIGNED_BIT)
+  const int reg = DWARF_REGNUM_AARCH64_RA_STATE;
+
+  if (fs->regs.how[reg] == REG_UNSAVED)
+return addr;
+
+  /* Return-address signing state is toggled by DW_CFA_GNU_window_save (where
+ REG_UNDEFINED means enabled), or set by a DW_CFA_expression.  */
+  if (fs->regs.how[reg] == REG_UNDEFINED
+  || (_Unwind_GetGR (context, reg) & 0x1) != 0)
 {
   _Unwind_Word salt = (_Unwind_Word) context->cfa;
   if (aarch64_cie_signed_with_b_key (context) != 0)
return __builtin_aarch64_autib1716 (addr, salt);
   return __builtin_aarch64_autia1716 (addr, salt);
 }
-  else
-return addr;
-}
-
-/* Do AArch64 private initialization on CONTEXT based on frame info FS.  Mark
-   CONTEXT as return address signed if bit 0 of DWARF_REGNUM_AARCH64_RA_STATE 
is
-   set.  */
-
-static inline void
-aarch64_frob_update_context (struct _Unwind_Context *context,
-_Unwind_FrameState *fs)
-{
-  const int reg = DWARF_REGNUM_AARCH64_RA_STATE;
-  int ra_signed;
-  if (fs->regs.how[reg] == REG_UNSAVED)
-ra_signed = fs->regs.reg[reg].loc.offset & 0x1;
-  else
-ra_signed = _Unwind_GetGR (context, reg) & 0x1;
-  if (ra_signed)
-/* The flag is used for re-authenticating EH handler's address.  */
-context->flags |= RA_SIGNED_BIT;
-  else
-context->flags &= ~RA_SIGNED_BIT;
 
-  return;
+  return addr;
 }
 
 #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */
diff --git a/libgcc/unwind-dw2.c b/libgcc/unwind-dw2.c
index 
eaceace20298b9b13344aff9d1fe9ee5f9c7bd73..7c200cb6e730c5d63cf200ebe8a903f858e79d07
 100644
--- a/libgcc/unwind-dw2.c
+++ b/libgcc/unwind-dw2.c
@@ -139,7 +139,6 @@ struct _Unwind_Context
 #define EXTENDED_CONTEXT_BIT ((~(_Unwind_Word) 0 >> 2) + 1)
   /* Bit reserved on AArch64, return address has been signed with A or B
  key.  */
-#define RA_SIGNED_BIT ((~(_Unwind_Word) 0 >> 3) + 1)
   _Unwind_Word flags;
   /* 0 for now, can be increased when further fields are added to
  struct _Unwind_Context.  */
@@ -1206,8 +1205,10 @@ execute_cfa_program (const unsigned char *insn_ptr,
  /* This CFA is multiplexed with Sparc.  On AArch64 it's used to toggle
 return address signing status.  */
  reg = DWARF_REGNUM_AARCH64_RA_STATE;
- gcc_assert (fs->regs.how[reg] == REG_UNSAVED);
- fs->regs.reg[reg].loc.offset ^= 1;
+ if (fs->regs.how[reg] == REG_UNSAVED)
+  

Re: [PATCH] tree-optimization/105043: Object Size Checking docs cleanup

2023-01-03 Thread Siddhesh Poyarekar

On 2022-12-27 13:25, Jeff Law wrote:



On 12/15/22 12:25, Siddhesh Poyarekar wrote:

Break the _FORTIFY_SOURCE-specific builtins out into a separate
subsection from Object Size Checking built-ins and mention
_FORTIFY_SOURCE in there so that the link between the object size
checking builtins, the helper builtins (e.g. __builtin___memcpy_chk) and
_FORTIFY_SOURCE is clearer.

gcc/ChangeLog:

PR tree-optimization/105043
* doc/extend.texi (Object Size Checking): Split out into two
subsections and mention _FORTIFY_SOURCE.

OK
jeff



Thanks, pushed.

Sid


Re: [PATCH][X86_64] Separate znver4 insn reservations from older znvers

2023-01-03 Thread Alexander Monakov via Gcc-patches


On Tue, 3 Jan 2023, Jan Hubicka wrote:

> > * gcc/common/config/i386/i386-common.cc (processor_alias_table):
> > Use CPU_ZNVER4 for znver4.
> > * config/i386/i386.md: Add znver4.md.
> > * config/i386/znver4.md: New.
> OK,
> thanks!

Honza, I'm curious what are your further plans for this, you mentioned
merging znver4.md back in znver.md if I recall correctly?

Alexander


Re: [PATCH][X86_64] Separate znver4 insn reservations from older znvers

2023-01-03 Thread Jan Hubicka via Gcc-patches
> [Public]
> 
> Hello,
> 
> I have addressed all your comments in this revision of the patch, please find 
> attached and inlined.
> 
> * I have updated all the latencies with Agner's measurements.
> * Incorrect pipelines, loads/stores are addressed.
> * The double pumped avx512 insns take one cycle for 256 half and the next 
> cycle for remaining 256-bit half in the same pipeline, thus pipe*2.
> 
> Is this ok for trunk?
> 
> Thanks and Regards,
> Tejas
> 
> gcc/ChangeLog:
> 
>   * gcc/common/config/i386/i386-common.cc (processor_alias_table):
>   Use CPU_ZNVER4 for znver4.
>   * config/i386/i386.md: Add znver4.md.
>   * config/i386/znver4.md: New.
OK,
thanks!
Honza
> 
> Change-Id: Iea39c1c01d4992cf7ac476bd6de65887910bbcbe
> ---
>  gcc/common/config/i386/i386-common.cc |2 +-
>  gcc/config/i386/i386.md   |1 +
>  gcc/config/i386/znver4.md | 1068 +
>  3 files changed, 1070 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/config/i386/znver4.md
> 
> diff --git a/gcc/common/config/i386/i386-common.cc 
> b/gcc/common/config/i386/i386-common.cc
> index 660a977b68b..c7adea57683 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2215,7 +2215,7 @@ const pta processor_alias_table[] =
>{"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3,
>  PTA_ZNVER3,
>  M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2},
> -  {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER3,
> +  {"znver4", PROCESSOR_ZNVER4, CPU_ZNVER4,
>  PTA_ZNVER4,
>  M_CPU_SUBTYPE (AMDFAM19H_ZNVER4), P_PROC_AVX512F},
>{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 9451883396c..3a88f16a21a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1319,6 +1319,7 @@
>  (include "bdver3.md")
>  (include "btver2.md")
>  (include "znver.md")
> +(include "znver4.md")
>  (include "geode.md")
>  (include "atom.md")
>  (include "slm.md")
> diff --git a/gcc/config/i386/znver4.md b/gcc/config/i386/znver4.md
> new file mode 100644
> index 000..d0b239822a8
> --- /dev/null
> +++ b/gcc/config/i386/znver4.md
> @@ -0,0 +1,1068 @@
> +;; Copyright (C) 2012-2022 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify
> +;; it under the terms of the GNU General Public License as published by
> +;; the Free Software Foundation; either version 3, or (at your option)
> +;; any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful,
> +;; but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;; GNU General Public License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; .
> +;;
> +
> +
> +(define_attr "znver4_decode" "direct,vector,double"
> +  (const_string "direct"))
> +
> +;; AMD znver4 Scheduling
> +;; Modeling automatons for zen decoders, integer execution pipes,
> +;; AGU pipes, branch, floating point execution and fp store units.
> +(define_automaton "znver4, znver4_ieu, znver4_idiv, znver4_fdiv, znver4_agu, 
> znver4_fpu, znver4_fp_store")
> +
> +;; Decoders unit has 4 decoders and all of them can decode fast path
> +;; and vector type instructions.
> +(define_cpu_unit "znver4-decode0" "znver4")
> +(define_cpu_unit "znver4-decode1" "znver4")
> +(define_cpu_unit "znver4-decode2" "znver4")
> +(define_cpu_unit "znver4-decode3" "znver4")
> +
> +;; Currently blocking all decoders for vector path instructions as
> +;; they are dispatched separetely as microcode sequence.
> +(define_reservation "znver4-vector" 
> "znver4-decode0+znver4-decode1+znver4-decode2+znver4-decode3")
> +
> +;; Direct instructions can be issued to any of the four decoders.
> +(define_reservation "znver4-direct" 
> "znver4-decode0|znver4-decode1|znver4-decode2|znver4-decode3")
> +
> +;; Fix me: Need to revisit this later to simulate fast path double behavior.
> +(define_reservation "znver4-double" "znver4-direct")
> +
> +
> +;; Integer unit 4 ALU pipes.
> +(define_cpu_unit "znver4-ieu0" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu1" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu2" "znver4_ieu")
> +(define_cpu_unit "znver4-ieu3" "znver4_ieu")
> +;; Znver4 has an additional branch unit.
> +(define_cpu_unit "znver4-bru0" "znver4_ieu")
> +(define_reservation "znver4-ieu" 
> "znver4-ieu0|znver4-ieu1|znver4-ieu2|znver4-ieu3")
> +
> +;; 3 AGU pipes in znver4
> +(define_cpu_unit "znver4-agu0" "znver4_agu")
> +(define_cpu_unit "znver4-agu1" "znver4_agu")
> +(define_cpu_unit "znver4-agu2" "znver4_agu")
> +(define_reservation "znver4-agu-reserve" 
> "znver4-agu0|znver4-agu1|znver4-agu2")
> +
> +;; Load is 4 cycles. We do not model reservation of load unit.
> +(define_reservation 

[ping][PATCH 0/2] __bos and flex arrays

2023-01-03 Thread Siddhesh Poyarekar

Ping!

On 2022-12-21 17:25, Siddhesh Poyarekar wrote:

Hi,

The first patch in the series is just a minor test cleanup that I did to
make sure all tests in a test case run (instead of aborting at first
failure) and print the ones that failed.  The second patch is the actual
fix.

The patch intends to make __bos/__bdos do the right thing with structs
containing flex arrays, either directly or within nested structs and
unions.  This should improve minimum object size estimation in some
cases and also bail out more consistently so that flex arrays don't
cause false positives in fortification.

I've tested this with a bootstrap on x86_64 and also with
--with-build-config=bootstrap-ubsan to make sure that there are no new
failures due to this change.

Siddhesh Poyarekar (2):
   testsuite: Run __bos tests to completion
   tree-object-size: More consistent behaviour with flex arrays

  .../g++.dg/ext/builtin-object-size1.C | 267 
  .../g++.dg/ext/builtin-object-size2.C | 267 
  .../gcc.dg/builtin-dynamic-object-size-0.c|  14 +-
  gcc/testsuite/gcc.dg/builtin-object-size-1.c  | 263 
  gcc/testsuite/gcc.dg/builtin-object-size-12.c |  12 +-
  gcc/testsuite/gcc.dg/builtin-object-size-13.c |  17 +-
  gcc/testsuite/gcc.dg/builtin-object-size-15.c |  11 +-
  gcc/testsuite/gcc.dg/builtin-object-size-2.c  | 287 +-
  gcc/testsuite/gcc.dg/builtin-object-size-3.c  | 263 
  gcc/testsuite/gcc.dg/builtin-object-size-4.c  | 267 
  gcc/testsuite/gcc.dg/builtin-object-size-6.c  | 267 
  gcc/testsuite/gcc.dg/builtin-object-size-7.c  |  52 ++--
  gcc/testsuite/gcc.dg/builtin-object-size-8.c  |  17 +-
  .../gcc.dg/builtin-object-size-common.h   |  12 +
  .../gcc.dg/builtin-object-size-flex-common.h  |  90 ++
  ...n-object-size-flex-nested-struct-nonzero.c |   6 +
  ...ltin-object-size-flex-nested-struct-zero.c |   6 +
  .../builtin-object-size-flex-nested-struct.c  |  22 ++
  ...in-object-size-flex-nested-union-nonzero.c |   6 +
  ...iltin-object-size-flex-nested-union-zero.c |   6 +
  .../builtin-object-size-flex-nested-union.c   |  28 ++
  .../gcc.dg/builtin-object-size-flex-nonzero.c |   6 +
  .../gcc.dg/builtin-object-size-flex-zero.c|   6 +
  .../gcc.dg/builtin-object-size-flex.c |  18 ++
  gcc/testsuite/gcc.dg/pr101836.c   |  11 +-
  gcc/testsuite/gcc.dg/strict-flex-array-3.c|  11 +-
  gcc/tree-object-size.cc   | 150 -
  27 files changed, 1275 insertions(+), 1107 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-common.h
  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-flex-common.h
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-struct-nonzero.c
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-struct-zero.c
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-struct.c
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-union-nonzero.c
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-union-zero.c
  create mode 100644 
gcc/testsuite/gcc.dg/builtin-object-size-flex-nested-union.c
  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-flex-nonzero.c
  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-flex-zero.c
  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-flex.c



Re: [PATCH] Various fixes for DWARF register size computation

2023-01-03 Thread Jakub Jelinek via Gcc-patches
On Tue, Jan 03, 2023 at 02:25:21PM +0100, Florian Weimer wrote:
> > Though, I still wonder, because all of this is a hack for a single target
> > - x86_64-linux -m64 - I think no other target has similar constant
> > sizes,
> 
> Really?  That's odd.

I've tried about 30 cross compilers I had around, I admit it isn't
exhaustive.

> Is it because other architectures track callee-saved vector registers
> through unwinding?

I think it is far more than just vector registers, it can be floating point
registers, or just one integral or special register of a different size etc.

> > Or, if you want to do it on the compiler side, instead of predefining
> > __LIBGCC_DWARF_REG_SIZES_CONSTANT__ and __LIBGCC_DWARF_REG_MAXIMUM__
> > register conditionally a new builtin, __builtin_dwarf_reg_size,
> > which would be defined only if -fbuilding-libgcc and the compiler determines
> > dwarf_reg_size is desirable to be computed inline without a table and
> > would fold the builtin to say that
> > index <= 16U ? 8 : 0 on x86_64 -m64,
> > index <= 9U ? 4 : index - 11U <= 5U ? 12 : 0 on x86_64 -m32 etc.
> > and if the expression is too large/complex, wouldn't predefine the builtin.
> 
> I think the pre-computation of the size array is useful even for targets
> where the expression is not so simple, but the array elements are still
> constants.  A builtin like __builtin_dwarf_reg_size could use a
> reference to a constant static array, so that we can get rid of the
> array initialization code in libgcc.  Before we can do that, we need to

I think constant static array might be sometimes an option too, but not
always, not just because of AArch64, or other potential poly-int arches
(RISCV?), but also if something could depend on some runtime check.
It is true that most of the time it is constant and depends just on the
target or more often on target + options combo (mostly ABI related options).

> figure out if the fully dynamic register sizes on AArch64 with SVE are
> actually correct—and if we need to fix the non-SVE unwinder to work
> properly for SVE programs.
> 
> So I don't want to revert the size array computation just yet.
> 
> > Or, is it actually the use of table that is bad on the unwinder side,
> > or lack of a small upper bound for what you get from the table?
> > In that case you could predefine upper bound on the sizes instead (if
> > constant) and simply add if (size > __LIBGCC_DWARF_REG_SIZE_MAX__)
> > __builtin_unreachable ()).
> 
> It also matters what kind of register sizes are used in practice.

Yes, but that is hard to find out easily.  E.g. some registers might be
only saved/restored in signal frames and nowhere else, others only rarely
touched but still we'd need their sizes if they are ever used.

> Should I repost this patch with the three nits fixed?  Or should I
> revert two of the three patches I committed instead?

I lean towards reversion and trying to figure out something that works
on more than one arch.  It doesn't have to improve all arches (say
AArch64 is clearly a nightmare that isn't handled correctly even without
any changes - as noted in the PRs, if libgcc is built without SVE, it will
hardcode 8, while if it is built with SVE, it will be runtime dependent and
will be wrong in theory when some HW has 2048 bit SVE vectors - when it is
256 bytes), but still looking at what the following compiles to with
-O2 -nostdinc -fbuilding-libgcc

static unsigned char dwarf_reg_size_table[__LIBGCC_DWARF_FRAME_REGISTERS__+1];

void
foo (void)
{
  __builtin_init_dwarf_reg_size_table (dwarf_reg_size_table);
}

on at least 10 most common arches would be useful and optimizing what is
easily possible would be nice.

Jakub



Re: [PATCH] Various fixes for DWARF register size computation

2023-01-03 Thread Florian Weimer via Gcc-patches
* Jakub Jelinek:

> On Tue, Jan 03, 2023 at 12:15:23PM +0100, Florian Weimer wrote:
>> --- a/gcc/debug.h
>> +++ b/gcc/debug.h
>> @@ -245,7 +245,18 @@ extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
>>  
>>  /* Dwarf2 frame information.  */
>>  
>> -extern int dwarf_reg_sizes_constant ();
>> +/* Query size information about DWARF registers.  */
>> +struct dwarf_single_register_size
>> +{
>> +  dwarf_single_register_size();
>
> Formatting, space before (
>
>> @@ -334,27 +333,39 @@ generate_dwarf_reg_sizes (poly_uint16 *sizes)
>>  targetm.init_dwarf_reg_sizes_extra (sizes);
>>  }
>>  
>> -/* Return 0 if the DWARF register sizes are not constant, otherwise
>> -   return the size constant.  */
>> -
>> -int
>> -dwarf_reg_sizes_constant ()
>> +dwarf_single_register_size::dwarf_single_register_size()
>
> Likewise.
>
>> +  for (int i = DWARF_FRAME_REGISTERS; i >= 0; --i)
>> +{
>> +  unsigned short value;
>> +  if (!sizes[i].is_constant () || value != 0)
>
> if (!known_eq (sizes[i], 0))
> ?

Right.

> Though, I still wonder, because all of this is a hack for a single target
> - x86_64-linux -m64 - I think no other target has similar constant
> sizes,

Really?  That's odd.

Is it because other architectures track callee-saved vector registers
through unwinding?

> whether
> it wouldn't be better to revert all this compiler side stuff and handle it
> purely on the libgcc side - allow target headers to specify a simple
> expression how to compute dwarf_reg_size + don't define dwarf_reg_size_table
> array in that case and instead in a testcase verify that
> __builtin_init_dwarf_reg_size_table initializes an array to the exact same
> values as the libgcc/config/**/*.h overridden dwarf_reg_size version.
> That way, for x86_64-linux we can use
> ((index) <= __LIBGCC_DWARF_FRAME_REGISTERS__ ? 8 : 0)
> but could provide something reasonable even for other targets if it improves
> the unwinder sufficiently.
> Say s390x-linux -m64 is
> ((index) <= 32 ? 8 : (index) == 33 ? 4 : 0)
> etc.

> Or, if you want to do it on the compiler side, instead of predefining
> __LIBGCC_DWARF_REG_SIZES_CONSTANT__ and __LIBGCC_DWARF_REG_MAXIMUM__
> register conditionally a new builtin, __builtin_dwarf_reg_size,
> which would be defined only if -fbuilding-libgcc and the compiler determines
> dwarf_reg_size is desirable to be computed inline without a table and
> would fold the builtin to say that
> index <= 16U ? 8 : 0 on x86_64 -m64,
> index <= 9U ? 4 : index - 11U <= 5U ? 12 : 0 on x86_64 -m32 etc.
> and if the expression is too large/complex, wouldn't predefine the builtin.

I think the pre-computation of the size array is useful even for targets
where the expression is not so simple, but the array elements are still
constants.  A builtin like __builtin_dwarf_reg_size could use a
reference to a constant static array, so that we can get rid of the
array initialization code in libgcc.  Before we can do that, we need to
figure out if the fully dynamic register sizes on AArch64 with SVE are
actually correct—and if we need to fix the non-SVE unwinder to work
properly for SVE programs.

So I don't want to revert the size array computation just yet.

> Or, is it actually the use of table that is bad on the unwinder side,
> or lack of a small upper bound for what you get from the table?
> In that case you could predefine upper bound on the sizes instead (if
> constant) and simply add if (size > __LIBGCC_DWARF_REG_SIZE_MAX__)
> __builtin_unreachable ()).

It also matters what kind of register sizes are used in practice.
Looking at the FDE for _Unwind_RaiseException on i686, we only save
4-byte registers there, I think.  Perhaps only non-GCC-generated code
may exercise the other register sizes?  That's different on AArch64,
where the vector registers are saved as well.

Should I repost this patch with the three nits fixed?  Or should I
revert two of the three patches I committed instead?

Thanks,
Florian



[PATCH] PR tree-optimization/92342: Optimize b & -(a==c) in match.pd

2023-01-03 Thread Roger Sayle

This patch is an update/tweak of Andrew Pinski's two patches for
PR tree-optimization/92342, that were originally posted back in November:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585111.html
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585112.html

Technically, the first of those was approved by Richard Biener, though
never committed, and my first thought was to simply push it for Andrew,
but the review of the second piece expressed concerns over comparisons
in non-integral modes, where the result may not be zero-one valued.
Indeed both transformations misbehave in the presence of vector mode
comparisons (these transformations are already implemented for
vec_cond elsewhere in match.pd), so my minor contribution is to limit
these new transformations to scalars, by testing that both the operands
and results are INTEGRAL_TYPE_P.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2023-01-03  Andrew Pinski  
Roger Sayle  

gcc/ChangeLog:
PR tree-optimization/92342
* match.pd ((m1 CMP m2) * d -> (m1 CMP m2) ? d : 0):
Use tcc_comparison and :c for the multiply.
(b & -(a CMP c) -> (a CMP c)?b:0): New pattern.

gcc/testsuite/ChangeLog:
PR tree-optimization/92342
* gcc.dg/tree-ssa/andnegcmp-1.c: New test.
* gcc.dg/tree-ssa/andnegcmp-2.c: New test.
* gcc.dg/tree-ssa/multcmp-1.c: New test.
* gcc.dg/tree-ssa/multcmp-2.c: New test.


Thanks in advance (and thanks to Andrew).
Roger
--

diff --git a/gcc/match.pd b/gcc/match.pd
index 697d8de..2e1f113 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2076,10 +2076,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0  */
 (if (!canonicalize_math_p ())
- (for cmp (gt lt ge le)
+ (for cmp (tcc_comparison)
   (simplify
-   (mult (convert (cmp @0 @1)) @2)
-   (cond (cmp @0 @1) @2 { build_zero_cst (type); }
+   (mult:c (convert (cmp@0 @1 @2)) @3)
+   (if (INTEGRAL_TYPE_P (type)
+   && INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+ (cond @0 @3 { build_zero_cst (type); })))
+/* (-(m1 CMP m2)) & d -> (m1 CMP m2) ? d : 0  */
+  (simplify
+   (bit_and:c (negate (convert (cmp@0 @1 @2))) @3)
+   (if (INTEGRAL_TYPE_P (type)
+   && INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+ (cond @0 @3 { build_zero_cst (type); })))
+ )
+)
 
 /* For integral types with undefined overflow and C != 0 fold
x * C EQ/NE y * C into x EQ/NE y.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
new file mode 100644
index 000..6f16783
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/92342 */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 == m2;
+  d = -d;
+  int e = d & c;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
new file mode 100644
index 000..0e25c8a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/92342 */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 < m2;
+  d = -d;
+  int e = c & d;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
new file mode 100644
index 000..fb44cac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 == m2;
+  int e = d * c;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
new file mode 100644
index 000..be38b2e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 != m2;
+  int e = c * d;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */


Re: [PATCH] Various fixes for DWARF register size computation

2023-01-03 Thread Jakub Jelinek via Gcc-patches
On Tue, Jan 03, 2023 at 12:15:23PM +0100, Florian Weimer wrote:
> --- a/gcc/debug.h
> +++ b/gcc/debug.h
> @@ -245,7 +245,18 @@ extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
>  
>  /* Dwarf2 frame information.  */
>  
> -extern int dwarf_reg_sizes_constant ();
> +/* Query size information about DWARF registers.  */
> +struct dwarf_single_register_size
> +{
> +  dwarf_single_register_size();

Formatting, space before (

> @@ -334,27 +333,39 @@ generate_dwarf_reg_sizes (poly_uint16 *sizes)
>  targetm.init_dwarf_reg_sizes_extra (sizes);
>  }
>  
> -/* Return 0 if the DWARF register sizes are not constant, otherwise
> -   return the size constant.  */
> -
> -int
> -dwarf_reg_sizes_constant ()
> +dwarf_single_register_size::dwarf_single_register_size()

Likewise.

> +  for (int i = DWARF_FRAME_REGISTERS; i >= 0; --i)
> +{
> +  unsigned short value;
> +  if (!sizes[i].is_constant () || value != 0)

if (!known_eq (sizes[i], 0))
?

Though, I still wonder, because all of this is a hack for a single target
- x86_64-linux -m64 - I think no other target has similar constant sizes, 
whether
it wouldn't be better to revert all this compiler side stuff and handle it
purely on the libgcc side - allow target headers to specify a simple
expression how to compute dwarf_reg_size + don't define dwarf_reg_size_table
array in that case and instead in a testcase verify that
__builtin_init_dwarf_reg_size_table initializes an array to the exact same
values as the libgcc/config/**/*.h overridden dwarf_reg_size version.
That way, for x86_64-linux we can use
((index) <= __LIBGCC_DWARF_FRAME_REGISTERS__ ? 8 : 0)
but could provide something reasonable even for other targets if it improves
the unwinder sufficiently.
Say s390x-linux -m64 is
((index) <= 32 ? 8 : (index) == 33 ? 4 : 0)
etc.

Or, if you want to do it on the compiler side, instead of predefining
__LIBGCC_DWARF_REG_SIZES_CONSTANT__ and __LIBGCC_DWARF_REG_MAXIMUM__
register conditionally a new builtin, __builtin_dwarf_reg_size,
which would be defined only if -fbuilding-libgcc and the compiler determines
dwarf_reg_size is desirable to be computed inline without a table and
would fold the builtin to say that
index <= 16U ? 8 : 0 on x86_64 -m64,
index <= 9U ? 4 : index - 11U <= 5U ? 12 : 0 on x86_64 -m32 etc.
and if the expression is too large/complex, wouldn't predefine the builtin.

Then you can
#if __has_builtin(__builtin_dwarf_reg_size)
use the builtin and don't provide the table + initialize it,
otherwise initialize + use the table.

Or, is it actually the use of table that is bad on the unwinder side,
or lack of a small upper bound for what you get from the table?
In that case you could predefine upper bound on the sizes instead (if
constant) and simply add if (size > __LIBGCC_DWARF_REG_SIZE_MAX__)
__builtin_unreachable ()).

Jakub



Re: [PATCH] Fix RTL simplifications of FFS, POPCOUNT and PARITY.

2023-01-03 Thread Segher Boessenkool
On Mon, Jan 02, 2023 at 09:20:33AM -0700, Jeff Law wrote:
> On 1/2/23 08:59, Jakub Jelinek wrote:
> >See the PR50161 thread in
> >https://gcc.gnu.org/legacy-ml/gcc-patches/2011-08/threads.html#01847

Nasty nasty nasty.

> >The options are to disallow different modes, which is what my patch did
> >(perhaps not all documentation has been tweaked),

Which isn't so bad, except that simplifiers (in general, not just
simplify-rtx) will try to get rid of it.  Since there is no RTL check
that tests this property, we lose.

> > or ensure that the 
> >operand
> >of those is never constant.

This means we cannot express intermediate stages of simplification in
RTL, which goes counter to the whole idea of RTL.

> Sigh.  Lack of modes on constants mucking things up elsewhere.  There's 
> no good reason other than our poor representation to force the input and 
> output modes to match for these instructions.

But things like popcount need to know the mode of the input, if it is
a negative constant anyway.  Maybe we could simply disallow that?


Segher


[PATCH] Various fixes for DWARF register size computation

2023-01-03 Thread Florian Weimer via Gcc-patches
The previous code had several issues.

1. XALLOCAVEC does not create any objects, so invoking
   the non-POD poly_uint16 assignment operator is undefined.
2. The default constructor of poly-ints does not create a
   zero poly-int object (unlike what happens with regular ints).
3. The register size array must have DWARF_FRAME_REGISTERS + 1
   elements.  The extra element can be DWARF_FRAME_RETURN_COLUMN
   or DWARF_ALT_FRAME_RETURN_COLUMN.

To fix problem 3, merely increasing the array size is sufficient,
but it inhibits the x86-64 register size optimization in libgcc
because it does not use the extra register, so it has size zero.
To re-enable the optimization, expose the maximum used register
to libgcc.  This is sufficient for the optimizers to figure out
that the memcpy call in uw_install_context_1 has a fixed size
argument on x86-64.

This restores bootstrap on aarch64-linux-gnu and powerpc64-linux-gnu.
Not sure about test suite results yet, I need to check the baseline.

gcc/

* debug.h (dwarf_reg_sizes_constant): Remove declaration.
(dwarf_single_register_size): New struct.
* dwarf2cfi.cc (generate_dwarf_reg_sizes): Initialize
extra register size.  Use in-place new for initialization.
Remove unnecessary memset.
(dwarf_reg_sizes_constant): Remove.
(dwarf_single_register_size::dwarf_single_register_size):
New constructor based on removed dwarf_reg_sizes_constant
function.  Allocate extra size element.
(expand_builtin_init_dwarf_reg_sizes): Allocate extra size
element.
* target.def (init_dwarf_reg_sizes_extra): Mention extra size
element.
* doc/tm.texi: Update.

gcc/c-family/

* c-cppbuiltin.cc (c_cpp_builtins): Switch to
dwarf_single_register_size for obtaining DWARF register sizes.
Define __LIBGCC_DWARF_REG_MAXIMUM__.

libgcc/

* unwind-dw2.c (dwarf_reg_size): Use
__LIBGCC_DWARF_REG_MAXIMUM__.

---
 gcc/c-family/c-cppbuiltin.cc | 12 
 gcc/debug.h  | 13 -
 gcc/doc/tm.texi  |  2 +-
 gcc/dwarf2cfi.cc | 45 +++-
 gcc/target.def   |  2 +-
 libgcc/unwind-dw2.c  |  7 +--
 6 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc
index ddfd63b8eb9..8098aca41e8 100644
--- a/gcc/c-family/c-cppbuiltin.cc
+++ b/gcc/c-family/c-cppbuiltin.cc
@@ -1522,10 +1522,14 @@ c_cpp_builtins (cpp_reader *pfile)
   builtin_define_with_int_value ("__LIBGCC_DWARF_FRAME_REGISTERS__",
 DWARF_FRAME_REGISTERS);
   {
-   int value = dwarf_reg_sizes_constant ();
-   if (value > 0)
- builtin_define_with_int_value ("__LIBGCC_DWARF_REG_SIZES_CONSTANT__",
-value);
+   dwarf_single_register_size srs;
+   if (srs.common_size > 0)
+ {
+   builtin_define_with_int_value 
("__LIBGCC_DWARF_REG_SIZES_CONSTANT__",
+  srs.common_size);
+   builtin_define_with_int_value ("__LIBGCC_DWARF_REG_MAXIMUM__",
+  srs.maximum_register);
+ }
   }
   builtin_define_with_int_value ("__LIBGCC_DWARF_CIE_DATA_ALIGNMENT__",
 DWARF_CIE_DATA_ALIGNMENT);
diff --git a/gcc/debug.h b/gcc/debug.h
index 4fe9f3570ac..2e843da8b41 100644
--- a/gcc/debug.h
+++ b/gcc/debug.h
@@ -245,7 +245,18 @@ extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
 
 /* Dwarf2 frame information.  */
 
-extern int dwarf_reg_sizes_constant ();
+/* Query size information about DWARF registers.  */
+struct dwarf_single_register_size
+{
+  dwarf_single_register_size();
+
+  /* The common register size, or 0 if the register size varies.  */
+  unsigned int common_size;
+
+  /* The maximum register number that is actually present.  Registers
+ above the maximum are size zero even if common_size is positive.  */
+  unsigned int maximum_register;
+};
 
 extern void dwarf2out_begin_prologue (unsigned int, unsigned int,
  const char *);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index b6d7900f212..eb29cfb95aa 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -9847,7 +9847,7 @@ sizes of those pieces in the table used by the unwinder 
at runtime.
 It will be called by @code{generate_dwarf_reg_sizes} after
 filling in a single size corresponding to each hard register;
 @var{sizes} is the address of the table.  It will contain
-@code{DWARF_FRAME_REGISTERS} elements when this hook is called.
+@code{DWARF_FRAME_REGISTERS + 1} elements when this hook is called.
 @end deftypefn
 
 @deftypefn {Target Hook} bool TARGET_ASM_TTYPE (rtx @var{sym})
diff --git a/gcc/dwarf2cfi.cc b/gcc/dwarf2cfi.cc
index d5a27dc36c5..5bd12e070b3 100644
--- a/gcc/dwarf2cfi.cc
+++ 

Re: [PATCH] expr: Fix up store_expr into SUBREG_PROMOTED_* target [PR108264]

2023-01-03 Thread Richard Biener via Gcc-patches



> Am 03.01.2023 um 11:34 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The following testcase ICEs on s390x-linux (e.g. with -march=z13).
> The problem is that target is (subreg/s/u:SI (reg/v:DI 66 [ x+-4 ]) 4)
> and we call convert_move from temp to the SUBREG_REG of that, expecting
> to extend the value properly.  That works nicely if temp has some
> scalar integer mode (or partial one), but ICEs when temp has V4QImode
> on the assertion that from and to modes have the same bitsize.
> store_expr generally allows say store from V4QI to SI target because
> they have the same size and if temp is a CONST_INT, we already have code
> to convert the constant properly, so the following patch just adds handling
> of non-scalar integer modes by converting them to the mode of target
> first before convert_move extends them.
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux and s390x-linux, ok for
> trunk?

Ok

Richard 

> 2023-01-03  Jakub Jelinek  
> 
>PR middle-end/108264
>* expr.cc (store_expr): For stores into SUBREG_PROMOTED_* targets
>from source which doesn't have scalar integral mode first convert
>it to outer_mode.
> 
>* gcc.dg/pr108264.c: New test.
> 
> --- gcc/expr.cc.jj2023-01-02 09:32:23.0 +0100
> +++ gcc/expr.cc2023-01-02 16:48:13.226990815 +0100
> @@ -6226,6 +6226,9 @@ store_expr (tree exp, rtx target, int ca
>  temp = convert_modes (inner_mode, outer_mode, temp,
>SUBREG_PROMOTED_SIGN (target));
>}
> +  else if (!SCALAR_INT_MODE_P (GET_MODE (temp)))
> +temp = convert_modes (outer_mode, TYPE_MODE (TREE_TYPE (exp)),
> +  temp, SUBREG_PROMOTED_SIGN (target));
> 
>   convert_move (SUBREG_REG (target), temp,
>SUBREG_PROMOTED_SIGN (target));
> --- gcc/testsuite/gcc.dg/pr108264.c.jj2023-01-02 17:01:14.865887522 +0100
> +++ gcc/testsuite/gcc.dg/pr108264.c2023-01-02 17:00:52.238209030 +0100
> @@ -0,0 +1,27 @@
> +/* PR middle-end/108264 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-fpic" { target fpic } } */
> +
> +int v;
> +extern int bar (void);
> +
> +static inline void
> +foo (char *d)
> +{
> +  switch (bar ())
> +{
> +case 2:
> +  d[0] = d[1] = d[2] = d[3] = v;
> +  break;
> +case 4:
> +  d[0] = 0;
> +}
> +}
> +
> +int
> +baz (int x)
> +{
> +  foo ((char *) &x);
> +  return x;
> +}
> 
>Jakub
> 


Re: [PATCH] cfgrtl: Don't try to redirect asm goto to EXIT [PR108263]

2023-01-03 Thread Richard Biener via Gcc-patches



> Am 03.01.2023 um 11:29 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The following testcase distilled from Linux kernel on ppc64le ICEs,
> because fixup_reorder_chain sees a bb with a single fallthru edge
> falling into a bb with simple return and decides to redirect
> that fallthru edge to EXIT.  That is possible if the bb ending
> in the fallthru edge doesn't end with a jump or ends with a normal
> unconditional jump, but not when the bb ends with asm goto which can despite
> a single fallthru have multiple labels to the fallthrough basic block.
> 
> The following patch makes sure we never try to redirect such cases to EXIT.
> 
> Bootstrapped/regtested on x86_64-linux, i686-linux and s390x-linux, ok for
> trunk?

Ok

Richard 

> 2023-01-03  Jakub Jelinek  
> 
>PR rtl-optimization/108263
>* cfgrtl.cc (fixup_reorder_chain): Avoid trying to redirect
>asm goto to EXIT.
> 
>* gcc.dg/pr108263.c: New test.
> 
> --- gcc/cfgrtl.cc.jj2023-01-02 09:32:32.0 +0100
> +++ gcc/cfgrtl.cc2023-01-02 14:13:40.658338194 +0100
> @@ -3910,6 +3910,7 @@ fixup_reorder_chain (void)
>   rtx ret_label = NULL_RTX;
>   basic_block nb;
>   edge_iterator ei;
> +  bool asm_goto = false;
> 
>   if (EDGE_COUNT (bb->succs) == 0)
>continue;
> @@ -4016,7 +4017,9 @@ fixup_reorder_chain (void)
>  || e_fall->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
>continue;
> 
> -  /* Otherwise we'll have to use the fallthru fixup below.  */
> +  /* Otherwise we'll have to use the fallthru fixup below.
> + But avoid redirecting asm goto to EXIT.  */
> +  asm_goto = true;
>}
>  else
>{
> @@ -4048,7 +4051,8 @@ fixup_reorder_chain (void)
> return rather than a jump to the return block.  */
>   rtx_insn *ret, *use;
>   basic_block dest;
> -  if (bb_is_just_return (e_fall->dest, &ret, &use)
> +  if (!asm_goto
> +  && bb_is_just_return (e_fall->dest, &ret, &use)
>  && ((PATTERN (ret) == simple_return_rtx && targetm.have_simple_return ())
>  || (PATTERN (ret) == ret_rtx && targetm.have_return ())))
>{
> --- gcc/testsuite/gcc.dg/pr108263.c.jj2023-01-02 14:09:49.381614027 +0100
> +++ gcc/testsuite/gcc.dg/pr108263.c2023-01-02 14:07:16.899772608 +0100
> @@ -0,0 +1,25 @@
> +/* PR rtl-optimization/108263 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +int v, *p;
> +
> +void
> +foo (void)
> +{
> +  int i;
> +  for (i = 0; ; i++)
> +{
> +  if (v)
> +{
> +  __label__ l1;
> +  asm goto ("" : : : : l1);
> +l1:
> +  return;
> +}
> +  if (p[i])
> +break;
> +}
> +  asm goto ("" : : "r" (i) : : l2);
> +l2:;
> +}
> 
>Jakub
> 


[PATCH] expr: Fix up store_expr into SUBREG_PROMOTED_* target [PR108264]

2023-01-03 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase ICEs on s390x-linux (e.g. with -march=z13).
The problem is that target is (subreg/s/u:SI (reg/v:DI 66 [ x+-4 ]) 4)
and we call convert_move from temp to the SUBREG_REG of that, expecting
to extend the value properly.  That works nicely if temp has some
scalar integer mode (or partial one), but ICEs when temp has V4QImode
on the assertion that from and to modes have the same bitsize.
store_expr generally allows say store from V4QI to SI target because
they have the same size and if temp is a CONST_INT, we already have code
to convert the constant properly, so the following patch just adds handling
of non-scalar integer modes by converting them to the mode of target
first before convert_move extends them.

Bootstrapped/regtested on x86_64-linux, i686-linux and s390x-linux, ok for
trunk?

2023-01-03  Jakub Jelinek  

PR middle-end/108264
* expr.cc (store_expr): For stores into SUBREG_PROMOTED_* targets
from source which doesn't have scalar integral mode first convert
it to outer_mode.

* gcc.dg/pr108264.c: New test.

--- gcc/expr.cc.jj  2023-01-02 09:32:23.0 +0100
+++ gcc/expr.cc 2023-01-02 16:48:13.226990815 +0100
@@ -6226,6 +6226,9 @@ store_expr (tree exp, rtx target, int ca
  temp = convert_modes (inner_mode, outer_mode, temp,
SUBREG_PROMOTED_SIGN (target));
}
+  else if (!SCALAR_INT_MODE_P (GET_MODE (temp)))
+   temp = convert_modes (outer_mode, TYPE_MODE (TREE_TYPE (exp)),
+ temp, SUBREG_PROMOTED_SIGN (target));
 
   convert_move (SUBREG_REG (target), temp,
SUBREG_PROMOTED_SIGN (target));
--- gcc/testsuite/gcc.dg/pr108264.c.jj  2023-01-02 17:01:14.865887522 +0100
+++ gcc/testsuite/gcc.dg/pr108264.c 2023-01-02 17:00:52.238209030 +0100
@@ -0,0 +1,27 @@
+/* PR middle-end/108264 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-fpic" { target fpic } } */
+
+int v;
+extern int bar (void);
+
+static inline void
+foo (char *d)
+{
+  switch (bar ())
+{
+case 2:
+  d[0] = d[1] = d[2] = d[3] = v;
+  break;
+case 4:
+  d[0] = 0;
+}
+}
+
+int
+baz (int x)
+{
+  foo ((char *) &x);
+  return x;
+}

Jakub



[PATCH] cfgrtl: Don't try to redirect asm goto to EXIT [PR108263]

2023-01-03 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase distilled from Linux kernel on ppc64le ICEs,
because fixup_reorder_chain sees a bb with a single fallthru edge
falling into a bb with simple return and decides to redirect
that fallthru edge to EXIT.  That is possible if the bb ending
in the fallthru edge doesn't end with a jump or ends with a normal
unconditional jump, but not when the bb ends with asm goto which can despite
a single fallthru have multiple labels to the fallthrough basic block.

The following patch makes sure we never try to redirect such cases to EXIT.

Bootstrapped/regtested on x86_64-linux, i686-linux and s390x-linux, ok for
trunk?

2023-01-03  Jakub Jelinek  

PR rtl-optimization/108263
* cfgrtl.cc (fixup_reorder_chain): Avoid trying to redirect
asm goto to EXIT.

* gcc.dg/pr108263.c: New test.

--- gcc/cfgrtl.cc.jj2023-01-02 09:32:32.0 +0100
+++ gcc/cfgrtl.cc   2023-01-02 14:13:40.658338194 +0100
@@ -3910,6 +3910,7 @@ fixup_reorder_chain (void)
   rtx ret_label = NULL_RTX;
   basic_block nb;
   edge_iterator ei;
+  bool asm_goto = false;
 
   if (EDGE_COUNT (bb->succs) == 0)
continue;
@@ -4016,7 +4017,9 @@ fixup_reorder_chain (void)
  || e_fall->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
continue;
 
- /* Otherwise we'll have to use the fallthru fixup below.  */
+ /* Otherwise we'll have to use the fallthru fixup below.
+But avoid redirecting asm goto to EXIT.  */
+ asm_goto = true;
}
  else
{
@@ -4048,7 +4051,8 @@ fixup_reorder_chain (void)
 return rather than a jump to the return block.  */
   rtx_insn *ret, *use;
   basic_block dest;
-  if (bb_is_just_return (e_fall->dest, &ret, &use)
+  if (!asm_goto
+ && bb_is_just_return (e_fall->dest, &ret, &use)
  && ((PATTERN (ret) == simple_return_rtx && targetm.have_simple_return ())
  || (PATTERN (ret) == ret_rtx && targetm.have_return ())))
{
--- gcc/testsuite/gcc.dg/pr108263.c.jj  2023-01-02 14:09:49.381614027 +0100
+++ gcc/testsuite/gcc.dg/pr108263.c 2023-01-02 14:07:16.899772608 +0100
@@ -0,0 +1,25 @@
+/* PR rtl-optimization/108263 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int v, *p;
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; ; i++)
+{
+  if (v)
+   {
+ __label__ l1;
+ asm goto ("" : : : : l1);
+   l1:
+ return;
+   }
+  if (p[i])
+   break;
+}
+  asm goto ("" : : "r" (i) : : l2);
+l2:;
+}

Jakub



[COMMITTED] ada: output.adb: fix newline being inserted when buffer is full

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Ghjuvan Lacambre 

Before this commit, when GNAT needed to emit lines longer than
the buffer, it accidentally inserted a newline in its output when
attempting to flush its buffer.

We fix this by using Flush_Buffer instead of Write_Eol in Write_Char.

gcc/ada/

* output.adb (Write_Char): Use Flush_Buffer instead of Write_Eol.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/output.adb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/output.adb b/gcc/ada/output.adb
index 33d027ded8e..497643d17ec 100644
--- a/gcc/ada/output.adb
+++ b/gcc/ada/output.adb
@@ -422,10 +422,10 @@ package body Output is
 
procedure Write_Char (C : Character) is
begin
-  pragma Assert (Next_Col in Buffer'Range);
-  if Next_Col = Buffer'Length then
- Write_Eol;
+  if Next_Col > Buffer'Length then
+ Flush_Buffer;
   end if;
+  pragma Assert (Next_Col in Buffer'Range);
 
   if C = ASCII.LF then
  Write_Eol;
-- 
2.34.1



[COMMITTED] ada: Fix format string parsing in GNAT.Formatted_String

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Before this patch, format strings ending with "%%" (two consecutive
percent signs) caused GNAT.Formatted_String."-" to give the wrong
output, and caused the various GNAT.Formatted_String."&" to raise
exceptions with misleading error messages.

Also before this patch, a bug in GNAT.Formatted_String."-" caused
characters from the format string to be dropped. Calling
GNAT.Formatted_String."-" on an instance of
GNAT.Formatted_String.Formatted_String caused subsequent uses of
that instance to return wrong results.

In addition to fixing the parsing of format strings, this patch
centralizes the detection of format specifiers in a unique
procedure.

gcc/ada/

* libgnat/g-forstr.adb
(Advance_And_Accumulate_Until_Next_Specifier): New procedure.
("-"): Replace inline code with call to
Advance_And_Accumulate_Until_Next_Specifier.
(Next_Format): likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/g-forstr.adb | 67 +++-
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/gcc/ada/libgnat/g-forstr.adb b/gcc/ada/libgnat/g-forstr.adb
index 8821de6f280..8353e2c4ad8 100644
--- a/gcc/ada/libgnat/g-forstr.adb
+++ b/gcc/ada/libgnat/g-forstr.adb
@@ -77,6 +77,12 @@ package body GNAT.Formatted_String is
   Value_Needed : Natural range 0 .. 2 := 0;
end record;
 
+   procedure Advance_And_Accumulate_Until_Next_Specifier
+ (Format : Formatted_String);
+   --  Advance Format.D.Index until either the next format specifier is
+   --  encountered, or the end of Format.D.Format is reached. The characters
+   --  advanced over are appended to Format.D.Result.
+
procedure Next_Format
  (Format : Formatted_String;
   F_Spec : out F_Data;
@@ -139,29 +145,13 @@ package body GNAT.Formatted_String is
-
 
function "-" (Format : Formatted_String) return String is
-  F : String renames Format.D.Format;
-  J : Natural renames Format.D.Index;
-  R : Unbounded_String := Format.D.Result;
-
begin
   --  Make sure we get the remaining character up to the next unhandled
   --  format specifier.
 
-  while (J <= F'Length and then F (J) /= '%')
-or else (J < F'Length - 1 and then F (J + 1) = '%')
-  loop
- Append (R, F (J));
-
- --  If we have two consecutive %, skip the second one
-
- if F (J) = '%' and then J < F'Length - 1 and then F (J + 1) = '%' then
-J := J + 1;
- end if;
-
- J := J + 1;
-  end loop;
+  Advance_And_Accumulate_Until_Next_Specifier (Format);
 
-  return To_String (R);
+  return To_String (Format.D.Result);
end "-";
 
-
@@ -318,6 +308,33 @@ package body GNAT.Formatted_String is
   F.D.Ref_Count := F.D.Ref_Count + 1;
end Adjust;
 
+   -
+   -- Advance_And_Accumulate_Until_Next_Specifier --
+   -
+
+   procedure Advance_And_Accumulate_Until_Next_Specifier
+ (Format : Formatted_String)
+   is
+   begin
+  loop
+ if Format.D.Index > Format.D.Format'Last then
+exit;
+ end if;
+
+ if Format.D.Format (Format.D.Index) /= '%' then
+Append (Format.D.Result, Format.D.Format (Format.D.Index));
+Format.D.Index := Format.D.Index + 1;
+ elsif Format.D.Index + 1 <= Format.D.Format'Last
+   and then Format.D.Format (Format.D.Index + 1) = '%'
+ then
+Append (Format.D.Result, '%');
+Format.D.Index := Format.D.Index + 2;
+ else
+exit;
+ end if;
+  end loop;
+   end Advance_And_Accumulate_Until_Next_Specifier;
+

-- Decimal_Format --

@@ -505,19 +522,7 @@ package body GNAT.Formatted_String is
 
   --  Got to next %
 
-  while (J <= F'Last and then F (J) /= '%')
-or else (J < F'Last - 1 and then F (J + 1) = '%')
-  loop
- Append (Format.D.Result, F (J));
-
- --  If we have two consecutive %, skip the second one
-
- if F (J) = '%' and then J < F'Last - 1 and then F (J + 1) = '%' then
-J := J + 1;
- end if;
-
- J := J + 1;
-  end loop;
+  Advance_And_Accumulate_Until_Next_Specifier (Format);
 
   if J >= F'Last or else F (J) /= '%'  then
  raise Format_Error with "no format specifier found for parameter"
-- 
2.34.1



[COMMITTED] ada: Another small adjustment to special resolution of membership test

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

This goes back to the original implementation but keeps the special size
test with universal_integer to cope with its limited range.

gcc/ada/

* sem_res.adb (Resolve_Membership_Op): Adjust again latest change.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_res.adb | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
index b54ed93a7f7..348d272a399 100644
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -10105,11 +10105,11 @@ package body Sem_Res is
   then
  T := Etype (R);
 
-  --  If the type of the left operand is universal_integer and that of the
-  --  right operand is smaller, then we do not resolve the operands to the
-  --  tested type but to universal_integer instead. If not conforming to
-  --  the letter, it's conforming to the spirit of the specification of
-  --  membership tests, which are typically used to guard an operation and
+  --  If the left operand is of a universal numeric type and the right
+  --  operand is not, we do not resolve the operands to the tested type
+  --  but to the universal type instead. If not conforming to the letter,
+  --  it's conforming to the spirit of the specification of membership
+  --  tests, which are typically used to guard a specific operation and
   --  ought not to fail a check in doing so. Without this, in the case of
 
   --type Small_Length is range 1 .. 16;
@@ -10127,9 +10127,14 @@ package body Sem_Res is
   --  for example the large values of Long_Long_Long_Unsigned.
 
   elsif not Is_Overloaded (L)
-and then Etype (L) = Universal_Integer
+and then Is_Universal_Numeric_Type (Etype (L))
 and then (Is_Overloaded (R)
-   or else RM_Size (Etype (R)) < RM_Size (Universal_Integer))
+   or else
+ (not Is_Universal_Numeric_Type (Etype (R))
+   and then
+ (not Is_Integer_Type (Etype (R))
+   or else
+  RM_Size (Etype (R)) < RM_Size (Universal_Integer))))
   then
  T := Etype (L);
 
-- 
2.34.1



[COMMITTED] ada: Fix GNAT.Formatted_String's handling of real values

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Before this patch, passing a width and a precision through
arguments with the "*" syntax always failed for real values in
GNAT.Formatted_String's routines.

gcc/ada/

* libgnat/g-forstr.adb (P_Flt_Format): Add "*" syntax handling.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/g-forstr.adb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/libgnat/g-forstr.adb b/gcc/ada/libgnat/g-forstr.adb
index c9fb86b44f7..2179818bba4 100644
--- a/gcc/ada/libgnat/g-forstr.adb
+++ b/gcc/ada/libgnat/g-forstr.adb
@@ -686,9 +686,10 @@ package body GNAT.Formatted_String is
begin
   Next_Format (Format, F, Start);
 
-  if F.Value_Needed > 0 then
+  if F.Value_Needed /= Format.D.Stored_Value then
  Raise_Wrong_Format (Format);
   end if;
+  Format.D.Stored_Value := 0;
 
   if F.Precision = Unset then
  Aft := 6;
-- 
2.34.1



[COMMITTED] ada: Make Sem_Util.Is_Aliased_View predicate more robust

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The predicate implements the rules of the language so it needs to cope with
constructs rewritten by the expander, in particular explicit dereferences
that the expander uses liberally for various purposes.

This change makes the detection of rewritten calls more robust, plugging an
existing loophole for specific objects and exposing a missing propagation of
the Is_Aliased flag for certain build-in-place objects, as well as adds the
detection of rewritten return objects.

It also contains a small enhancement to Set_Debug_Info_Defining_Id aimed at
making it easier to debug the generated code by means of -gnatD.

gcc/ada/

* sem_util.ads (Set_Debug_Info_Defining_Id): Adjust comment.
* sem_util.adb (Is_Aliased_View) <N_Explicit_Dereference>: Return
false for more artificial dereferences generated by the expander.
(Set_Debug_Info_Defining_Id): Set Debug_Info_Needed unconditionally
in -gnatD mode.
* exp_ch6.adb (Replace_Renaming_Declaration_Id): Also preserve the
Is_Aliased flag.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch6.adb  |  4 
 gcc/ada/sem_util.adb | 13 +++--
 gcc/ada/sem_util.ads |  4 ++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index 0bc2559751b..975a96668df 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -9378,6 +9378,10 @@ package body Exp_Ch6 is
   Preserve_Comes_From_Source (Orig_Id, Orig_Decl);
 
   Set_Comes_From_Source (New_Id, False);
+
+  --  Preserve aliased indication
+
+  Set_Is_Aliased (Orig_Id, Is_Aliased (New_Id));
end Replace_Renaming_Declaration_Id;
 
-
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index a1cebb08291..3132446515b 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -15255,8 +15255,15 @@ package body Sem_Util is
   then
  return Is_Aliased_View (Expression (Obj));
 
+  --  The dereference of an access-to-object value denotes an aliased view,
+  --  but this routine uses the rules of the language so we need to exclude
+  --  rewritten constructs that introduce artificial dereferences.
+
   elsif Nkind (Obj) = N_Explicit_Dereference then
- return Nkind (Original_Node (Obj)) /= N_Function_Call;
+ return not Is_Captured_Function_Call (Obj)
+   and then not
+ (Nkind (Parent (Obj)) = N_Object_Renaming_Declaration
+   and then Is_Return_Object (Defining_Entity (Parent (Obj))));
 
   else
  return False;
@@ -27394,7 +27401,9 @@ package body Sem_Util is
 
procedure Set_Debug_Info_Defining_Id (N : Node_Id) is
begin
-  if Comes_From_Source (Defining_Identifier (N)) then
+  if Comes_From_Source (Defining_Identifier (N))
+or else Debug_Generated_Code
+  then
  Set_Debug_Info_Needed (Defining_Identifier (N));
   end if;
end Set_Debug_Info_Defining_Id;
diff --git a/gcc/ada/sem_util.ads b/gcc/ada/sem_util.ads
index b61695ea729..dc1bb084b54 100644
--- a/gcc/ada/sem_util.ads
+++ b/gcc/ada/sem_util.ads
@@ -3096,8 +3096,8 @@ package Sem_Util is
--  associated name (i.e. the Node_Id associated with its name).
 
procedure Set_Debug_Info_Defining_Id (N : Node_Id);
-   --  Call Set_Debug_Info_Needed on Defining_Identifier (N) if it comes
-   --  from source.
+   --  Call Set_Debug_Info_Needed on Defining_Identifier (N) if it comes from
+   --  source or we are in -gnatD mode, where we are debugging generated code.
 
procedure Set_Debug_Info_Needed (T : Entity_Id);
--  Sets the Debug_Info_Needed flag on entity T , and also on any entities
-- 
2.34.1



[COMMITTED] ada: Fix premature finalization of return temporary

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

Various parts of the expander and the code generator must have a consistent
view on which temporaries generated for return statements must be finalized
because they are regular temporaries, and which ones must not be since they
are allocated on the return stack directly.  The Is_Related_To_Func_Return
predicate is used for this purpose and needs to be tested consistently.

gcc/ada/

* exp_ch6.adb (Expand_Simple_Function_Return): Make sure that a
captured function call also verifies Is_Related_To_Func_Return.
Do not generate an actual subtype for special return objects.
* exp_util.ads (Is_Related_To_Func_Return): Add commentary.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch6.adb  | 16 ++--
 gcc/ada/exp_util.ads |  4 
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index d90ee41712e..db1fd1d172b 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -6435,12 +6435,21 @@ package body Exp_Ch6 is
   --  The result type of the function
 
   Utyp : constant Entity_Id := Underlying_Type (R_Type);
+  --  The underlying result type of the function
 
   Exp : Node_Id := Expression (N);
   pragma Assert (Present (Exp));
 
   Exp_Is_Function_Call : constant Boolean :=
-Nkind (Exp) = N_Function_Call or else Is_Captured_Function_Call (Exp);
+Nkind (Exp) = N_Function_Call
+  or else
+(Is_Captured_Function_Call (Exp)
+  and then Is_Related_To_Func_Return (Entity (Prefix (Exp))));
+  --  If the expression is a captured function call, then we need to make
+  --  sure that the object doing the capture is properly recognized by the
+  --  Is_Related_To_Func_Return predicate; otherwise, if it is of a type
+  --  that needs finalization, Requires_Cleanup_Actions would return true
+  --  because of it and Build_Finalizer would finalize it prematurely.
 
   Exp_Typ : constant Entity_Id := Etype (Exp);
   --  The type of the expression (not necessarily the same as R_Type)
@@ -6624,7 +6633,8 @@ package body Exp_Ch6 is
  --  size. We create an actual subtype for this purpose. However we
  --  need not do it if the expression is a function call since this
  --  will be done in the called function and doing it here too would
- --  cause a temporary with maximum size to be created.
+ --  cause a temporary with maximum size to be created. Likewise for
+ --  a special return object, since there is no copy in this case.
 
  declare
 Ubt  : constant Entity_Id := Underlying_Type (Base_Type (Exp_Typ));
@@ -6633,6 +6643,8 @@ package body Exp_Ch6 is
 
  begin
 if not Exp_Is_Function_Call
+  and then not (Is_Entity_Name (Exp)
+ and then Is_Special_Return_Object (Entity (Exp)))
   and then Has_Defaulted_Discriminants (Ubt)
   and then not Is_Constrained (Ubt)
   and then not Has_Unchecked_Union (Ubt)
diff --git a/gcc/ada/exp_util.ads b/gcc/ada/exp_util.ads
index 0d09d259f8e..b770d029cd9 100644
--- a/gcc/ada/exp_util.ads
+++ b/gcc/ada/exp_util.ads
@@ -825,6 +825,10 @@ package Exp_Util is
--  Determine whether object Id is related to an expanded return statement.
--  The case concerned is "return Id.all;".
 
+   --  This is effectively used to determine which temporaries generated for
+   --  return statements must be finalized because they are regular temporaries
+   --  and which ones must not be since they are allocated on the return stack.
+
--  WARNING: There is a matching C declaration of this subprogram in fe.h
 
function Is_Renamed_Object (N : Node_Id) return Boolean;
-- 
2.34.1



[COMMITTED] ada: Fix unescaped quotes when combining fdiagnostics-format=json and gnatdJ

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Ghjuvan Lacambre 

This commit fixes a small bug where GNAT would emit unescaped quotes in
its -fdiagnostics-format=json output when using -gnatdJ and emitting
messages about operator functions (e.g. "=").

gcc/ada/

* errout.adb (Write_JSON_Span): Escape subprogram name.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/errout.adb | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/errout.adb b/gcc/ada/errout.adb
index b30e8b51d15..261ba2e8033 100644
--- a/gcc/ada/errout.adb
+++ b/gcc/ada/errout.adb
@@ -2193,8 +2193,9 @@ package body Errout is
  end if;
 
  if Include_Subprogram_In_Messages then
-Write_Str
-  (",""subprogram"":""" & Subprogram_Name_Ptr (Error.Node) & """");
+Write_Str (",""subprogram"":""");
+Write_JSON_Escaped_String (Subprogram_Name_Ptr (Error.Node));
+Write_Str ("""");
  end if;
 
  Write_Str ("}");
-- 
2.34.1



[COMMITTED] ada: GNAT UGN: Adjust wording in "Platform-specific Information" chapter

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Joel Brobecker 

The wording of the introduction paragraph specified an incomplete
list of OSes. Rather than trying to update the list, this commit
changes the text to make it more general. For those parts of
this chapter which only apply to specific OSes, the documentation
is written in a way that it is clear which OS it applies to.

gcc/ada/

* doc/gnat_ugn/platform_specific_information.rst
(_Platform_Specific_Information): Minor rewording of intro text.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/doc/gnat_ugn/platform_specific_information.rst | 4 ++--
 gcc/ada/gnat_ugn.texi  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/platform_specific_information.rst 
b/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
index 4d25dea3d1e..a136a5a0f75 100644
--- a/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
+++ b/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
@@ -13,8 +13,8 @@ Platform-Specific Information
 *
 
 This appendix contains information relating to the implementation
-of run-time libraries on various platforms and also covers
-topics related to the GNAT implementation on Windows and Mac OS.
+of run-time libraries on various platforms and also covers topics
+related to the GNAT implementation on specific Operating Systems.
 
 .. _`Run_Time_Libraries`:
 
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index 8524f0a840f..0470414e150 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -19,7 +19,7 @@
 
 @copying
 @quotation
-GNAT User's Guide for Native Platforms , Dec 01, 2022
+GNAT User's Guide for Native Platforms , Jan 02, 2023
 
 AdaCore
 
@@ -22450,8 +22450,8 @@ Debug Pool info:
 
 
 This appendix contains information relating to the implementation
-of run-time libraries on various platforms and also covers
-topics related to the GNAT implementation on Windows and Mac OS.
+of run-time libraries on various platforms and also covers topics
+related to the GNAT implementation on specific Operating Systems.
 
 @menu
 * Run-Time Libraries:: 
-- 
2.34.1



[COMMITTED] ada: Fix calling convention of foreign functions returning limited type

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

Such functions use neither Ada 2005's build-in-place mechanism nor Ada 95's
return-by-reference mechanism, but instead the common calling convention of
functions returning a nonlimited by-reference type.

gcc/ada/

* exp_ch6.adb (Is_Build_In_Place_Function): Adjust comment.
* sem_util.adb (Compute_Returns_By_Ref): Do not set Returns_By_Ref
on functions with foreign convention.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch6.adb  | 8 
 gcc/ada/sem_util.adb | 7 +--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index 975a96668df..d90ee41712e 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -7759,10 +7759,10 @@ package body Exp_Ch6 is
  return False;
   end if;
 
-  --  If the function is imported from a foreign language, we don't do
-  --  build-in-place, whereas Import (Ada) functions can do it. Note also
-  --  that it is OK for a build-in-place function to return a type with a
-  --  foreign convention because the machinery ensures there is no copying.
+  --  We never use build-in-place if the convention is other than Ada,
+  --  but note that it is OK for a build-in-place function to return a
+  --  type with a foreign convention because the machinery ensures there
+  --  is no copying.
 
   return (Kind in E_Function | E_Generic_Function
or else
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
index 3132446515b..a05ac74d35f 100644
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -6163,9 +6163,12 @@ package body Sem_Util is
   elsif Is_Build_In_Place_Function (Func) then
  Set_Returns_By_Ref (Func);
 
-  --  In Ada 95, limited types are returned by reference
+  --  In Ada 95, limited types are returned by reference, but not if the
+  --  convention is other than Ada.
 
-  elsif Is_Limited_View (Typ) then
+  elsif Is_Limited_View (Typ)
+and then not Has_Foreign_Convention (Func)
+  then
  Set_Returns_By_Ref (Func);
   end if;
end Compute_Returns_By_Ref;
-- 
2.34.1



[COMMITTED] ada: Make Apply_Discriminant_Check.Denotes_Explicit_Dereference more robust

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The predicate implements the rules of the language so it needs to cope with
constructs rewritten by the expander, in particular explicit dereferences
that the expander uses liberally for various purposes.

This change makes the detection of rewritten calls more robust and adds the
detection of rewritten return objects.

gcc/ada/

* checks.adb (Apply_Discriminant_Check.Denotes_Explicit_Dereference):
Return false for artificial dereferences generated by the expander.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/checks.adb | 28 ++--
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
index 5833be3a5de..d518e6720a2 100644
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -1388,13 +1388,23 @@ package body Checks is
 
   function Denotes_Explicit_Dereference (Obj : Node_Id) return Boolean is
   begin
- return
-   Nkind (Obj) = N_Explicit_Dereference
- or else
-   (Is_Entity_Name (Obj)
- and then Present (Renamed_Object (Entity (Obj)))
- and then Nkind (Renamed_Object (Entity (Obj))) =
-  N_Explicit_Dereference);
+ if Is_Entity_Name (Obj) then
+return Present (Renamed_Object (Entity (Obj)))
+  and then
+Denotes_Explicit_Dereference (Renamed_Object (Entity (Obj)));
+
+ --  This routine uses the rules of the language so we need to exclude
+ --  rewritten constructs that introduce artificial dereferences.
+
+ elsif Nkind (Obj) = N_Explicit_Dereference then
+return not Is_Captured_Function_Call (Obj)
+  and then not
+(Nkind (Parent (Obj)) = N_Object_Renaming_Declaration
+  and then Is_Return_Object (Defining_Entity (Parent (Obj))));
+
+ else
+return False;
+ end if;
   end Denotes_Explicit_Dereference;
 
   
@@ -1497,9 +1507,7 @@ package body Checks is
  and then not Is_Aliased_Unconstrained_Component)
or else (Ada_Version >= Ada_2005
  and then not Is_Constrained (T_Typ)
- and then Denotes_Explicit_Dereference (Lhs)
- and then Nkind (Original_Node (Lhs)) /=
-N_Function_Call))
+ and then Denotes_Explicit_Dereference (Lhs)))
   then
  T_Typ := Get_Actual_Subtype (Lhs);
   end if;
-- 
2.34.1



[COMMITTED] ada: Fix parsing bug in GNAT.Formatted_String

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Ronan Desplanques 

Before this patch, GNAT.Formatted_String.Formatted_String failed to
handle format strings with two or more specifiers whose widths were
specified with the "*" syntax. This patch makes the parser
correctly reset its bits of state related to width and precision
parsing when needed.

gcc/ada/

* libgnat/g-forstr.adb (P_Int_Format): Fix parsing bug.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnat/g-forstr.adb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ada/libgnat/g-forstr.adb b/gcc/ada/libgnat/g-forstr.adb
index 8353e2c4ad8..c9fb86b44f7 100644
--- a/gcc/ada/libgnat/g-forstr.adb
+++ b/gcc/ada/libgnat/g-forstr.adb
@@ -808,6 +808,7 @@ package body GNAT.Formatted_String is
  Format.D.Index := Start;
  return Format;
   end if;
+  Format.D.Stored_Value := 0;
 
   case F.Kind is
  when Unsigned_Octal =>
-- 
2.34.1



[COMMITTED] ada: Simplify [Small_]Integer_Type_For

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Bob Duff 

Make Small_Integer_Type_For call Integer_Type_For,
so they share most of the code.

Remove Standard_Long_Integer from consideration,
because that's different on different machines (32- or 64-bit).
Standard_Integer or Standard_Long_Long_Integer will be
chosen.

gcc/ada/

* exp_util.adb (Integer_Type_For): Assertion and comment.
(Small_Integer_Type_For): Remove some code and call
Integer_Type_For instead.
* sem_util.ads (Rep_To_Pos_Flag): Improve comments. "Standard_..."
seems overly pedantic here.
* exp_attr.adb (Succ, Pred): Clean up: make the code as similar as
possible.
* exp_ch4.adb: Minor: named notation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_attr.adb | 25 +++--
 gcc/ada/exp_ch4.adb  |  4 ++--
 gcc/ada/exp_util.adb | 37 +++--
 gcc/ada/sem_util.ads | 18 +-
 4 files changed, 29 insertions(+), 55 deletions(-)

diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index b7554e05f77..50cb307a152 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -5638,9 +5638,7 @@ package body Exp_Attr is
  Make_Integer_Literal (Loc, 1;
 
 else
-   --  Add Boolean parameter True, to request program error if
-   --  we have a bad representation on our hands. If checks are
-   --  suppressed, then add False instead
+   --  Add Boolean parameter depending on check suppression
 
Append_To (Exprs, Rep_To_Pos_Flag (Ptyp, Loc));
Rewrite (N,
@@ -5650,13 +5648,13 @@ package body Exp_Attr is
(Enum_Pos_To_Rep (Etyp), Loc),
Expressions => New_List (
  Make_Op_Subtract (Loc,
-Left_Opnd =>
-  Make_Function_Call (Loc,
-Name =>
-  New_Occurrence_Of
-(TSS (Etyp, TSS_Rep_To_Pos), Loc),
-  Parameter_Associations => Exprs),
-Right_Opnd => Make_Integer_Literal (Loc, 1);
+   Left_Opnd =>
+ Make_Function_Call (Loc,
+   Name =>
+ New_Occurrence_Of
+   (TSS (Etyp, TSS_Rep_To_Pos), Loc),
+   Parameter_Associations => Exprs),
+   Right_Opnd => Make_Integer_Literal (Loc, 1);
 end if;
 
 --  Suppress checks since they have all been done above
@@ -6771,9 +6769,7 @@ package body Exp_Attr is
  Make_Integer_Literal (Loc, 1;
 
 else
-   --  Add Boolean parameter True, to request program error if
-   --  we have a bad representation on our hands. Add False if
-   --  checks are suppressed.
+   --  Add Boolean parameter depending on check suppression
 
Append_To (Exprs, Rep_To_Pos_Flag (Ptyp, Loc));
Rewrite (N,
@@ -6797,7 +6793,8 @@ package body Exp_Attr is
 Analyze_And_Resolve (N, Typ, Suppress => All_Checks);
 
  --  For floating-point, we transform 'Succ into a call to the Succ
- --  floating-point attribute function in Fat_xxx (xxx is root type)
+ --  floating-point attribute function in Fat_xxx (xxx is root type).
+ --  Note that this function takes care of the overflow case.
 
  elsif Is_Floating_Point_Type (Ptyp) then
 Expand_Fpt_Attribute_R (N);
diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb
index a8980a63d46..148b160b792 100644
--- a/gcc/ada/exp_ch4.adb
+++ b/gcc/ada/exp_ch4.adb
@@ -11836,7 +11836,7 @@ package body Exp_Ch4 is
 
  if Is_Fixed_Point_Type (Etype (Expr)) then
 Ityp := Small_Integer_Type_For
-  (Esize (Base_Type (Etype (Expr))), False);
+  (Esize (Base_Type (Etype (Expr))), Uns => False);
 
 --  Generate a temporary with the integer type to facilitate in the
 --  C backend the code generation for the unchecked conversion.
@@ -12206,7 +12206,7 @@ package body Exp_Ch4 is
 declare
Expr_Id : constant Entity_Id := Make_Temporary (Loc, 'T', Conv);
Int_Typ : constant Entity_Id :=
-   Small_Integer_Type_For (RM_Size (Btyp), False);
+ Small_Integer_Type_For (RM_Size (Btyp), Uns => False);
 
 begin
--  Generate a temporary with the integer value. Required in the
diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index 84b0c0e2941..5ab0d3039ca 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -8122,6 +8122,10 @@ package body Exp_Util is
 
function Integer_Type_For (S : Uint; Uns : Boolean) return Entity_Id is

[COMMITTED] ada: Adapt frontend optimization for aggregate assignment

2023-01-03 Thread Marc Poulhiès via Gcc-patches
The frontend currently relies on gigi to use efficient assignment in
particular cases like:

  Some_Var.all := (others => (others => 0));

gigi would use memset to clear memory pointed to by Some_Var.

In the case of an access with a Designated_Storage_Model aspect with a Copy_To
procedure, memset can't be used directly. Instead of simply disabling this
frontend/gigi optimization and having the frontend emit several assignments, a
temporary is used (through the new Build_Assignment_With_Temporary): gigi can
still memset it, and this temporary is then copied into the original
target (and the regular storage model mechanism handles it).

gcc/ada/

* exp_aggr.adb (Build_Assignment_With_Temporary): New.
(Expand_Array_Aggregate): Tune backend optimization
and insert a temporary in the case of an access with
Designated_Storage_Model aspect.
(Convert_Array_Aggr_In_Allocator): Likewise.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_aggr.adb | 106 +--
 1 file changed, 92 insertions(+), 14 deletions(-)

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
index 4d8bb817b80..30f32a78453 100644
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -62,6 +62,7 @@ with Sem_Eval;   use Sem_Eval;
 with Sem_Mech;   use Sem_Mech;
 with Sem_Res;use Sem_Res;
 with Sem_Util;   use Sem_Util;
+use Sem_Util.Storage_Model_Support;
 with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
@@ -75,6 +76,15 @@ with Warnsw; use Warnsw;
 
 package body Exp_Aggr is
 
+   function Build_Assignment_With_Temporary
+ (Target : Node_Id;
+  Typ: Node_Id;
+  Source : Node_Id) return List_Id;
+   --  Returns a list of actions to assign Source to Target of type Typ using
+   --  an extra temporary:
+   --   Tmp := Source;
+   --   Target := Tmp;
+
type Case_Bounds is record
  Choice_Lo   : Node_Id;
  Choice_Hi   : Node_Id;
@@ -2508,6 +2518,42 @@ package body Exp_Aggr is
   return New_Code;
end Build_Array_Aggr_Code;
 
+   -------------------------------------
+   -- Build_Assignment_With_Temporary --
+   -------------------------------------
+
+   function Build_Assignment_With_Temporary
+ (Target : Node_Id;
+  Typ: Node_Id;
+  Source : Node_Id) return List_Id
+   is
+  Loc : constant Source_Ptr := Sloc (Source);
+
+  Aggr_Code : List_Id;
+  Tmp   : Entity_Id;
+  Tmp_Decl  : Node_Id;
+
+   begin
+  Tmp := Make_Temporary (Loc, 'A', Source);
+  Tmp_Decl :=
+Make_Object_Declaration (Loc,
+  Defining_Identifier => Tmp,
+  Object_Definition   => New_Occurrence_Of (Typ, Loc));
+  Set_No_Initialization (Tmp_Decl, True);
+
+  Aggr_Code := New_List (Tmp_Decl);
+  Append_To (Aggr_Code,
+Make_OK_Assignment_Statement (Loc,
+  Name   => New_Occurrence_Of (Tmp, Loc),
+  Expression => Source));
+
+  Append_To (Aggr_Code,
+Make_OK_Assignment_Statement (Loc,
+  Name   => Target,
+  Expression => New_Occurrence_Of (Tmp, Loc)));
+  return Aggr_Code;
+   end Build_Assignment_With_Temporary;
+

-- Build_Record_Aggr_Code --

@@ -4514,15 +4560,29 @@ package body Exp_Aggr is
  New_Aggr := New_Copy_Tree (Aggr);
  Set_Expansion_Delayed (New_Aggr, False);
 
- Aggr_Code :=
-   New_List (
- Make_OK_Assignment_Statement (Sloc (New_Aggr),
-   Name   => Target,
-   Expression => New_Aggr));
+ --  In the case of Target's type using the Designated_Storage_Model
+ --  aspect with a Copy_To procedure, insert a temporary and have the
+ --  back end handle the assignment to it. Copy the result to the
+ --  original target.
+
+ if Has_Designated_Storage_Model_Aspect
+  (Etype (Prefix (Expression (Target))))
+   and then Present (Storage_Model_Copy_To
+   (Storage_Model_Object
+  (Etype (Prefix (Expression (Target))))))
+ then
+Aggr_Code := Build_Assignment_With_Temporary (Target,
+   Typ, New_Aggr);
+ else
+Aggr_Code :=
+  New_List (
+Make_OK_Assignment_Statement (Sloc (New_Aggr),
+  Name   => Target,
+  Expression => New_Aggr));
+ end if;
 
   --  Or else, generate component assignments to it, as for an aggregate
   --  that appears on the right-hand side of an assignment statement.
-
   else
  Aggr_Code :=
Build_Array_Aggr_Code (Aggr,
@@ -7065,16 +7125,34 @@ package body Exp_Aggr is
and then not Is_Possibly_Unaligned_Slice (Target)
and then Aggr_Assignment_OK_For_Backend 

[COMMITTED] ada: Fix detection of function calls in object declarations

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The current code has relied on Original_Node to detect rewritten function
calls in object declarations but that's not robust enough in the presence
of function calls written in object notation.

gcc/ada/

* exp_util.ads (Is_Captured_Function_Call): Declare.
* exp_util.adb (Is_Captured_Function_Call): New predicate.
* exp_ch3.adb (Expand_N_Object_Declaration): Use it to detect a
rewritten function call as the initializing expression.
* exp_ch6.adb (Expand_Simple_Function_Return): Use it to detect a
rewritten function call as the returned expression.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_ch3.adb  | 10 --
 gcc/ada/exp_ch6.adb  |  6 +-
 gcc/ada/exp_util.adb | 24 
 gcc/ada/exp_util.ads |  8 
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
index 6de5843b4ba..def63ed0513 100644
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -7901,18 +7901,16 @@ package body Exp_Ch3 is
 --  secondary stack, then the declaration can be rewritten as
 --  the renaming of this dereference:
 
---type Axx is access all Typ;
---Rxx : constant Axx := Func (...)'reference;
---Obj : Typ renames Rxx.all;
+--type Ann is access all Typ;
+--Rnn : constant Axx := Func (...)'reference;
+--Obj : Typ renames Rnn.all;
 
 --  This avoids an extra copy and, in the case where Typ needs
 --  finalization, a pair of Adjust/Finalize calls (see below).
 
 and then
   ((not Is_Library_Level_Entity (Def_Id)
- and then Nkind (Expr_Q) = N_Explicit_Dereference
- and then not Comes_From_Source (Expr_Q)
- and then Nkind (Original_Node (Expr_Q)) = N_Function_Call
+ and then Is_Captured_Function_Call (Expr_Q)
  and then not Is_Class_Wide_Type (Typ))
 
--  If the initializing expression is a variable with the
diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
index c026b63fcf6..0bc2559751b 100644
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -6440,11 +6440,7 @@ package body Exp_Ch6 is
   pragma Assert (Present (Exp));
 
   Exp_Is_Function_Call : constant Boolean :=
-Nkind (Exp) = N_Function_Call
-  or else (Nkind (Exp) = N_Explicit_Dereference
-   and then Is_Entity_Name (Prefix (Exp))
-   and then Ekind (Entity (Prefix (Exp))) = E_Constant
-   and then Is_Related_To_Func_Return (Entity (Prefix (Exp))));
+Nkind (Exp) = N_Function_Call or else Is_Captured_Function_Call (Exp);
 
   Exp_Typ : constant Entity_Id := Etype (Exp);
   --  The type of the expression (not necessarily the same as R_Type)
diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index 5ab0d3039ca..3c68f917ca9 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -8160,6 +8160,30 @@ package body Exp_Util is
   end if;
end Integer_Type_For;
 
+   -------------------------------
+   -- Is_Captured_Function_Call --
+   -------------------------------
+
+   function Is_Captured_Function_Call (N : Node_Id) return Boolean is
+   begin
+  if Nkind (N) = N_Explicit_Dereference
+and then Is_Entity_Name (Prefix (N))
+and then Ekind (Entity (Prefix (N))) = E_Constant
+  then
+ declare
+Value : constant Node_Id := Constant_Value (Entity (Prefix (N)));
+
+ begin
+return Present (Value)
+  and then Nkind (Value) = N_Reference
+  and then Nkind (Prefix (Value)) = N_Function_Call;
+ end;
+
+  else
+ return False;
+  end if;
+   end Is_Captured_Function_Call;
+
--
-- Is_Displacement_Of_Object_Or_Function_Result --
--
diff --git a/gcc/ada/exp_util.ads b/gcc/ada/exp_util.ads
index a21fb8b5c2a..0d09d259f8e 100644
--- a/gcc/ada/exp_util.ads
+++ b/gcc/ada/exp_util.ads
@@ -757,6 +757,14 @@ package Exp_Util is
--  Return a suitable standard integer type containing at least S bits and
--  of the signedness given by Uns. See also Small_Integer_Type_For.
 
+   function Is_Captured_Function_Call (N : Node_Id) return Boolean;
+   --  Return True if N is a captured function call, i.e. the result of calling
+   --  Remove_Side_Effects on an N_Function_Call node:
+
+   --type Ann is access all Typ;
+   --Rnn : constant Ann := Func (...)'reference;
+   --Rnn.all
+
function Is_Displacement_Of_Object_Or_Function_Result
  (Obj_Id : Entity_Id) return Boolean;
--  Determine whether Obj_Id is a source entity that has been 

[COMMITTED] ada: Cannot reference ghost entity in class-wide precondition

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Javier Miranda 

gcc/ada/

* ghost.adb (Is_OK_Declaration): A reference to a Ghost entity may
appear within the class-wide precondition of a helper subprogram.
This context is treated as suitable because it was already
verified when we were analyzing the original class-wide
precondition.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/ghost.adb | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/ada/ghost.adb b/gcc/ada/ghost.adb
index 0f03285b024..0d2a23dc042 100644
--- a/gcc/ada/ghost.adb
+++ b/gcc/ada/ghost.adb
@@ -261,6 +261,16 @@ package body Ghost is
 then
return True;
 
+--  A reference to a Ghost entity may appear within the class-wide
+--  precondition of a helper subprogram. This context is treated
+--  as suitable because it was already verified when we were
+--  analyzing the original class-wide precondition.
+
+elsif Is_Subprogram (Current_Scope)
+  and then Present (Class_Preconditions_Subprogram (Current_Scope))
+then
+   return True;
+
 --  References to Ghost entities may be relocated in internally
 --  generated bodies.
 
-- 
2.34.1



[COMMITTED] ada: Fix support of Default_Component_Value aspect on derived types

2023-01-03 Thread Marc Poulhiès via Gcc-patches
From: Eric Botcazou 

The support of the Default_Component_Value aspect on derived constrained
array types is broken because of a couple of issues: 1) the derived types
incorrectly inherit the initialization procedure of the ancestor types
and 2) the propagation of the aspect does not work for constrained array
types (unlike for unconstrained array types).

gcc/ada/

* exp_tss.adb (Base_Init_Proc): Do not return the Init_Proc of the
ancestor type for a derived array type.
* sem_ch13.adb (Inherit_Aspects_At_Freeze_Point): Factor out the
common processing done on representation items.
For Default_Component_Value and Default_Value, look into the first
subtype to find out the representation items.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_tss.adb  |   5 +-
 gcc/ada/sem_ch13.adb | 205 +++
 2 files changed, 133 insertions(+), 77 deletions(-)

diff --git a/gcc/ada/exp_tss.adb b/gcc/ada/exp_tss.adb
index 09bb133a41f..23ee3496b23 100644
--- a/gcc/ada/exp_tss.adb
+++ b/gcc/ada/exp_tss.adb
@@ -78,8 +78,11 @@ package body Exp_Tss is
   else
  Proc := Init_Proc (Base_Type (Full_Type), Ref);
 
+ --  For derived record types, if the base type does not have one,
+ --  we use the Init_Proc of the ancestor type.
+
  if No (Proc)
-   and then Is_Composite_Type (Full_Type)
+   and then Is_Record_Type (Full_Type)
and then Is_Derived_Type (Full_Type)
  then
 return Init_Proc (Root_Type (Full_Type), Ref);
diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
index 618f935e4fe..e5f0ebcd6a2 100644
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -13493,12 +13493,68 @@ package body Sem_Ch13 is
-
 
procedure Inherit_Aspects_At_Freeze_Point (Typ : Entity_Id) is
+  function Get_Inherited_Rep_Item
+(E   : Entity_Id;
+ Nam : Name_Id) return Node_Id;
+  --  Search the Rep_Item chain of entity E for an instance of a rep item
+  --  (pragma, attribute definition clause, or aspect specification) whose
+  --  name matches the given name Nam, and that has been inherited from its
+  --  parent, i.e. that has not been directly specified for E . If one is
+  --  found, it is returned, otherwise Empty is returned.
+
+  function Get_Inherited_Rep_Item
+(E: Entity_Id;
+ Nam1 : Name_Id;
+ Nam2 : Name_Id) return Node_Id;
+  --  Search the Rep_Item chain of entity E for an instance of a rep item
+  --  (pragma, attribute definition clause, or aspect specification) whose
+  --  name matches one of the given names Nam1 or Nam2, and that has been
+  --  inherited from its parent, i.e. that has not been directly specified
+  --  for E . If one is found, it is returned, otherwise Empty is returned.
+
   function Is_Pragma_Or_Corr_Pragma_Present_In_Rep_Item
 (Rep_Item : Node_Id) return Boolean;
   --  This routine checks if Rep_Item is either a pragma or an aspect
   --  specification node whose corresponding pragma (if any) is present in
   --  the Rep Item chain of the entity it has been specified to.
 
+  
+  -- Get_Inherited_Rep_Item --
+  
+
+  function Get_Inherited_Rep_Item
+(E   : Entity_Id;
+ Nam : Name_Id) return Node_Id
+  is
+ Rep : constant Node_Id
+ := Get_Rep_Item (E, Nam, Check_Parents => True);
+  begin
+ if Present (Rep)
+   and then not Has_Rep_Item (E, Nam, Check_Parents => False)
+ then
+return Rep;
+ else
+return Empty;
+ end if;
+  end Get_Inherited_Rep_Item;
+
+  function Get_Inherited_Rep_Item
+(E: Entity_Id;
+ Nam1 : Name_Id;
+ Nam2 : Name_Id) return Node_Id
+  is
+ Rep : constant Node_Id
+ := Get_Rep_Item (E, Nam1, Nam2, Check_Parents => True);
+  begin
+ if Present (Rep)
+   and then not Has_Rep_Item (E, Nam1, Nam2, Check_Parents => False)
+ then
+return Rep;
+ else
+return Empty;
+ end if;
+  end Get_Inherited_Rep_Item;
+
   --
   -- Is_Pragma_Or_Corr_Pragma_Present_In_Rep_Item --
   --
@@ -13513,6 +13569,8 @@ package body Sem_Ch13 is
Present_In_Rep_Item (Entity (Rep_Item), Aspect_Rep_Item (Rep_Item));
   end Is_Pragma_Or_Corr_Pragma_Present_In_Rep_Item;
 
+  Rep : Node_Id;
+
--  Start of processing for Inherit_Aspects_At_Freeze_Point
 
begin
@@ -13543,40 +13601,36 @@ package body Sem_Ch13 is
 
   --  Ada_05/Ada_2005
 
-  if not Has_Rep_Item (Typ, Name_Ada_05, Name_Ada_2005, False)
-and then 

Re: [PATCH 2/4] Initial Emeraldrapids Support

2023-01-03 Thread Uros Bizjak via Gcc-patches
On Tue, Jan 3, 2023 at 9:39 AM Hu, Lin1  wrote:
>
> gcc/ChangeLog:
>
> * common/config/i386/cpuinfo.h (get_intel_cpu): Handle Emeraldrapids.
> * common/config/i386/i386-common.cc: Add Emeraldrapids.

OK.

Thanks,
Uros.

> ---
>  gcc/common/config/i386/cpuinfo.h  | 2 ++
>  gcc/common/config/i386/i386-common.cc | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/gcc/common/config/i386/cpuinfo.h 
> b/gcc/common/config/i386/cpuinfo.h
> index bde231c07ee..3729b0f14a5 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -551,6 +551,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
>break;
>  case 0x8f:
>/* Sapphire Rapids.  */
> +case 0xcf:
> +  /* Emerald Rapids.  */
>cpu = "sapphirerapids";
>CHECK___builtin_cpu_is ("corei7");
>CHECK___builtin_cpu_is ("sapphirerapids");
> diff --git a/gcc/common/config/i386/i386-common.cc 
> b/gcc/common/config/i386/i386-common.cc
> index 7751265aff4..026926d8b41 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2465,6 +2465,8 @@ const pta processor_alias_table[] =
>  M_CPU_SUBTYPE (INTEL_COREI7_COOPERLAKE), P_PROC_AVX512F},
>{"sapphirerapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL, 
> PTA_SAPPHIRERAPIDS,
>  M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
> +  {"emeraldrapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL, 
> PTA_SAPPHIRERAPIDS,
> +M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
>{"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
>  M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
>{"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
> --
> 2.18.2
>


Re: [PATCH 1/4] i386: Remove Meteorlake's family_model

2023-01-03 Thread Uros Bizjak via Gcc-patches
On Tue, Jan 3, 2023 at 9:39 AM Hu, Lin1  wrote:
>
> Hi all,
>
> This patch aims to modify Meteorlake's family_model.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Lin
>
> gcc/ChangeLog:
>
> * common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
> for meteorlake.

OK.

Thanks,
Uros.

> ---
>  gcc/common/config/i386/cpuinfo.h | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/gcc/common/config/i386/cpuinfo.h 
> b/gcc/common/config/i386/cpuinfo.h
> index 099a02467e6..bde231c07ee 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -540,7 +540,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
>/* Alder Lake.  */
>  case 0xb7:
>/* Raptor Lake.  */
> -case 0xb5:
>  case 0xaa:
>  case 0xac:
>/* Meteor Lake.  */
> --
> 2.18.2
>


Re: [x86 PATCH] Improve ix86_expand_int_movcc to allow condition (mask) sharing.

2023-01-03 Thread Uros Bizjak via Gcc-patches
On Mon, Jan 2, 2023 at 11:50 AM Roger Sayle  wrote:
>
>
> This patch modifies the way that ix86_expand_int_movcc generates RTL,
> to allow the condition mask to be shared/reused between multiple
> conditional move sequences.  Such redundancy is common when RTL
> if-conversion transforms non-trivial basic blocks.
>
> As a motivating example, consider the new test case:
>
> int a, b, c, d;
> int foo(int x)
> {
> if (x == 0) {
> a = 3;
> b = 1;
> c = 4;
> d = 1;
> } else {
> a = 5;
> b = 9;
> c = 2;
> d = 7;
> }
> return x;
> }
>
> This is currently compiled, with -O2, to:
>
> foo:cmpl$1, %edi
> movl%edi, %eax
> sbbl%edi, %edi
> andl$-2, %edi
> addl$5, %edi
> cmpl$1, %eax
> sbbl%esi, %esi
> movl%edi, a(%rip)
> andl$-8, %esi
> addl$9, %esi
> cmpl$1, %eax
> sbbl%ecx, %ecx
> movl%esi, b(%rip)
> andl$2, %ecx
> addl$2, %ecx
> cmpl$1, %eax
> sbbl%edx, %edx
> movl%ecx, c(%rip)
> andl$-6, %edx
> addl$7, %edx
> movl%edx, d(%rip)
> ret
>
> Notice that the if-then-else blocks have been if-converted into four
> conditional move sequences/assignments, each consisting of cmpl, sbbl,
> andl and addl.  However, as the conditions are the same, the cmpl and
> sbbl instructions used to generate the mask could be shared by CSE.
>
> This patch enables that, so we now generate:
>
> foo:cmpl$1, %edi
> movl%edi, %eax
> sbbl%edx, %edx
> movl%edx, %edi
> movl%edx, %esi
> movl%edx, %ecx
> andl$-6, %edx
> andl$-2, %edi
> andl$-8, %esi
> andl$2, %ecx
> addl$7, %edx
> addl$5, %edi
> addl$9, %esi
> addl$2, %ecx
> movl%edx, d(%rip)
> movl%edi, a(%rip)
> movl%esi, b(%rip)
> movl%ecx, c(%rip)
> ret
>
> Notice, the code now contains only a single cmpl and a single sbbl,
> with their result being shared (via movl).
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?
>
>
> 2023-01-02  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386-expand.cc (ix86_expand_int_movcc): Rewrite
> RTL expansion to allow condition (mask) to be shared/reused,
> by avoiding overwriting pseudos and adding REG_EQUAL notes.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/cmov10.c: New test case.

OK.

Thanks,
Uros.

>
>
> Thanks in advance,
> Roger
> --
>


RE: [PATCH] loading float member of parameter stored via int registers

2023-01-03 Thread Hu, Lin1 via Gcc-patches
Sorry for sending this mail; I entered the wrong command line.

-Original Message-
From: Gcc-patches  On Behalf 
Of Segher Boessenkool
Sent: Tuesday, January 3, 2023 5:00 PM
To: Andrew Pinski 
Cc: Jiufu Guo ; Jiufu Guo via Gcc-patches 
; Richard Biener ; Richard 
Biener ; dje@gmail.com; li...@gcc.gnu.org; 
jeffreya...@gmail.com
Subject: Re: [PATCH] loading float member of parameter stored via int registers

Hi!

On Fri, Dec 30, 2022 at 12:30:04AM -0800, Andrew Pinski wrote:
> On Thu, Dec 29, 2022 at 11:45 PM Segher Boessenkool 
>  wrote:
> > Ah!  This simply shows rs6000_modes_tieable_p is decidedly non-optimal:
> > it does not allow tying a scalar float to anything else.  No such 
> > thing is required, or good apparently.  I wonder why we have such 
> > restrictions at all in rs6000; is it just unfortunate history, was 
> > it good at one point in time?
> 
> The documentation for TARGET_MODES_TIEABLE_P says the following:
> If TARGET_HARD_REGNO_MODE_OK (r, mode1) and TARGET_HARD_REGNO_MODE_OK 
> (r, mode2) are always the same for any r, then TARGET_MODES_TIEABLE_P 
> (mode1, mode2) should be true. If they differ for any r, you should 
> define this hook to return false unless some other mechanism ensures 
> the accessibility of the value in a narrower mode.
> 
> even though rs6000_hard_regno_mode_ok_uncached's comment has the following:
>   /* The float registers (except for VSX vector modes) can only hold floating
>  modes and DImode.  */

That comment is incorrect.  See fctiw for example, which defines only the 
SImode part of the result (the other bits are undefined).

> TARGET_P8_VECTOR and TARGET_P9_VECTOR has special cased different modes now:
>   if (TARGET_P8_VECTOR && (mode == SImode))
> return 1;
> 
>   if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
> return 1;
> Which I suspect that means rs6000_modes_tieable_p should return true 
> for SImode and SFmode if TARGET_P8_VECTOR is true. Likewise for 
> TARGET_P9_VECTOR and SFmode and QImode/HImode too.

It means that older CPUs do not have as many instructions to do scalar integer 
operations in vector registers, making it (almost) always a losing proposition 
to put scalar integers there.  On newer CPUs it is not quite as bad, there is a 
full(er) complement of instructions to do such things in vector regs, just a 
bit slower than on GPRs.

But yeah we might need to fix hard_regno_mode_ok if we change tieable.


Segher


Re: [PATCH] loading float member of parameter stored via int registers

2023-01-03 Thread Segher Boessenkool
Hi!

On Fri, Dec 30, 2022 at 12:30:04AM -0800, Andrew Pinski wrote:
> On Thu, Dec 29, 2022 at 11:45 PM Segher Boessenkool
>  wrote:
> > Ah!  This simply shows rs6000_modes_tieable_p is decidedly non-optimal:
> > it does not allow tying a scalar float to anything else.  No such thing
> > is required, or good apparently.  I wonder why we have such restrictions
> > at all in rs6000; is it just unfortunate history, was it good at one
> > point in time?
> 
> The documentation for TARGET_MODES_TIEABLE_P says the following:
> If TARGET_HARD_REGNO_MODE_OK (r, mode1) and TARGET_HARD_REGNO_MODE_OK
> (r, mode2) are always the same for any r, then TARGET_MODES_TIEABLE_P
> (mode1, mode2) should be true. If they differ for any r, you should
> define this hook to return false unless some other mechanism ensures
> the accessibility of the value in a narrower mode.
> 
> even though rs6000_hard_regno_mode_ok_uncached's comment has the following:
>   /* The float registers (except for VSX vector modes) can only hold floating
>  modes and DImode.  */

That comment is incorrect.  See fctiw for example, which defines only
the SImode part of the result (the other bits are undefined).

> TARGET_P8_VECTOR and TARGET_P9_VECTOR has special cased different modes now:
>   if (TARGET_P8_VECTOR && (mode == SImode))
> return 1;
> 
>   if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
> return 1;
> Which I suspect that means rs6000_modes_tieable_p should return true
> for SImode and SFmode if TARGET_P8_VECTOR is true. Likewise for
> TARGET_P9_VECTOR and SFmode and QImode/HImode too.

It means that older CPUs do not have as many instructions to do scalar
integer operations in vector registers, making it (almost) always a
losing proposition to put scalar integers there.  On newer CPUs it is
not quite as bad, there is a full(er) complement of instructions to do
such things in vector regs, just a bit slower than on GPRs.

But yeah we might need to fix hard_regno_mode_ok if we change tieable.


Segher


RE: [PATCH 2/4] Initial Emeraldrapids Support

2023-01-03 Thread Hu, Lin1 via Gcc-patches
"[PATCH 2/4] Initial Emeraldrapids Support" aims to add Emeraldrapids support to GCC.
It was my mistake that this description was omitted from the original mail.

-Original Message-
From: Liu, Hongtao  
Sent: Tuesday, January 3, 2023 4:48 PM
To: Hu, Lin1 ; gcc-patches@gcc.gnu.org
Cc: ubiz...@gmail.com
Subject: RE: [PATCH 2/4] Initial Emeraldrapids Support

There are actually only two patches, not four, and the subject *Patch 2/4* 
should be a typo.

> -Original Message-
> From: Hu, Lin1 
> Sent: Tuesday, January 3, 2023 4:37 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao ; ubiz...@gmail.com
> Subject: [PATCH 2/4] Initial Emeraldrapids Support
> 
> gcc/ChangeLog:
> 
>   * common/config/i386/cpuinfo.h (get_intel_cpu): Handle Emeraldrapids.
>   * common/config/i386/i386-common.cc: Add Emeraldrapids.
> ---
>  gcc/common/config/i386/cpuinfo.h  | 2 ++
>  gcc/common/config/i386/i386-common.cc | 2 ++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/gcc/common/config/i386/cpuinfo.h
> b/gcc/common/config/i386/cpuinfo.h
> index bde231c07ee..3729b0f14a5 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -551,6 +551,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
>break;
>  case 0x8f:
>/* Sapphire Rapids.  */
> +case 0xcf:
> +  /* Emerald Rapids.  */
>cpu = "sapphirerapids";
>CHECK___builtin_cpu_is ("corei7");
>CHECK___builtin_cpu_is ("sapphirerapids"); diff --git 
> a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386- 
> common.cc index 7751265aff4..026926d8b41 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2465,6 +2465,8 @@ const pta processor_alias_table[] =
>  M_CPU_SUBTYPE (INTEL_COREI7_COOPERLAKE), P_PROC_AVX512F},
>{"sapphirerapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL, 
> PTA_SAPPHIRERAPIDS,
>  M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
> +  {"emeraldrapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL,
> PTA_SAPPHIRERAPIDS,
> +M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
>{"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
>  M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
>{"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
> --
> 2.18.2



RE: [PATCH 2/4] Initial Emeraldrapids Support

2023-01-03 Thread Liu, Hongtao via Gcc-patches
There are actually only two patches, not four, and the subject *Patch 2/4* 
should be a typo.

> -Original Message-
> From: Hu, Lin1 
> Sent: Tuesday, January 3, 2023 4:37 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao ; ubiz...@gmail.com
> Subject: [PATCH 2/4] Initial Emeraldrapids Support
> 
> gcc/ChangeLog:
> 
>   * common/config/i386/cpuinfo.h (get_intel_cpu): Handle
> Emeraldrapids.
>   * common/config/i386/i386-common.cc: Add Emeraldrapids.
> ---
>  gcc/common/config/i386/cpuinfo.h  | 2 ++
>  gcc/common/config/i386/i386-common.cc | 2 ++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/gcc/common/config/i386/cpuinfo.h
> b/gcc/common/config/i386/cpuinfo.h
> index bde231c07ee..3729b0f14a5 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -551,6 +551,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
>break;
>  case 0x8f:
>/* Sapphire Rapids.  */
> +case 0xcf:
> +  /* Emerald Rapids.  */
>cpu = "sapphirerapids";
>CHECK___builtin_cpu_is ("corei7");
>CHECK___builtin_cpu_is ("sapphirerapids"); diff --git
> a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-
> common.cc
> index 7751265aff4..026926d8b41 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2465,6 +2465,8 @@ const pta processor_alias_table[] =
>  M_CPU_SUBTYPE (INTEL_COREI7_COOPERLAKE), P_PROC_AVX512F},
>{"sapphirerapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL,
> PTA_SAPPHIRERAPIDS,
>  M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
> +  {"emeraldrapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL,
> PTA_SAPPHIRERAPIDS,
> +M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
>{"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
>  M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
>{"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
> --
> 2.18.2



[PATCH 2/4] Initial Emeraldrapids Support

2023-01-03 Thread Hu, Lin1 via Gcc-patches
gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_intel_cpu): Handle Emeraldrapids.
* common/config/i386/i386-common.cc: Add Emeraldrapids.
---
 gcc/common/config/i386/cpuinfo.h  | 2 ++
 gcc/common/config/i386/i386-common.cc | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index bde231c07ee..3729b0f14a5 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -551,6 +551,8 @@ get_intel_cpu (struct __processor_model *cpu_model,
   break;
 case 0x8f:
   /* Sapphire Rapids.  */
+case 0xcf:
+  /* Emerald Rapids.  */
   cpu = "sapphirerapids";
   CHECK___builtin_cpu_is ("corei7");
   CHECK___builtin_cpu_is ("sapphirerapids");
diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index 7751265aff4..026926d8b41 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2465,6 +2465,8 @@ const pta processor_alias_table[] =
 M_CPU_SUBTYPE (INTEL_COREI7_COOPERLAKE), P_PROC_AVX512F},
   {"sapphirerapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL, PTA_SAPPHIRERAPIDS,
 M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
+  {"emeraldrapids", PROCESSOR_SAPPHIRERAPIDS, CPU_HASWELL, PTA_SAPPHIRERAPIDS,
+M_CPU_SUBTYPE (INTEL_COREI7_SAPPHIRERAPIDS), P_PROC_AVX512F},
   {"alderlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
 M_CPU_SUBTYPE (INTEL_COREI7_ALDERLAKE), P_PROC_AVX2},
   {"raptorlake", PROCESSOR_ALDERLAKE, CPU_HASWELL, PTA_ALDERLAKE,
-- 
2.18.2



[PATCH 1/4] i386: Remove Meteorlake's family_model

2023-01-03 Thread Hu, Lin1 via Gcc-patches
Hi all,

This patch aims to modify Meteorlake's family_model.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Lin

gcc/ChangeLog:

* common/config/i386/cpuinfo.h (get_intel_cpu): Remove case 0xb5
for meteorlake.
---
 gcc/common/config/i386/cpuinfo.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 099a02467e6..bde231c07ee 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -540,7 +540,6 @@ get_intel_cpu (struct __processor_model *cpu_model,
   /* Alder Lake.  */
 case 0xb7:
   /* Raptor Lake.  */
-case 0xb5:
 case 0xaa:
 case 0xac:
   /* Meteor Lake.  */
-- 
2.18.2