[PATCH v1] LoongArch: Fix bug for tmpdir-g++.dg-struct-layout-1/t033.

2022-04-11 Thread Lulu Cheng
From: chenglulu 

gcc/ChangeLog:

* config/loongarch/loongarch.cc: Fix bug for
tmpdir-g++.dg-struct-layout-1/t033.
---
 gcc/config/loongarch/loongarch.cc | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 6e24111a79d..f22150a60cc 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -473,13 +473,14 @@ loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree 
type,
 
 static rtx
 loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
-  machine_mode value_mode)
+  machine_mode value_mode,
+  HOST_WIDE_INT offset)
 {
   rtx x = gen_rtx_REG (value_mode, regno);
 
   if (type_mode != value_mode)
 {
-  x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
+  x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
   x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
 }
   return x;
@@ -539,7 +540,8 @@ loongarch_get_arg_info (struct loongarch_arg_info *info,
  {
  case 1:
return loongarch_pass_fpr_single (mode, fregno,
- TYPE_MODE (fields[0].type));
+ TYPE_MODE (fields[0].type),
+ fields[0].offset);
 
  case 2:
return loongarch_pass_fpr_pair (mode, fregno,
-- 
2.31.1



[PATCH v3] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-11 Thread Kewen.Lin via Gcc-patches
Hi,

As PR103623 shows, this is a regression caused by the new built-in
function framework.  Previously we guarded the __builtin_{un,}pack_{longdouble,
ibm128} built-in functions under hard float, so they were unavailable
with the given configuration.  With the new bif infrastructure, they
become available and trigger an ICE due to incomplete support.

Segher and Peter pointed out that we should make it available with
soft float, I agree we can extend it to cover both soft and hard
float.  But considering it's stage 4 now and this regression is
classified as P1, also the previous behavior requiring hard float
aligns with what document [1] says, I think it may be a good idea to
fix it with a small patch to be consistent with the previous
behavior.  Then we can extend the functionality by being tracked in new
PR105213 for GCC13.

Re-bootstrapped and re-tested as before.

v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591147.html

v2: Add one more test case.
https://gcc.gnu.org/pipermail/gcc-patches/2022-April/592981.html

v3: Add FIXME as Segher suggested.

[1] 
https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-ISA-2_002e05.html#Basic-PowerPC-Built-in-Functions-Available-on-ISA-2_002e05

BR,
Kewen
-
PR target/103623

gcc/ChangeLog:

* config/rs6000/rs6000-builtins.def (__builtin_pack_longdouble): Add
nosoft attribute.
(__builtin_unpack_longdouble): Likewise.
(__builtin_pack_ibm128): Likewise.
(__builtin_unpack_ibm128): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr103623.c: New test.
---
 gcc/config/rs6000/rs6000-builtins.def   | 12 --
 gcc/testsuite/gcc.target/powerpc/pr103623.c | 47 +
 2 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103623.c

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index 0f527c5d78f..9170de35874 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -220,7 +220,8 @@
 ; This is redundant with __builtin_pack_ibm128, as it requires long
 ; double to be __ibm128.  Should probably be deprecated.
   const long double __builtin_pack_longdouble (double, double);
-PACK_TF packtf {ibmld}
+; FIXME: Make this available for soft-float, see PR105213.
+PACK_TF packtf {ibmld,nosoft}

   unsigned long __builtin_ppc_mftb ();
 MFTB rs6000_mftb_di {32bit}
@@ -235,18 +236,21 @@
 MTFSF rs6000_mtfsf {}

   const __ibm128 __builtin_pack_ibm128 (double, double);
-PACK_IF packif {ibm128}
+; FIXME: Make this available for soft-float, see PR105213.
+PACK_IF packif {ibm128,nosoft}

   void __builtin_set_fpscr_rn (const int[0,3]);
 SET_FPSCR_RN rs6000_set_fpscr_rn {nosoft}

   const double __builtin_unpack_ibm128 (__ibm128, const int<1>);
-UNPACK_IF unpackif {ibm128}
+; FIXME: Make this available for soft-float, see PR105213.
+UNPACK_IF unpackif {ibm128,nosoft}

 ; This is redundant with __builtin_unpack_ibm128, as it requires long
 ; double to be __ibm128.  Should probably be deprecated.
   const double __builtin_unpack_longdouble (long double, const int<1>);
-UNPACK_TF unpacktf {ibmld}
+; FIXME: Make this available for soft-float, see PR105213.
+UNPACK_TF unpacktf {ibmld,nosoft}


 ; Builtins that have been around just about forever, but not quite.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103623.c 
b/gcc/testsuite/gcc.target/powerpc/pr103623.c
new file mode 100644
index 000..701db8bafa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103623.c
@@ -0,0 +1,47 @@
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-options "-mlong-double-128 -msoft-float" } */
+
+/* Verify there is no ICE.  */
+
+#include 
+#include 
+#include 
+
+#if defined(__LONG_DOUBLE_IEEE128__)
+/* If long double is IEEE 128-bit, we need to use the __ibm128 type instead of
+   long double, and to use the appropriate pack/unpack routines.  We can't use
+   __ibm128 on systems that don't support IEEE 128-bit floating point, because
+   the type is not enabled on those systems.  */
+#define PACK __builtin_pack_ibm128
+#define UNPACK __builtin_unpack_ibm128
+#define LDOUBLE __ibm128
+
+#elif defined(__LONG_DOUBLE_IBM128__)
+#define PACK __builtin_pack_longdouble
+#define UNPACK __builtin_unpack_longdouble
+#define LDOUBLE long double
+
+#else
+#error "long double must be either IBM 128-bit or IEEE 128-bit"
+#endif
+
+extern LDOUBLE bar (LDOUBLE);
+
+int
+main (void)
+{
+  double high = pow (2.0, 60);
+  double low = 2.0;
+  LDOUBLE a = ((LDOUBLE) high) + ((LDOUBLE) low);
+  double x0 = UNPACK (a, 0);
+  /* Let's ignore all error messages about built-in function
+ unsupported due to soft-float, since they are not test
+ points here (this case is to check no ICE).  */
+  /* { dg-excess-errors "pr103623" } */
+  double x1 = UNPACK (a, 1);
+  LDOUBLE b = PACK (x0, x1);
+  LDOUBLE c = bar (b);
+
+

Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-11 Thread Kewen.Lin via Gcc-patches
Hi Segher,

on 2022/4/9 1:31 AM, Segher Boessenkool wrote:
> On Fri, Apr 08, 2022 at 10:09:44AM +0800, Kewen.Lin wrote:
>> As Jakub noted here, we don't have the soft-float support for both m32 and 
>> m64
>> before, as the bifs are always guarded under hard-float previously.
> 
> But that bug was fixed for GCC 12.  Or we thought so, at least :-(
> 

Actually it wasn't fixed due to the incomplete support.  :(

 What makes it ICE on (at least some configurations of) 32-bit now?  Can
 you exclude just 32-bit soft float?
>>
>> As clarified above, both 32-bit and 64-bit have the same root cause for the 
>> ICE,
>> the existing define_insn* supports for these bifs only consider hard-float, 
>> such
>> as for the given test case in the PR, it fails in reload as the recognized
>> unpacktf_nodm doesn't have any available alternatives at soft-float.  eg: we 
>> only
>> have register constraint "d" for
>>   (match_operand:FMOVE128 1 "register_operand" "d,d") 
>> but it's only available for hard-float.
> 
> For me it fails during combine: the unspec suddenly doesn't recog
> anymore.  That might be that "d" thing yes, that is problematical.
> 

I must be missing something: I found that in the combine pass we still have
the insn_code unpacktf_nodm (recog-ed).

> If you want to add "nosoft" now, please add a FIXME comment everywhere
> you do, so we do not forget to fix this for GCC 13.
> 

Does the patch v3 look good to you?

https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593053.html

> Or, try this patch?
> 
> ===
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index fdfbc6566a5c..f05b8358ba0a 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -14580,10 +14580,10 @@ (define_insn_and_split "unpack_dm"
>[(set_attr "type" "fp,fpstore,mtvsr,mfvsr,store")])
>  
>  (define_insn_and_split "unpack_nodm"
> -  [(set (match_operand: 0 "nonimmediate_operand" "=d,m")
> +  [(set (match_operand: 0 "nonimmediate_operand" "=d,m,m")
> (unspec:
> -[(match_operand:FMOVE128 1 "register_operand" "d,d")
> - (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
> +[(match_operand:FMOVE128 1 "register_operand" "d,d,r")
> + (match_operand:QI 2 "const_0_to_1_operand" "i,i,i")]
>  UNSPEC_UNPACK_128BIT))]
>"(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P 
> (mode)"
>"#"
> @@ -14600,7 +14600,7 @@ (define_insn_and_split "unpack_nodm"
>  
>operands[3] = gen_rtx_REG (mode, fp_regno);
>  }
> -  [(set_attr "type" "fp,fpstore")])
> +  [(set_attr "type" "fp,fpstore,store")])
>  
>  (define_insn_and_split "pack"
>[(set (match_operand:FMOVE128 0 "register_operand" "=&d")
> ===
> 
> 

Nice, I confirmed this makes ICE gone, I've filed one new PR
PR105213 for GCC13 further tracking by associating this patch there.

BR,
Kewen


Re: [wwwdocs] Add Ada's changelog entry

2022-04-11 Thread Arnaud Charlet via Gcc-patches
> Thank you all for your feedback and guidance. I have taken Eric's
> feedback and deleted the relevant entry.
> 
> Since I do not have write access, I cannot add myself to the
> MAINTAINERS file. Therefore, I want to explicitly state that I am
> submitting these patches under the DCO. I have read and accept the
> indications and requirements listed in (https://gcc.gnu.org/dco.html).
> 
> There are two patches, the original one, with the DCO signature at the
> end and a second one, with the DCO signature too, with Eric's feedback.
> 
> Should anything else be required, do not hesitate to indicate so.

Thank you, I've just merged your contribution.

Arno


Re: [PATCH RFA(pointer-query)] c++: -Wplacement-new and anon union member [PR100370]

2022-04-11 Thread Richard Biener via Gcc-patches
On Wed, Apr 6, 2022 at 4:26 PM Jason Merrill via Gcc-patches
 wrote:
>
> This bug was an object/value confusion; we are interested in the size
> of *b.ip, but instead the code was calculating the size of b.ip itself.
>
> This seems to be because compute_objsize will compute the size of whatever
> object it can find in the argument: if you pass it a VAR_DECL, it gives you
> the size of that variable.  If you pass it an ADDR_EXPR of a VAR_DECL, it
> again gives you the size of the variable.  The way you can tell the
> difference is by looking at the deref member of access_ref: if it's -1, the
> argument is a pointer to the object.  Since that's what we're interested in,
> we should check for that, like check_dangling_stores does.
>
> This regressed some tests because compute_objsize_r was wrongly zeroing
> deref in the POINTER_PLUS_EXPR handling; adding an offset to a pointer
> doesn't change whether the pointer is itself a variable or a pointer to
> one.  In fact, handling POINTER_PLUS_EXPR only really makes sense for deref
> == -1, where we're adjusting a pointer to the variable.
>
> Tested x86_64-pc-linux-gnu, OK for trunk?

OK.

Thanks,
Richard.

> PR c++/100370
>
> gcc/cp/ChangeLog:
>
> * init.cc (warn_placement_new_too_small): Check deref.
>
> gcc/ChangeLog:
>
> * pointer-query.cc (compute_objsize_r) [POINTER_PLUS_EXPR]: Require
> deref == -1.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/warn/Wplacement-new-size-11.C: New test.
> ---
>  gcc/cp/init.cc|  5 +
>  gcc/pointer-query.cc  |  7 ---
>  .../g++.dg/warn/Wplacement-new-size-11.C  | 15 +++
>  3 files changed, 24 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/warn/Wplacement-new-size-11.C
>
> diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
> index 01e762320f3..43097121244 100644
> --- a/gcc/cp/init.cc
> +++ b/gcc/cp/init.cc
> @@ -2811,6 +2811,11 @@ warn_placement_new_too_small (tree type, tree nelts, 
> tree size, tree oper)
>if (!objsize)
>  return;
>
> +  /* We can only draw conclusions if ref.deref == -1,
> + i.e. oper is the address of the object.  */
> +  if (ref.deref != -1)
> +return;
> +
>offset_int bytes_avail = wi::to_offset (objsize);
>offset_int bytes_need;
>
> diff --git a/gcc/pointer-query.cc b/gcc/pointer-query.cc
> index 4390535ef56..d93657f3206 100644
> --- a/gcc/pointer-query.cc
> +++ b/gcc/pointer-query.cc
> @@ -2299,9 +2299,10 @@ compute_objsize_r (tree ptr, gimple *stmt, bool addr, 
> int ostype,
>if (!compute_objsize_r (ref, stmt, addr, ostype, pref, snlim, qry))
> return false;
>
> -  /* Clear DEREF since the offset is being applied to the target
> -of the dereference.  */
> -  pref->deref = 0;
> +  /* The below only makes sense if the offset is being applied to the
> +address of the object.  */
> +  if (pref->deref != -1)
> +   return false;
>
>offset_int orng[2];
>tree off = pref->eval (TREE_OPERAND (ptr, 1));
> diff --git a/gcc/testsuite/g++.dg/warn/Wplacement-new-size-11.C 
> b/gcc/testsuite/g++.dg/warn/Wplacement-new-size-11.C
> new file mode 100644
> index 000..a6fe82e90ae
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/warn/Wplacement-new-size-11.C
> @@ -0,0 +1,15 @@
> +// PR c++/100370
> +// { dg-do compile { target c++11 } }
> +
> +using size_t = decltype(sizeof(1));
> +inline void *operator new (size_t s, void *p) { return p; }
> +
> +int main()
> +{
> +  struct s1 { int iv[4]; };
> +  struct s2 { union { char* cp; int* ip; }; };
> +
> +  s2 b;
> +  b.ip=new int[8];
> +  new (b.ip+4) s1; // { dg-bogus "-Wplacement-new" }
> +}
>
> base-commit: 44fe49401725055a740ce47e80561b6932b8cd01
> --
> 2.27.0
>


Re: [committed][nvptx] Fix ASM_SPEC workaround for sm_30

2022-04-11 Thread Tom de Vries via Gcc-patches

On 4/7/22 16:17, Thomas Schwinge wrote:

Hi!

On 2022-03-31T09:40:47+0200, Tom de Vries via Gcc-patches 
 wrote:

Newer versions of CUDA no longer support sm_30, and nvptx-tools as
currently doesn't handle that gracefully when verifying
( https://github.com/MentorEmbedded/nvptx-tools/issues/30 ).


There's now 
'as: Deal with CUDA 11.0, "Support for Kepler 'sm_30' and 'sm_32'
architecture based products is dropped"' available for comment/testing.


There's a --no-verify work-around in place in ASM_SPEC, but that one doesn't
work when using -Wa,--verify on the command line.


With that resolved in nvptx-tools, we may then revert these GCC-level
workarounds, GCC commit bf4832d6fa817f66009f100a9cd68953062add7d
"[nvptx] Fix ASM_SPEC workaround for sm_30", and
GCC commit 12fa7641ceed9c9139e2ea7b62c11f3dc5b6f6f4
"[nvptx] Use --no-verify for sm_30".  OK to push, once nvptx-tools ready?


Use a more robust workaround: verify using sm_35 when misa=sm_30 is specified
(either implicitly or explicitly).


Thanks for that suggestion!



Hi,

I've tested the nvptx-tools patch in combination with a patch that
removes ASM_SPEC, and that went fine.


[ Well apart from a new libgomp FAIL:
...
FAIL: libgomp.oacc-fortran/private-variables.f90 
-DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O1 
 at line 142 (test for bogus messages, line 131)

...
but I assume that's unrelated ]

So, patch that removes ASM_SPEC pre-approved.

Thanks,
- Tom


Re: [committed] wwwdocs: readings: www.cmass.com is gone, remove

2022-04-11 Thread Gaius Mulley via Gcc-patches
Gerald Pfeifer  writes:

> I pushed this for now.
>
> Gaius, if you want to make changes to that section of readings.html,
> absolutely feel free to do so (and I'll be happy to help, too).
>
> Gerald
>
> ---
>  htdocs/readings.html | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/htdocs/readings.html b/htdocs/readings.html
> index 12755d7e..8689eab8 100644
> --- a/htdocs/readings.html
> +++ b/htdocs/readings.html
> @@ -569,7 +569,6 @@ names.
>  Modula 3 information
>  
>  
> -  http://www.cmass.com";>http://www.cmass.com
>http://www.modula3.org";>http://www.modula3.org
>  

Hi Gerald,

looks fine - sure how about adding:


Modula 3 information

   http://www.modula3.org";>http://www.modula3.org


Modula 2 information

   https://freepages.modula2.org/intro.html#Modula2";>Information
   https://www.nongnu.org/gm2/12/dialect.html";>Modula-2 
dialects



perhaps?

regards,
Gaius


[PATCH] middle-end: Prevent the use of the cond inversion detection code when both conditions are external. [PR105197]

2022-04-11 Thread Tamar Christina via Gcc-patches
Hi All,

Previously ifcvt used to enforce that a mask A and the inverse of said mask be
represented as ~A. So for the masks

  _25 = _6 != 0;
  _44 = _4 != 0;

ifcvt would produce for an operation requiring the inverse of said mask

  _26 = ~_25;
  _43 = ~_44;

but now that VN is applied to the entire function body we get a simplification
on the mask and produce:

  _26 = _6 == 0;
  _43 = _4 == 0;

This in itself is not a problem semantically speaking (though it does create
more masks that need to be tracked) but when vectorizing the masked conditional
we would still detect _26 and _43 to be inverses of _25 and _44 and mark them
as requiring their operands be swapped.

When vectorizing we swap the operands but don't find the BIT_NOT_EXPR to remove
and so we leave the condition as is which produces invalid code:

-->vectorizing statement: _ifc__41 = _43 ? 0 : _ifc__40;
created new init_stmt: vect_cst__136 = { 0, ... }
add new stmt: _137 = mask__43.26_135 & loop_mask_111
note:  add new stmt: vect__ifc__41.27_138 = VEC_COND_EXPR <_137, 
vect__ifc__40.25_133, vect_cst__136>;

This fixes it by disabling the inversion detection code when the loop isn't
masked, since both conditionals would be external.  Otherwise we'd not use the
new cond_code and would incorrectly still swap the operands.

The resulting code is also better than GCC-11 with most operations now
predicated on the loop mask rather than a ptrue.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR target/105197
* tree-vect-stmts.cc (vectorizable_condition): Prevent cond swap when
not masked.

gcc/testsuite/ChangeLog:

PR target/105197
* gcc.target/aarch64/sve/pr105197-1.c: New test.
* gcc.target/aarch64/sve/pr105197-2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
new file mode 100644
index 
..e33532d8bed5f90f216817a6692544eae1f3ae3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-O -ftree-vectorize" } */
+
+unsigned char arr_7[9][3];
+unsigned char (*main_arr_7)[3] = arr_7;
+int main() {
+  char arr_2[9];
+  int arr_6[9];
+  int x;
+  unsigned i;
+  for (i = 0; i < 9; ++i) {
+arr_2[i] = 21;
+arr_6[i] = 6;
+  }
+  for (i = arr_2[8] - 21; i < 2; i++)
+x = arr_6[i] ? (main_arr_7[8][i] ? main_arr_7[8][i] : 8) : (char)arr_6[i];
+  if (x != 8)
+__builtin_abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c
new file mode 100644
index 
..5eec5cd837d786390c441fc5ddd2f93c1374d3a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O -ftree-vectorize" } */
+
+void f(int n, int y, char *arr_2, char *arr_6) {
+  for (int i = y; i < n; i++)
+arr_6[i] = arr_6[i] ? (arr_2[i] ? 3 : 8) : 1;
+}
+
+/* { dg-final { scan-assembler-not {\tand\tp[0-9]+.b} } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 
5c9e8cfefa5032d39a11696b06cff9ae50f4d46a..a680f991e07f7b147d1fa64e9464d0f7ed0d843f
 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10493,7 +10493,7 @@ vectorizable_condition (vec_info *vinfo,
  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
  tree_code orig_code = cond.code;
  cond.code = invert_tree_comparison (cond.code, honor_nans);
- if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+ if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
{
  masks = &LOOP_VINFO_MASKS (loop_vinfo);
  cond_code = cond.code;


-- 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
new file mode 100644
index 
..e33532d8bed5f90f216817a6692544eae1f3ae3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-O -ftree-vectorize" } */
+
+unsigned char arr_7[9][3];
+unsigned char (*main_arr_7)[3] = arr_7;
+int main() {
+  char arr_2[9];
+  int arr_6[9];
+  int x;
+  unsigned i;
+  for (i = 0; i < 9; ++i) {
+arr_2[i] = 21;
+arr_6[i] = 6;
+  }
+  for (i = arr_2[8] - 21; i < 2; i++)
+x = arr_6[i] ? (main_arr_7[8][i] ? main_arr_7[8][i] : 8) : (char)arr_6[i];
+  if (x != 8)
+__builtin_abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c
new file mode 100644
index 
..5eec5cd837d786390c441fc5ddd2f9

Re: [PATCH] middle-end: Prevent the use of the cond inversion detection code when both conditions are external. [PR105197]

2022-04-11 Thread Richard Biener via Gcc-patches
On Mon, 11 Apr 2022, Tamar Christina wrote:

> Hi All,
> 
> Previously ifcvt used to enforce that a mask A and the inverse of said mask be
> represented as ~A. So for the masks
> 
>   _25 = _6 != 0;
>   _44 = _4 != 0;
> 
> ifcvt would produce for an operation requiring the inverse of said mask
> 
>   _26 = ~_25;
>   _43 = ~_44;
> 
> but now that VN is applied to the entire function body we get a simplification
> on the mask and produce:
> 
>   _26 = _6 == 0;
>   _43 = _4 == 0;
> 
> This in itself is not a problem semantically speaking (though it does create
> more masks that need to be tracked) but when vectorizing the masked 
> conditional
> we would still detect _26 and _43 to be inverses of _25 and _44 and mark them
> as requiring their operands be swapped.
> 
> When vectorizing we swap the operands but don't find the BIT_NOT_EXPR to 
> remove
> and so we leave the condition as is which produces invalid code:
> 
> -->vectorizing statement: _ifc__41 = _43 ? 0 : _ifc__40;
> created new init_stmt: vect_cst__136 = { 0, ... }
> add new stmt: _137 = mask__43.26_135 & loop_mask_111
> note:  add new stmt: vect__ifc__41.27_138 = VEC_COND_EXPR <_137, 
> vect__ifc__40.25_133, vect_cst__136>;
> 
> This fixes disabling the inversion detection code when the loop isn't masked
> since both conditional would be external.  We'd then not use the new cond_code
> and would incorrectly still swap the operands.
> 
> The resulting code is also better than GCC-11 with most operations now
> predicated on the loop mask rather than a ptrue.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

LGTM.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR target/105197
>   * tree-vect-stmts.cc (vectorizable_condition): Prevent cond swap when
>   not masked.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/105197
>   * gcc.target/aarch64/sve/pr105197-1.c: New test.
>   * gcc.target/aarch64/sve/pr105197-2.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
> new file mode 100644
> index 
> ..e33532d8bed5f90f216817a6692544eae1f3ae3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-1.c
> @@ -0,0 +1,20 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-O -ftree-vectorize" } */
> +
> +unsigned char arr_7[9][3];
> +unsigned char (*main_arr_7)[3] = arr_7;
> +int main() {
> +  char arr_2[9];
> +  int arr_6[9];
> +  int x;
> +  unsigned i;
> +  for (i = 0; i < 9; ++i) {
> +arr_2[i] = 21;
> +arr_6[i] = 6;
> +  }
> +  for (i = arr_2[8] - 21; i < 2; i++)
> +x = arr_6[i] ? (main_arr_7[8][i] ? main_arr_7[8][i] : 8) : 
> (char)arr_6[i];
> +  if (x != 8)
> +__builtin_abort ();
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c
> new file mode 100644
> index 
> ..5eec5cd837d786390c441fc5ddd2f93c1374d3a4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr105197-2.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O -ftree-vectorize" } */
> +
> +void f(int n, int y, char *arr_2, char *arr_6) {
> +  for (int i = y; i < n; i++)
> +arr_6[i] = arr_6[i] ? (arr_2[i] ? 3 : 8) : 1;
> +}
> +
> +/* { dg-final { scan-assembler-not {\tand\tp[0-9]+.b} } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 
> 5c9e8cfefa5032d39a11696b06cff9ae50f4d46a..a680f991e07f7b147d1fa64e9464d0f7ed0d843f
>  100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -10493,7 +10493,7 @@ vectorizable_condition (vec_info *vinfo,
> bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> tree_code orig_code = cond.code;
> cond.code = invert_tree_comparison (cond.code, honor_nans);
> -   if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> +   if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
>   {
> masks = &LOOP_VINFO_MASKS (loop_vinfo);
> cond_code = cond.code;
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


[PATCH] ppc: testsuite: require target effectively [PR104253]

2022-04-11 Thread Alexandre Oliva via Gcc-patches


The testcase was missing dg- before require-effective-target.

While at that, I'm also pruning the excess-error warning I got when
the test failed to be disabled because of the above.  I suppose it
might be useful for some target variants.

Tested with target powerpc64-wrs-vxworks7r2.  Ok to install?  Trunk?
gcc-11?  gcc-10?


for gcc/testsuite/ChangeLog

PR target/104253
* gcc.target/powerpc/pr104253.c: Add missing dg- before
require-effective-target.  Prune warning about -mfloat128
possibly not being fully supported.
---
 gcc/testsuite/gcc.target/powerpc/pr104253.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr104253.c 
b/gcc/testsuite/gcc.target/powerpc/pr104253.c
index 02049cc978f05..e5f9499b7c881 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr104253.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr104253.c
@@ -6,8 +6,9 @@
  */
 
 /* { dg-do run } */
-/* { require-effective-target ppc_float128_sw } */
+/* { dg-require-effective-target ppc_float128_sw } */
 /* { dg-options "-O2 -mvsx -mfloat128" } */
+/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
 
 /*
  * PR target/104253


-- 
Alexandre Oliva, happy hacker                https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[committed] libstdc++: Move Filesystem TS path definitions out of class body

2022-04-11 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.

-- >8 --

This fixes some errors with clang caused by instantiating vector<_Cmpt>
before the _Cmpt type is complete.

libstdc++-v3/ChangeLog:

* include/experimental/bits/fs_path.h (path): Define special
members after path::_Cmpt is complete.
---
 .../include/experimental/bits/fs_path.h   | 59 ---
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h 
b/libstdc++-v3/include/experimental/bits/fs_path.h
index 803df424664..b0825ba76e8 100644
--- a/libstdc++-v3/include/experimental/bits/fs_path.h
+++ b/libstdc++-v3/include/experimental/bits/fs_path.h
@@ -212,21 +212,11 @@ namespace __detail
 
 // constructors and destructor
 
-path() noexcept { }
+path() noexcept;
+path(const path& __p);
+path(path&& __p) noexcept;
 
-path(const path& __p) = default;
-
-path(path&& __p) noexcept
-: _M_pathname(std::move(__p._M_pathname)), _M_type(__p._M_type)
-{
-  if (_M_type == _Type::_Multi)
-   _M_split_cmpts();
-  __p.clear();
-}
-
-path(string_type&& __source)
-: _M_pathname(std::move(__source))
-{ _M_split_cmpts(); }
+path(string_type&& __source);
 
 template>
@@ -256,11 +246,11 @@ namespace __detail
   : _M_pathname(_S_convert_loc(__first, __last, __loc))
   { _M_split_cmpts(); }
 
-~path() = default;
+~path();
 
 // assignments
 
-path& operator=(const path& __p) = default;
+path& operator=(const path& __p);
 path& operator=(path&& __p) noexcept;
 path& operator=(string_type&& __source);
 path& assign(string_type&& __source);
@@ -449,11 +439,7 @@ namespace __detail
_Multi, _Root_name, _Root_dir, _Filename
 };
 
-path(string_type __str, _Type __type) : _M_pathname(__str), _M_type(__type)
-{
-  __glibcxx_assert(!empty());
-  __glibcxx_assert(_M_type != _Type::_Multi);
-}
+path(string_type __str, _Type __type);
 
 enum class _Split { _Stem, _Extension };
 
@@ -908,6 +894,37 @@ namespace __detail
 bool   _M_at_end;  // only used when type != _Multi
   };
 
+  inline
+  path::path() noexcept = default;
+
+  inline
+  path::path(const path&) = default;
+
+  inline
+  path::path(path&& __p) noexcept
+  : _M_pathname(std::move(__p._M_pathname)),
+_M_cmpts(__p._M_cmpts),
+_M_type(__p._M_type)
+  { __p.clear(); }
+
+  inline
+  path::path(string_type&& __source)
+  : _M_pathname(std::move(__source))
+  { _M_split_cmpts(); }
+
+  inline
+  path::path(string_type __str, _Type __type)
+  : _M_pathname(__str), _M_type(__type)
+  {
+__glibcxx_assert(!empty());
+__glibcxx_assert(_M_type != _Type::_Multi);
+  }
+
+  inline
+  path::~path() = default;
+
+  inline path&
+  path::operator=(const path& __p) = default;
 
   inline path&
   path::operator=(path&& __p) noexcept
-- 
2.34.1



[committed] libstdc++: Move stacktrace tests to 19_diagnostics directory

2022-04-11 Thread Jonathan Wakely via Gcc-patches
Pushed to trunk.

-- >8 --

This matches where the feature is defined in the current draft.

libstdc++-v3/ChangeLog:

* testsuite/20_util/stacktrace/entry.cc: Moved to...
* testsuite/19_diagnostics/stacktrace/entry.cc: ...here.
* testsuite/20_util/stacktrace/synopsis.cc: Moved to...
* testsuite/19_diagnostics/stacktrace/synopsis.cc: ...here.
* testsuite/20_util/stacktrace/version.cc: Moved to...
* testsuite/19_diagnostics/stacktrace/version.cc: ...here.
---
 .../testsuite/{20_util => 19_diagnostics}/stacktrace/entry.cc | 0
 .../testsuite/{20_util => 19_diagnostics}/stacktrace/synopsis.cc  | 0
 .../testsuite/{20_util => 19_diagnostics}/stacktrace/version.cc   | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename libstdc++-v3/testsuite/{20_util => 19_diagnostics}/stacktrace/entry.cc 
(100%)
 rename libstdc++-v3/testsuite/{20_util => 
19_diagnostics}/stacktrace/synopsis.cc (100%)
 rename libstdc++-v3/testsuite/{20_util => 
19_diagnostics}/stacktrace/version.cc (100%)

diff --git a/libstdc++-v3/testsuite/20_util/stacktrace/entry.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/entry.cc
similarity index 100%
rename from libstdc++-v3/testsuite/20_util/stacktrace/entry.cc
rename to libstdc++-v3/testsuite/19_diagnostics/stacktrace/entry.cc
diff --git a/libstdc++-v3/testsuite/20_util/stacktrace/synopsis.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc
similarity index 100%
rename from libstdc++-v3/testsuite/20_util/stacktrace/synopsis.cc
rename to libstdc++-v3/testsuite/19_diagnostics/stacktrace/synopsis.cc
diff --git a/libstdc++-v3/testsuite/20_util/stacktrace/version.cc 
b/libstdc++-v3/testsuite/19_diagnostics/stacktrace/version.cc
similarity index 100%
rename from libstdc++-v3/testsuite/20_util/stacktrace/version.cc
rename to libstdc++-v3/testsuite/19_diagnostics/stacktrace/version.cc
-- 
2.34.1



Re: [PATCH] ppc: testsuite: require target effectively [PR104253]

2022-04-11 Thread David Edelsohn via Gcc-patches
On Mon, Apr 11, 2022 at 10:53 AM Alexandre Oliva  wrote:
>
>
> The testcase was missing dg- before require-effective-target.
>
> While at that, I'm also pruning the excess-error warning I got when
> the test failed to be disabled because of the above.  I suppose it
> might be useful for some target variants.
>
> Tested with target powerpc64-wrs-vxworks7r2.  Ok to install?  Trunk?
> gcc-11?  gcc-10?

Okay.  This probably counts as obvious.

Thanks, David

>
>
> for gcc/testsuite/ChangeLog
>
> PR target/104253
> * gcc.target/powerpc/pr104253.c: Add missing dg- before
> require-effective-target.  Prune warning about -mfloat128
> possibly not being fully supported.
> ---
>  gcc/testsuite/gcc.target/powerpc/pr104253.c |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr104253.c 
> b/gcc/testsuite/gcc.target/powerpc/pr104253.c
> index 02049cc978f05..e5f9499b7c881 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr104253.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr104253.c
> @@ -6,8 +6,9 @@
>   */
>
>  /* { dg-do run } */
> -/* { require-effective-target ppc_float128_sw } */
> +/* { dg-require-effective-target ppc_float128_sw } */
>  /* { dg-options "-O2 -mvsx -mfloat128" } */
> +/* { dg-prune-output ".-mfloat128. option may not be fully supported" } */
>
>  /*
>   * PR target/104253
>
>
> --
> Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
>Free Software Activist   GNU Toolchain Engineer
> Disinformation flourishes because many people care deeply about injustice
> but very few check the facts.  Ask me about 


Re: [PATCH] mips: testsuite: enforce -ffat-lto-objects for pr102024-4.c

2022-04-11 Thread Richard Sandiford via Gcc-patches
Xi Ruoyao  writes:
> Another brown paper bag fix for MIPS :(.
>
> This failure was not detected running mips.exp=pr102024-* with a cross
> compiler, so I just spotted it now running the test natively.
>
> ---
>
> The body of func is optimized away with -flto -fno-fat-lto-objects, so
> the psABI inform is not emitted, causing a test failure.
>
> gcc/testsuite/
>
>   * gcc.target/mips/pr102024-4.c (dg-options): Add
>   -ffat-lto-objects.

OK, thanks.

Richard

> ---
>  gcc/testsuite/gcc.target/mips/pr102024-4.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.target/mips/pr102024-4.c 
> b/gcc/testsuite/gcc.target/mips/pr102024-4.c
> index 2147cc769d0..ea49e890ee5 100644
> --- a/gcc/testsuite/gcc.target/mips/pr102024-4.c
> +++ b/gcc/testsuite/gcc.target/mips/pr102024-4.c
> @@ -1,5 +1,5 @@
>  // { dg-do compile }
> -// { dg-options "-mabi=64 -mhard-float" }
> +// { dg-options "-mabi=64 -mhard-float -ffat-lto-objects" }
>  
>  struct __attribute__((aligned(16))) test {
>int x[0];


Re: [PATCH v1.1] c++: tolerate cdtors returning this in constexpr

2022-04-11 Thread Alexandre Oliva via Gcc-patches
On Apr  9, 2022, Jason Merrill  wrote:

>> goto ;
>> (void) S::~S (&((struct T *) this)->D.4458)

>> Now, ISTM that the goto target selected for the return stmt bypasses the
>> subobject dtor call and the full-object clobber.  That sounds like
>> another bug, no?

> The subobject cleanup and clobber should be evaluated along the way,
> the evaluation of CLEANUP_EXPR isn't affected by jump_target.

*nod*, I was just confused by the dump, that made the CLEANUP_EXPR seem
part of the STATEMENT_LIST.

> I think the only thing we're wrongly skipping is the actual return,
> and your patch works around that.  I think that makes sense for now; I
> have a patch for GCC 13 to remove cdtor_label entirely.

Ah, great, thanks.

> Your patch is OK.

Here's the adjusted (subject, thanks) patch I'm about to install.


c++: Tolerate cdtors returning this in constexpr

On targets that return this from cdtors, cxx_eval_call_expression may
flag flowing off the end of a dtor.  That's preempted for ctors, and
avoided entirely when dtors return void, but when they return this,
the return value should be conceptually disregarded, without making
room for such internal ABI details to make a program ill-formed, as in
g++.dg/cpp2a/constexpr-dtor12.C on arm-eabi.


for  gcc/cp/ChangeLog

* constexpr.cc (cxx_eval_call_expression): Disregard dtor
result.
---
 gcc/cp/constexpr.cc |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 9c40b0515747d..d8bc864ae6bcc 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2889,7 +2889,8 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
  else
{
  result = *ctx->global->values.get (res);
- if (result == NULL_TREE && !*non_constant_p)
+ if (result == NULL_TREE && !*non_constant_p
+ && !DECL_DESTRUCTOR_P (fun))
{
  if (!ctx->quiet)
error ("% call flows off the end "


-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: [PATCH v2] c++: set loc on call even if result is discarded

2022-04-11 Thread Alexandre Oliva via Gcc-patches
On Apr  9, 2022, Jason Merrill  wrote:

>>> how about handling it in set_cleanup_locs instead?

>> Like this?  That seems reasonable to me.  I'll give it a spin.

> Yes, or perhaps STRIP_NOPS and set the location on whatever is left.
> OK either way.

Hmm, I'm not sure leaving the loc unset on the NOP_EXPR won't have ill
effects, so here's what I'm installing.  Thanks!


c++: Set loc on call even if result is discarded

This patch fixes a divergence in line numbers in diagnostics and,
presumably, debug information, between targets whose cdtors return
this and those that don't.

The problem was visible in g++.dg/cpp2a/constexpr-dtor3.C: while the
dtor call in the cleanup for f4 was expected at the closing brace, on
returning-this targets it came up at the assignment.

The reason is convoluted: statements in cleanups have their location
information removed, to avoid bumpy debugger behavior, and then set to
the location of the end of the scope.

The cleanup dtor call has its locus cleared in both kinds of targets,
but the end-of-scope locus doesn't make it on returning-this targets.
The calls are wrapped with a cast-to-void to discard the unused return
value, and the existing logic only attached the locus to the
conversion NOP_EXPR.

The call thus remains locus-less.  When constexpr logic copies and
evals the body, it sets unset locations; while copying cleanups, the
locus is taken from the cleanup expression, rather than matching the
end-of-scope locus set by the parser.  So we end up with different
locations.

This patch sets the locus of the call even when it's wrapped by a
convert-to-void NOP_EXPR, so it won't diverge any more.


for  gcc/cp/ChangeLog

* semantics.cc (set_cleanup_locs): Propagate locus to call
wrapped in cast-to-void.
---
 gcc/cp/semantics.cc |   12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index a7f6449dafd2e..43627ed30afcb 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -609,7 +609,17 @@ set_cleanup_locs (tree stmts, location_t loc)
 {
   if (TREE_CODE (stmts) == CLEANUP_STMT)
 {
-  protected_set_expr_location (CLEANUP_EXPR (stmts), loc);
+  tree t = CLEANUP_EXPR (stmts);
+  protected_set_expr_location (t, loc);
+  /* Avoid locus differences for C++ cdtor calls depending on whether
+cdtor_returns_this: a conversion to void is added to discard the return
+value, and this conversion ends up carrying the location, and when it
+gets discarded, the location is lost.  So hold it in the call as
+well.  */
+  if (TREE_CODE (t) == NOP_EXPR
+ && TREE_TYPE (t) == void_type_node
+ && TREE_CODE (TREE_OPERAND (t, 0)) == CALL_EXPR)
+   protected_set_expr_location (TREE_OPERAND (t, 0), loc);
   set_cleanup_locs (CLEANUP_BODY (stmts), loc);
 }
   else if (TREE_CODE (stmts) == STATEMENT_LIST)


-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[committed] RISC-V: Sync arch-canonicalize and riscv-common.cc

2022-04-11 Thread Kito Cheng
Currently we sync these manually, but I guess we should re-implement
arch-canonicalize in C++ so that we can reuse the code from
riscv-common.cc.

gcc/ChangeLog:

* config/riscv/arch-canonicalize: Add TODO item.
(IMPLIED_EXT): Sync.
(arch_canonicalize): Checking until no change.
---
 gcc/config/riscv/arch-canonicalize | 58 +++---
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/gcc/config/riscv/arch-canonicalize 
b/gcc/config/riscv/arch-canonicalize
index 49a6204b9cb..73589af608e 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -20,6 +20,9 @@
 # along with GCC; see the file COPYING3.  If not see
 # .
 
+# TODO: Extract riscv_subset_t from riscv-common.cc and make it can be compiled
+#   standalone to replace this script, that also prevents us implementing
+#   that twice and keep sync again and again.
 
 from __future__ import print_function
 import sys
@@ -35,21 +38,30 @@ LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
 # IMPLIED_EXT(ext) -> implied extension list.
 #
 IMPLIED_EXT = {
-  "d" : ["f"],
-  "zk" : ["zkn"],
-  "zk" : ["zkr"],
-  "zk" : ["zkt"],
-  "zkn" : ["zbkb"],
-  "zkn" : ["zbkc"],
-  "zkn" : ["zbkx"],
-  "zkn" : ["zkne"],
-  "zkn" : ["zknd"],
-  "zkn" : ["zknh"],
-  "zks" : ["zbkb"],
-  "zks" : ["zbkc"],
-  "zks" : ["zbkx"],
-  "zks" : ["zksed"],
-  "zks" : ["zksh"],
+  "d" : ["f", "zicsr"],
+  "f" : ["zicsr"],
+  "zk" : ["zkn", "zkr", "zkt"],
+  "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"],
+  "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"],
+
+  "v" : ["zvl128b", "zve64d"],
+  "zve32x" : ["zvl32b"],
+  "zve64x" : ["zve32x", "zvl64b"],
+  "zve32f" : ["f", "zve32x"],
+  "zve64f" : ["f", "zve32f", "zve64x"],
+  "zve64d" : ["d", "zve64f"],
+
+  "zvl64b" : ["zvl32b"],
+  "zvl128b" : ["zvl64b"],
+  "zvl256b" : ["zvl128b"],
+  "zvl512b" : ["zvl256b"],
+  "zvl1024b" : ["zvl512b"],
+  "zvl2048b" : ["zvl1024b"],
+  "zvl4096b" : ["zvl2048b"],
+  "zvl8192b" : ["zvl4096b"],
+  "zvl16384b" : ["zvl8192b"],
+  "zvl32768b" : ["zvl16384b"],
+  "zvl65536b" : ["zvl32768b"],
 }
 
 def arch_canonicalize(arch):
@@ -77,12 +89,16 @@ def arch_canonicalize(arch):
   #
   # Handle implied extensions.
   #
-  for ext in std_exts + long_exts:
-if ext in IMPLIED_EXT:
-  implied_exts = IMPLIED_EXT[ext]
-  for implied_ext in implied_exts:
-if implied_ext not in std_exts + long_exts:
-  long_exts.append(implied_ext)
+  any_change = True
+  while any_change:
+any_change = False
+for ext in std_exts + long_exts:
+  if ext in IMPLIED_EXT:
+implied_exts = IMPLIED_EXT[ext]
+for implied_ext in implied_exts:
+  if implied_ext not in std_exts + long_exts:
+long_exts.append(implied_ext)
+any_change = True
 
   # Single letter extension might appear in the long_exts list,
   # becasue we just append extensions list to the arch string.
-- 
2.34.0



[committed] RISC-V: Support -misa-spec for arch-canonicalize and multilib-generator. [PR104853]

2022-04-11 Thread Kito Cheng
We migrated the default ISA spec version from 2.2 to 20191213, but these
scripts weren't updated at the same time.  This patch makes both scripts
support different ISA spec versions.

gcc/ChangeLog:

PR target/104853
* config.gcc: Pass -misa-spec to arch-canonicalize and
multilib-generator.
* config/riscv/arch-canonicalize: Adding -misa-spec option.
(SUPPORTED_ISA_SPEC): New.
(arch_canonicalize): New argument `isa_spec`.
Handle multiple ISA spec versions.
* config/riscv/multilib-generator: Adding -misa-spec option.
---
 gcc/config.gcc  |  3 ++-
 gcc/config/riscv/arch-canonicalize  | 32 -
 gcc/config/riscv/multilib-generator | 14 +
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5382788e267..48a5bbcf787 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4717,7 +4717,7 @@ case "${target}" in
esac
PYTHON=`which python || which python3 || which python2`
if test "x${PYTHON}" != x; then
-   with_arch=`${PYTHON} 
${srcdir}/config/riscv/arch-canonicalize ${with_arch}`
+   with_arch=`${PYTHON} 
${srcdir}/config/riscv/arch-canonicalize -misa-spec=${with_isa_spec} 
${with_arch}`
fi
tm_defines="${tm_defines} 
TARGET_RISCV_DEFAULT_ARCH=${with_arch}"
 
@@ -4766,6 +4766,7 @@ case "${target}" in
case "${target}" in
riscv*-*-elf*)
if ${srcdir}/config/riscv/multilib-generator \
+   -misa-spec=${with_isa_spec} \
`echo ${with_multilib_generator} | sed 
's/;/ /g'`\
> t-multilib-config;
then
diff --git a/gcc/config/riscv/arch-canonicalize 
b/gcc/config/riscv/arch-canonicalize
index 73589af608e..f36a2ca4593 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -26,11 +26,12 @@
 
 from __future__ import print_function
 import sys
+import argparse
 import collections
 import itertools
 from functools import reduce
 
-
+SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"]
 CANONICAL_ORDER = "imafdgqlcbjktpvn"
 LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
 
@@ -64,12 +65,16 @@ IMPLIED_EXT = {
   "zvl65536b" : ["zvl32768b"],
 }
 
-def arch_canonicalize(arch):
+def arch_canonicalize(arch, isa_spec):
   # TODO: Support extension version.
+  is_isa_spec_2p2 = isa_spec == '2.2'
   new_arch = ""
+  extra_long_ext = []
   if arch[:5] in ['rv32e', 'rv32i', 'rv32g', 'rv64i', 'rv64g']:
-# TODO: We should expand g to imad_zifencei once we support newer spec.
 new_arch = arch[:5].replace("g", "imafd")
+if arch[:5] in ['rv32g', 'rv64g']:
+  if not is_isa_spec_2p2:
+extra_long_ext = ['zicsr', 'zifencei']
   else:
 raise Exception("Unexpected arch: `%s`" % arch[:5])
 
@@ -86,6 +91,8 @@ def arch_canonicalize(arch):
 long_exts = []
 std_exts = list(arch[5:])
 
+  long_exts += extra_long_ext
+
   #
   # Handle implied extensions.
   #
@@ -96,6 +103,9 @@ def arch_canonicalize(arch):
   if ext in IMPLIED_EXT:
 implied_exts = IMPLIED_EXT[ext]
 for implied_ext in implied_exts:
+  if implied_ext == 'zicsr' and is_isa_spec_2p2:
+  continue
+
   if implied_ext not in std_exts + long_exts:
 long_exts.append(implied_ext)
 any_change = True
@@ -115,6 +125,9 @@ def arch_canonicalize(arch):
 return (exts.startswith("x"), exts.startswith("zxm"),
 LONG_EXT_PREFIXES.index(exts[0]), canonical_sort, exts[1:])
 
+  # Removing duplicates.
+  long_exts = list(set(long_exts))
+
   # Multi-letter extension must be in lexicographic order.
   long_exts = list(sorted(filter(lambda x:len(x) != 1, long_exts),
   key=longext_sort))
@@ -134,11 +147,20 @@ def arch_canonicalize(arch):
   # Concat rest of the multi-char extensions.
   if long_exts:
 new_arch += "_" + "_".join(long_exts)
+
   return new_arch
 
 if len(sys.argv) < 2:
   print ("Usage: %s  [*]" % sys.argv)
   sys.exit(1)
 
-for arg in sys.argv[1:]:
-  print (arch_canonicalize(arg))
+parser = argparse.ArgumentParser()
+parser.add_argument('-misa-spec', type=str,
+default='20191213',
+choices=SUPPORTED_ISA_SPEC)
+parser.add_argument('arch_strs', nargs=argparse.REMAINDER)
+
+args = parser.parse_args()
+
+for arch in args.arch_strs:
+  print (arch_canonicalize(arch, args.misa_spec))
diff --git a/gcc/config/riscv/multilib-generator 
b/gcc/config/riscv/multilib-generator
index 1ea2fb25566..36698d48f56 100755
--- a/gcc/config/riscv/multilib-generator
+++ b/gcc/config/riscv/multilib-generator
@@ -46,16 +46,18 @@ import argparse
 # TODO: Add test for this script.
 #
 
+SUPPORTED_ISA_S

Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-11 Thread Segher Boessenkool
Hi!

On Mon, Apr 11, 2022 at 04:29:40PM +0800, Kewen.Lin wrote:
> on 2022/4/9 1:31 AM, Segher Boessenkool wrote:
> > On Fri, Apr 08, 2022 at 10:09:44AM +0800, Kewen.Lin wrote:
> > For me it fails during combine: the unspec suddenly doesn't recog
> > anymore.  That might be that "d" thing yes, that is problematical.
> > 
> 
> I must be missing something; I found that in the combine pass we still
> have the insn_code unpacktf_nodm (recog-ed).

That is recognised many passes earlier though.  When combine runs it
will ICE because recog failed (for powerpc64-linux anyway, everything
default, no -mcpu= etc.).

> > Or, try this patch?
> > 
> > ===
> > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> > index fdfbc6566a5c..f05b8358ba0a 100644
> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -14580,10 +14580,10 @@ (define_insn_and_split "unpack_dm"
> >[(set_attr "type" "fp,fpstore,mtvsr,mfvsr,store")])
> >  
> >  (define_insn_and_split "unpack_nodm"
> > -  [(set (match_operand: 0 "nonimmediate_operand" "=d,m")
> > +  [(set (match_operand: 0 "nonimmediate_operand" "=d,m,m")
> > (unspec:
> > -[(match_operand:FMOVE128 1 "register_operand" "d,d")
> > - (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
> > +[(match_operand:FMOVE128 1 "register_operand" "d,d,r")
> > + (match_operand:QI 2 "const_0_to_1_operand" "i,i,i")]
> >  UNSPEC_UNPACK_128BIT))]
> >"(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P 
> > (mode)"
> >"#"
> > @@ -14600,7 +14600,7 @@ (define_insn_and_split "unpack_nodm"
> >  
> >operands[3] = gen_rtx_REG (mode, fp_regno);
> >  }
> > -  [(set_attr "type" "fp,fpstore")])
> > +  [(set_attr "type" "fp,fpstore,store")])
> >  
> >  (define_insn_and_split "pack"
> >[(set (match_operand:FMOVE128 0 "register_operand" "=&d")
> > ===
> > 
> > 
> 
> Nice, I confirmed this makes ICE gone, I've filed one new PR
> PR105213 for GCC13 further tracking by associating this patch there.

Cool, I'll commit it later today then (after a final regstrap).  The
_nodm pattern just missed the alternative for no FP regs (the _dm
pattern has it, so just an oversight).



Segher


[PATCH] v2 PR102024 - IBM Z: Add psabi diagnostics

2022-04-11 Thread Andreas Krebbel via Gcc-patches
v2:

- Remove redundant num_zero_width_bf_seen and num_fields_seen
  tracking. (Thanks Stefan Schulze-Frielinghaus)

Re-tested with testsuite and ABI tests.



For IBM Z in particular there is a problem with structs like:

struct A { float a; int :0; };

Our ABI document allows passing a struct in an FPR only if it has
exactly one member. On the other hand it says that structs of 1,2,4,8
bytes are passed in a GPR. So this struct is expected to be passed in
a GPR. Since we don't return structs in registers (regardless of the
number of members) it is always returned in memory.

The situation is as follows:

All compiler versions tested return it in memory - as expected.

gcc 11, gcc 12, g++ 12, and clang 13 pass it in a GPR - as expected.

g++ 11 as well as clang++ 13 pass it in an FPR.

For IBM Z we stick to the current GCC 12 behavior, i.e. zero-width
bitfields are NOT ignored.  A struct as above will be passed in a
GPR.  The rationale behind this is that not affecting the C ABI is more
important here.

A patch for clang is in progress: https://reviews.llvm.org/D122388

In addition to the usual regression test I ran the compat and
struct-layout-1 testsuites comparing the compiler before and after the
patch.

gcc/ChangeLog:
PR target/102024
* config/s390/s390-protos.h (s390_function_arg_vector): Remove
prototype.
* config/s390/s390.cc (s390_single_field_struct_p): New function.
(s390_function_arg_vector): Invoke s390_single_field_struct_p.
(s390_function_arg_float): Likewise.

gcc/testsuite/ChangeLog:
PR target/102024
* g++.target/s390/pr102024-1.C: New test.
* g++.target/s390/pr102024-2.C: New test.
* g++.target/s390/pr102024-3.C: New test.
* g++.target/s390/pr102024-4.C: New test.
* g++.target/s390/pr102024-5.C: New test.
* g++.target/s390/pr102024-6.C: New test.
---
 gcc/config/s390/s390-protos.h  |   1 -
 gcc/config/s390/s390.cc| 208 +++--
 gcc/testsuite/g++.target/s390/pr102024-1.C |  12 ++
 gcc/testsuite/g++.target/s390/pr102024-2.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-3.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-4.C |  15 ++
 gcc/testsuite/g++.target/s390/pr102024-5.C |  14 ++
 gcc/testsuite/g++.target/s390/pr102024-6.C |  12 ++
 8 files changed, 187 insertions(+), 104 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-1.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-2.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-3.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-4.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-5.C
 create mode 100644 gcc/testsuite/g++.target/s390/pr102024-6.C

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e6251595870..fd4acaae44a 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -49,7 +49,6 @@ extern void s390_function_profiler (FILE *, int);
 extern void s390_set_has_landing_pad_p (bool);
 extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
 extern int s390_class_max_nregs (enum reg_class, machine_mode);
-extern bool s390_function_arg_vector (machine_mode, const_tree);
 extern bool s390_return_addr_from_memory(void);
 extern bool s390_fma_allowed_p (machine_mode);
 #if S390_USE_TARGET_ATTRIBUTE
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..c091d2a692a 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -12148,29 +12148,26 @@ s390_function_arg_size (machine_mode mode, const_tree 
type)
   gcc_unreachable ();
 }
 
-/* Return true if a function argument of type TYPE and mode MODE
-   is to be passed in a vector register, if available.  */
-
-bool
-s390_function_arg_vector (machine_mode mode, const_tree type)
+/* Return true if a variable of TYPE should be passed as single value
+   with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
+   record type and the field type must match.
+
+   The ABI says that record types with a single member are treated
+   just like that member would be.  This function is a helper to
+   detect such cases.  The function also produces the proper
+   diagnostics for cases where the outcome might be different
+   depending on the GCC version.  */
+static bool
+s390_single_field_struct_p (enum tree_code code, const_tree type,
+   bool strict_size_check_p)
 {
-  if (!TARGET_VX_ABI)
-return false;
-
-  if (s390_function_arg_size (mode, type) > 16)
-return false;
-
-  /* No type info available for some library calls ...  */
-  if (!type)
-return VECTOR_MODE_P (mode);
-
-  /* The ABI says that record types with a single member are treated
- just like that member would be.  */
   int empty_base_seen = 0;
+  bool zero_width_bf_skipped_p = false;
   const_tree orig_type = type;
+
   while (TREE_CODE (type) == RECORD_TYPE)
 {
-  tree field, sin

[PATCH] Fix 'modff' reference in extend.texi

2022-04-11 Thread Paul A. Clarke via Gcc-patches
In commit a2a919aa501e3 (2003), built-ins for modf and modff were added.
In extend.texi, section "Other Builtins", "modf" was added to the paragraph
"There are also built-in versions of the ISO C99 functions [...]" and
"modf" was also added to the paragraph "The ISO C90 functions [...]".
"modff" was not added to either paragraph.

Based on the context clues about where "modfl" and other similar function
pairs like "powf/powl" appear, I believe the reference to "modf" in the
first paragraph (C99) should instead be "modff".

2022-04-11  Paul A. Clarke  

gcc
* doc/extend.texi (Other Builtins): Correct reference to 'modff'.
---
 gcc/doc/extend.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e10b10bc1f14..05c99f4284a6 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -13460,7 +13460,7 @@ There are also built-in versions of the ISO C99 
functions
 @code{expl}, @code{fabsf}, @code{fabsl}, @code{floorf}, @code{floorl},
 @code{fmodf}, @code{fmodl}, @code{frexpf}, @code{frexpl}, @code{ldexpf},
 @code{ldexpl}, @code{log10f}, @code{log10l}, @code{logf}, @code{logl},
-@code{modfl}, @code{modf}, @code{powf}, @code{powl}, @code{sinf},
+@code{modfl}, @code{modff}, @code{powf}, @code{powl}, @code{sinf},
 @code{sinhf}, @code{sinhl}, @code{sinl}, @code{sqrtf}, @code{sqrtl},
 @code{tanf}, @code{tanhf}, @code{tanhl} and @code{tanl}
 that are recognized in any mode since ISO C90 reserves these names for
-- 
2.27.0



[PATCH] builtins: Fix up expand_builtin_int_roundingfn_2 [PR105211]

2022-04-11 Thread Jakub Jelinek via Gcc-patches
Hi!

The expansion of __builtin_iround{,f,l} etc. builtins in some cases
emits calls to a different fallback builtin.  To locate the right builtin
it uses mathfn_built_in_1 with the type of the first argument.
If its TYPE_MAIN_VARIANT is {float,double,long_double}_type_node, all is
fine, but on the following testcase, because GIMPLE considers scalar
float conversions between types with the same mode as useless,
TYPE_MAIN_VARIANT of the arg's type is float32_type_node and because there
isn't __builtin_lroundf32 returns NULL and we ICE.

This patch will first try the type of the argument and, as a fallback, the
type of the first argument of the builtin (which can't be 100% trusted
either if the user incorrectly prototypes it), and if neither works, it
skips the fallback builtin and emits the call normally.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Though, perhaps it would be better to prefer the TREE_VALUE (TYPE_ARG_TYPES)
type and only use TREE_TYPE (arg) as fallback, so that say on
a TYPE_MODE (double_type_node) == TYPE_MODE (long_double_type_node)
target we decide based on what builtin the user actually called rather than
whether the argument has been converted from the other type earlier.

2022-04-11  Jakub Jelinek  

PR rtl-optimization/105211
* builtins.cc (expand_builtin_int_roundingfn_2): If mathfn_built_in_1
fails for TREE_TYPE (arg), retry it with
TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) and if even that
fails, emit call normally.

* gcc.dg/pr105211.c: New test.

--- gcc/builtins.cc.jj  2022-03-14 10:34:34.122924399 +0100
+++ gcc/builtins.cc 2022-04-11 11:57:31.178251743 +0200
@@ -2968,15 +2968,25 @@ expand_builtin_int_roundingfn_2 (tree ex
 a call to lround in the hope that the target provides at least some
 C99 functions.  This should result in the best user experience for
 not full C99 targets.  */
-  tree fallback_fndecl = mathfn_built_in_1
-   (TREE_TYPE (arg), as_combined_fn (fallback_fn), 0);
+  tree fallback_fndecl
+   = mathfn_built_in_1 (TREE_TYPE (arg), as_combined_fn (fallback_fn), 0);
+  /* As scalar float conversions with same mode are useless in GIMPLE,
+we can end up e.g. with _Float32 argument passed to float builtin,
+try to get the type from the builtin prototype instead.  */
+  if (fallback_fndecl == NULL_TREE)
+   if (tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
+ fallback_fndecl
+   = mathfn_built_in_1 (TREE_VALUE (argtypes),
+as_combined_fn (fallback_fn), 0);
+  if (fallback_fndecl)
+   {
+ exp = build_call_nofold_loc (EXPR_LOCATION (exp),
+  fallback_fndecl, 1, arg);
 
-  exp = build_call_nofold_loc (EXPR_LOCATION (exp),
-  fallback_fndecl, 1, arg);
-
-  target = expand_call (exp, NULL_RTX, target == const0_rtx);
-  target = maybe_emit_group_store (target, TREE_TYPE (exp));
-  return convert_to_mode (mode, target, 0);
+ target = expand_call (exp, NULL_RTX, target == const0_rtx);
+ target = maybe_emit_group_store (target, TREE_TYPE (exp));
+ return convert_to_mode (mode, target, 0);
+   }
 }
 
   return expand_call (exp, target, target == const0_rtx);
--- gcc/testsuite/gcc.dg/pr105211.c.jj  2022-04-11 11:48:17.369946248 +0200
+++ gcc/testsuite/gcc.dg/pr105211.c 2022-04-11 11:48:09.924049700 +0200
@@ -0,0 +1,11 @@
+/* PR rtl-optimization/105211 */
+/* { dg-do compile } */
+/* { dg-options "-Os -ffast-math" } */
+/* { dg-add-options float32 } */
+/* { dg-require-effective-target float32 } */
+
+short
+foo (_Float32 f)
+{
+  return __builtin_roundf (f);
+}

Jakub



[PATCH] i386: Fix ICE caused by ix86_emit_i387_log1p [PR105214]

2022-04-11 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase ICEs, because ix86_emit_i387_log1p attempts to
emit something like
  if (cond)
some_code1;
  else
some_code2;
and emits a conditional jump using emit_jump_insn (standard way in
the file) and an unconditional jump using emit_jump.
The problem with that is that if there is a pending stack adjustment,
it isn't emitted before the conditional jump, but is emitted before the
unconditional jump, and therefore the stack is adjusted only conditionally
(at the end of some_code1 above).  That makes the dwarf2 pass unhappy, but
it is a serious wrong-code bug even when it doesn't ICE.

This can be fixed either by emitting pending stack adjust before the
conditional jump as the following patch does, or by not using
  emit_jump (label2);
and instead hand inlining what that function does except for the
pending stack adjustment, like:
  emit_jump_insn (targetm.gen_jump (label2));
  emit_barrier ();
In that case there will be no stack adjustment in the sequence and
it will be done later on somewhere else.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Or do you prefer the other version?

2022-04-11  Jakub Jelinek  

PR target/105214
* config/i386/i386-expand.cc (ix86_emit_i387_log1p): Call
do_pending_stack_adjust.

* gcc.dg/asan/pr105214.c: New test.

--- gcc/config/i386/i386-expand.cc.jj   2022-04-03 21:50:36.001635947 +0200
+++ gcc/config/i386/i386-expand.cc  2022-04-11 15:17:43.943430658 +0200
@@ -17291,6 +17291,8 @@ void ix86_emit_i387_log1p (rtx op0, rtx
   rtx cst, cstln2, cst1;
   rtx_insn *insn;
 
+  do_pending_stack_adjust ();
+
   cst = const_double_from_real_value
 (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), 
XFmode);
   cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
--- gcc/testsuite/gcc.dg/asan/pr105214.c.jj 2022-04-11 15:21:05.467608711 
+0200
+++ gcc/testsuite/gcc.dg/asan/pr105214.c2022-04-11 15:22:10.559697224 
+0200
@@ -0,0 +1,16 @@
+/* PR target/105214 */
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+/* { dg-options "-Ofast -fnon-call-exceptions -fexceptions 
-fstack-check=generic -fsanitize=address -fno-finite-math-only -fsignaling-nans 
-fno-associative-math" } */
+
+float f;
+void bar (int *);
+
+void
+foo (void)
+{
+  int a[1600], b[1];
+  f += __builtin_log1pf (f);
+  bar (a);
+  bar (b);
+}

Jakub



[PATCH] i386: i386-expand formatting fixes

2022-04-11 Thread Jakub Jelinek via Gcc-patches
Hi!

While working on the PR105214 patch, I've noticed incorrect formatting
for a bunch of functions where the function names aren't at the start of
lines.

The following patch fixes it, though of course it isn't a regression.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk or
ok for GCC 13 once stage1 reopens?

2022-04-11  Jakub Jelinek  

* config/i386/i386-expand.cc (ix86_emit_i387_sinh, ix86_emit_i387_cosh,
ix86_emit_i387_tanh, ix86_emit_i387_asinh, ix86_emit_i387_acosh,
ix86_emit_i387_atanh, ix86_emit_i387_log1p, ix86_emit_i387_round,
ix86_emit_swdivsf, ix86_emit_swsqrtsf,
ix86_expand_atomic_fetch_op_loop, ix86_expand_cmpxchg_loop):
Formatting fix.
* config/i386/i386.cc (warn_once_call_ms2sysv_xlogues): Likewise.

--- gcc/config/i386/i386-expand.cc.jj   2022-04-11 15:17:43.943430658 +0200
+++ gcc/config/i386/i386-expand.cc  2022-04-11 15:29:54.226204466 +0200
@@ -17036,7 +17036,8 @@ ix86_emit_fp_unordered_jump (rtx label)
 
 /* Output code to perform an sinh XFmode calculation.  */
 
-void ix86_emit_i387_sinh (rtx op0, rtx op1)
+void
+ix86_emit_i387_sinh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17084,7 +17085,8 @@ void ix86_emit_i387_sinh (rtx op0, rtx o
 
 /* Output code to perform an cosh XFmode calculation.  */
 
-void ix86_emit_i387_cosh (rtx op0, rtx op1)
+void
+ix86_emit_i387_cosh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17106,7 +17108,8 @@ void ix86_emit_i387_cosh (rtx op0, rtx o
 
 /* Output code to perform an tanh XFmode calculation.  */
 
-void ix86_emit_i387_tanh (rtx op0, rtx op1)
+void
+ix86_emit_i387_tanh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17152,7 +17155,8 @@ void ix86_emit_i387_tanh (rtx op0, rtx o
 
 /* Output code to perform an asinh XFmode calculation.  */
 
-void ix86_emit_i387_asinh (rtx op0, rtx op1)
+void
+ix86_emit_i387_asinh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17204,7 +17208,8 @@ void ix86_emit_i387_asinh (rtx op0, rtx
 
 /* Output code to perform an acosh XFmode calculation.  */
 
-void ix86_emit_i387_acosh (rtx op0, rtx op1)
+void
+ix86_emit_i387_acosh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17230,7 +17235,8 @@ void ix86_emit_i387_acosh (rtx op0, rtx
 
 /* Output code to perform an atanh XFmode calculation.  */
 
-void ix86_emit_i387_atanh (rtx op0, rtx op1)
+void
+ix86_emit_i387_atanh (rtx op0, rtx op1)
 {
   rtx e1 = gen_reg_rtx (XFmode);
   rtx e2 = gen_reg_rtx (XFmode);
@@ -17281,7 +17287,8 @@ void ix86_emit_i387_atanh (rtx op0, rtx
 
 /* Output code to perform a log1p XFmode calculation.  */
 
-void ix86_emit_i387_log1p (rtx op0, rtx op1)
+void
+ix86_emit_i387_log1p (rtx op0, rtx op1)
 {
   rtx_code_label *label1 = gen_label_rtx ();
   rtx_code_label *label2 = gen_label_rtx ();
@@ -17322,7 +17329,8 @@ void ix86_emit_i387_log1p (rtx op0, rtx
 }
 
 /* Emit code for round calculation.  */
-void ix86_emit_i387_round (rtx op0, rtx op1)
+void
+ix86_emit_i387_round (rtx op0, rtx op1)
 {
   machine_mode inmode = GET_MODE (op1);
   machine_mode outmode = GET_MODE (op0);
@@ -17436,7 +17444,8 @@ void ix86_emit_i387_round (rtx op0, rtx
 /* Output code to perform a Newton-Rhapson approximation of a single precision
floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  
*/
 
-void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
+void
+ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
 {
   rtx x0, x1, e0, e1;
 
@@ -17487,7 +17496,8 @@ void ix86_emit_swdivsf (rtx res, rtx a,
 /* Output code to perform a Newton-Rhapson approximation of a
single precision floating point [reciprocal] square root.  */
 
-void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
+void
+ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
 {
   rtx x0, e0, e1, e2, e3, mthree, mhalf;
   REAL_VALUE_TYPE r;
@@ -23242,9 +23252,10 @@ ix86_expand_divmod_libfunc (rtx libfunc,
   *rem_p = rem;
 }
 
-void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
-  enum rtx_code code, bool after,
-  bool doubleword)
+void
+ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
+ enum rtx_code code, bool after,
+ bool doubleword)
 {
   rtx old_reg, new_reg, old_mem, success;
   machine_mode mode = GET_MODE (target);
@@ -23288,10 +23299,11 @@ void ix86_expand_atomic_fetch_op_loop (r
it will be relaxed to an atomic load + compare, and skip
cmpxchg instruction if mem != exp_input.  */
 
-void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val,
-  rtx mem, rtx exp_input, rtx new_input,
-

[PATCH] phiopt: Fix up debug handling in the (x != cst1 ? x : cst2) != cst3 opt [PR105218]

2022-04-11 Thread Jakub Jelinek via Gcc-patches
Hi!

In the PR104639 optimization, I've added code to emit
  # DEBUG D#1 => arg != carg ? arg : oarg
instruction and replace debug uses of the phi with that debug
temp, so that the debug info is still accurate.
Unfortunately, that is only correct if the middle-bb and phi bb
have exactly 1 and 2 predecessors, respectively, i.e. the ones that
we are using in the optimization (in particular middle-bb has
cond-bb as pred and phi bb cond-bb and middle-bb).
If that is not the case, then we can reach these from another bb
and so the arg SSA_NAME might not be valid there (its definition
doesn't dominate all incoming edges), or, even if it is valid,
might be wrong-debug, e.g. phi argument from some unrelated other
incoming edge might have the carg value that the debug stmt
remaps to oarg.  In theory we could check for that case and
if middle-bb doesn't have a single pred or phi bb 2 preds
check if arg SSA_NAME dominates the phi bb and if all other
phi arguments are expr_not_equal_to the carg value, but this patch
just uses a simpler approach and resets already if we have some
extra incoming edges.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-04-11  Jakub Jelinek  

PR tree-optimization/105218
* tree-ssa-phiopt.cc (value_replacement): If middle_bb has
more than one predecessor or phi's bb more than 2 predecessors,
reset phi result uses instead of adding a debug temp.

* gcc.dg/pr105218.c: New test.

--- gcc/tree-ssa-phiopt.cc.jj   2022-04-11 10:44:20.282985872 +0200
+++ gcc/tree-ssa-phiopt.cc  2022-04-11 16:16:06.645348016 +0200
@@ -1454,6 +1454,7 @@ value_replacement (basic_block cond_bb,
  imm_use_iterator imm_iter;
  tree phires = gimple_phi_result (phi);
  tree temp = NULL_TREE;
+ bool reset_p = false;
 
  /* Add # DEBUG D#1 => arg != carg ? arg : oarg.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, phires)
@@ -1462,6 +1463,16 @@ value_replacement (basic_block cond_bb,
continue;
  if (temp == NULL_TREE)
{
+ if (!single_pred_p (middle_bb)
+ || EDGE_COUNT (gimple_bb (phi)->preds) != 2)
+   {
+ /* But only if middle_bb has a single
+predecessor and phi bb has two, otherwise
+we could use a SSA_NAME not usable in that
+place or wrong-debug.  */
+ reset_p = true;
+ break;
+   }
  gimple_stmt_iterator gsi
= gsi_after_labels (gimple_bb (phi));
  tree type = TREE_TYPE (phires);
@@ -1476,6 +1487,8 @@ value_replacement (basic_block cond_bb,
replace_exp (use_p, temp);
  update_stmt (use_stmt);
}
+ if (reset_p)
+   reset_debug_uses (phi);
}
}
  if (equal_p)
--- gcc/testsuite/gcc.dg/pr105218.c.jj  2022-04-11 16:09:15.172101168 +0200
+++ gcc/testsuite/gcc.dg/pr105218.c 2022-04-11 16:08:59.387321866 +0200
@@ -0,0 +1,16 @@
+/* PR tree-optimization/105218 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -g" } */
+
+int a, c;
+void bar (void);
+
+void
+foo (void)
+{
+  int b = 131;
+  if (a)
+b = c == 2 ? 1 : c;
+  while (b)
+bar ();
+}

Jakub



Re: [PATCH] phiopt: Fix up debug handling in the (x != cst1 ? x : cst2) != cst3 opt [PR105218]

2022-04-11 Thread Richard Biener via Gcc-patches



> Am 11.04.2022 um 19:00 schrieb Jakub Jelinek :
> 
> Hi!
> 
> In the PR104639 optimization, I've added code to emit
>  # DEBUG D#1 => arg != carg ? arg : oarg
> instruction and replace debug uses of the phi with that debug
> temp, so that the debug info is still accurate.
> Unfortunately, that is only correct if the middle-bb and
> phi bb contain 1 and 2 predecessors, i.e. the ones that
> we are using in the optimization (in particular middle-bb has
> cond-bb as pred and phi bb cond-bb and middle-bb).
> If that is not the case, then we can reach these from another bb
> and so the arg SSA_NAME might not be valid there (its definition
> doesn't dominate all incoming edges), or, even if it is valid,
> might be wrong-debug, e.g. phi argument from some unrelated other
> incoming edge might have the carg value that the debug stmt
> remaps to oarg.  In theory we could check for that case and
> if middle-bb doesn't have a single pred or phi bb 2 preds
> check if arg SSA_NAME dominates the phi bb and if all other
> phi arguments are expr_not_equal_to the carg value, but this patch
> just uses a simpler approach and resets already if we have some
> extra incoming edges.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard 


> 2022-04-11  Jakub Jelinek  
> 
>PR tree-optimization/105218
>* tree-ssa-phiopt.cc (value_replacement): If middle_bb has
>more than one predecessor or phi's bb more than 2 predecessors,
>reset phi result uses instead of adding a debug temp.
> 
>* gcc.dg/pr105218.c: New test.
> 
> --- gcc/tree-ssa-phiopt.cc.jj2022-04-11 10:44:20.282985872 +0200
> +++ gcc/tree-ssa-phiopt.cc2022-04-11 16:16:06.645348016 +0200
> @@ -1454,6 +1454,7 @@ value_replacement (basic_block cond_bb,
>  imm_use_iterator imm_iter;
>  tree phires = gimple_phi_result (phi);
>  tree temp = NULL_TREE;
> +  bool reset_p = false;
> 
>  /* Add # DEBUG D#1 => arg != carg ? arg : oarg.  */
>  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, phires)
> @@ -1462,6 +1463,16 @@ value_replacement (basic_block cond_bb,
>continue;
>  if (temp == NULL_TREE)
>{
> +  if (!single_pred_p (middle_bb)
> +  || EDGE_COUNT (gimple_bb (phi)->preds) != 2)
> +{
> +  /* But only if middle_bb has a single
> + predecessor and phi bb has two, otherwise
> + we could use a SSA_NAME not usable in that
> + place or wrong-debug.  */
> +  reset_p = true;
> +  break;
> +}
>  gimple_stmt_iterator gsi
>= gsi_after_labels (gimple_bb (phi));
>  tree type = TREE_TYPE (phires);
> @@ -1476,6 +1487,8 @@ value_replacement (basic_block cond_bb,
>replace_exp (use_p, temp);
>  update_stmt (use_stmt);
>}
> +  if (reset_p)
> +reset_debug_uses (phi);
>}
>}
>  if (equal_p)
> --- gcc/testsuite/gcc.dg/pr105218.c.jj2022-04-11 16:09:15.172101168 +0200
> +++ gcc/testsuite/gcc.dg/pr105218.c2022-04-11 16:08:59.387321866 +0200
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/105218 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g" } */
> +
> +int a, c;
> +void bar (void);
> +
> +void
> +foo (void)
> +{
> +  int b = 131;
> +  if (a)
> +b = c == 2 ? 1 : c;
> +  while (b)
> +bar ();
> +}
> 
>Jakub
> 


[committed] libstdc++: Improve behaviour of std::stacktrace::current

2022-04-11 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.

-- >8 --

This prevents inlining the current() function to guarantee that it is
present in the stacktrace, then tells libbacktrace to skip that frame.

To avoid overflow in the int argument to __glibcxx_backtrace_simple, we
need to check if the skip parameter exceeds INT_MAX (which is possible
for 16-bit targets where short and int have the same width). We also
need to limit the size of the returned value to the max_depth parameter,
which was missing previously.

This also fixes basic_stacktrace::max_size() to not exceed the maximum
size supported by the allocator, which might be smaller than the maximum
value of size_type.

libstdc++-v3/ChangeLog:

* include/std/stacktrace (basic_stacktrace::current): Duplicate
implementation into each overload. Add noinline attribute and
skip current frame.
(basic_stacktrace::max_size()): Call _Impl::_S_max_size.
(basic_stacktrace::_S_curr_cb()): New function defining lambda.
(basic_stacktrace::_Impl::_S_max_size): New function defining
maximum size in terms of allocator and size_type.
(basic_stacktrace::_Impl::_M_allocate): Check against
max_size().
* testsuite/19_diagnostics/stacktrace/entry.cc: Call function
for non-constexpr checks. Check line number is correct.
---
 libstdc++-v3/include/std/stacktrace   | 91 ++-
 .../19_diagnostics/stacktrace/entry.cc|  7 +-
 2 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 623f44bdca4..4e271cef3f3 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct __glibcxx_backtrace_state;
@@ -232,19 +233,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // [stacktrace.basic.ctor], creation and assignment
 
+  [[__gnu__::__noinline__]]
   static basic_stacktrace
   current(const allocator_type& __alloc = allocator_type()) noexcept
   {
-   return current(0, size_type(-1), __alloc);
+   auto __state = stacktrace_entry::_S_init();
+   basic_stacktrace __ret(__alloc);
+   if (!__ret._M_reserve(64)) [[unlikely]]
+ return __ret;
+
+   if (__glibcxx_backtrace_simple(__state, 1, _S_curr_cb(),
+  nullptr, std::__addressof(__ret)))
+ __ret._M_clear();
+
+   return __ret;
   }
 
+  [[__gnu__::__noinline__]]
   static basic_stacktrace
   current(size_type __skip,
  const allocator_type& __alloc = allocator_type()) noexcept
   {
-   return current(__skip, size_type(-1), __alloc);
+   auto __state = stacktrace_entry::_S_init();
+   basic_stacktrace __ret(__alloc);
+   if (__skip >= __INT_MAX__) [[unlikely]]
+ return __ret;
+   if (!__ret._M_reserve(64)) [[unlikely]]
+ return __ret;
+
+   if (__glibcxx_backtrace_simple(__state, __skip + 1, _S_curr_cb(),
+  nullptr, std::__addressof(__ret)))
+ __ret._M_clear();
+
+   return __ret;
   }
 
+  [[__gnu__::__noinline__]]
   static basic_stacktrace
   current(size_type __skip, size_type __max_depth,
  const allocator_type& __alloc = allocator_type()) noexcept
@@ -253,23 +277,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
auto __state = stacktrace_entry::_S_init();
basic_stacktrace __ret(__alloc);
-   if (!__ret._M_reserve(std::min(__max_depth, 64)))
+   if (__max_depth == 0 || __skip >= __INT_MAX__) [[unlikely]]
+ return __ret;
+   if (!__ret._M_reserve(std::min(__max_depth, 64))) [[unlikely]]
  return __ret;
 
-   auto __cb = [](void* __data, uintptr_t __pc) {
- auto& __s = *static_cast(__data);
- stacktrace_entry __f;
- __f._M_pc = __pc;
- if (__s._M_push_back(__f))
-   return 0;
- return 1;
-   };
+   if (__glibcxx_backtrace_simple(__state, __skip + 1, _S_curr_cb(),
+  nullptr, std::__addressof(__ret)))
+ __ret._M_clear();
+   else if (__ret.size() > __max_depth)
+ __ret.resize(__max_depth);
 
-   if (__glibcxx_backtrace_simple(__state, __skip, +__cb, nullptr,
-  std::__addressof(__ret)))
- {
-   __ret._M_clear();
- }
return __ret;
   }
 
@@ -443,7 +461,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   [[nodiscard]] bool empty() const noexcept { return size() == 0; }
   size_type size() const noexcept { return _M_impl._M_size; }
-  size_type max_size() const noexcept { return size_type(-1); }
+
+  size_type
+  max_size() const noexcept
+  { return _Impl::_S_max_size(_M_impl._M_alloc); }
 
   const_reference
   operator[](size_type __n) const noexcept
@@ -507,

[committed] libstdc++: Fix std::basic_stacktrace special members [PR105031]

2022-04-11 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.

-- >8 --

The PR points out that there is a non-constant condition used for an
if-constexpr statement, but there are several other problems with the
copy, move and swap members of std::basic_stacktrace.

libstdc++-v3/ChangeLog:

PR libstdc++/105031
* include/std/stacktrace (basic_stacktrace::basic_stacktrace):
Fix allocator usage in constructors.
(basic_stacktrace::operator=(const basic_stacktrace&)): Do not
try to reallocate using const allocator.
(basic_stacktrace::operator=(basic_stacktrace&&)): Fix
if-constexpr with non-constant condition. Do not allocate new
storage if allocator propagates. Do not set _M_size if
allocation fails.
(basic_stacktrace::swap(basic_stacktrace&)): Fix typo. Add
assertion that non-propagating allocators are equal.
* testsuite/19_diagnostics/stacktrace/stacktrace.cc: New test.
---
 libstdc++-v3/include/std/stacktrace   |  59 +++--
 .../19_diagnostics/stacktrace/stacktrace.cc   | 215 ++
 2 files changed, 252 insertions(+), 22 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/19_diagnostics/stacktrace/stacktrace.cc

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index 4e271cef3f3..dd78c71c5dc 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -301,7 +301,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { }
 
   basic_stacktrace(const basic_stacktrace& __other) noexcept
-  : basic_stacktrace(__other, __other._M_alloc)
+  : basic_stacktrace(__other,
+ _AllocTraits::select_on_container_copy_construction(__other._M_alloc))
   { }
 
   basic_stacktrace(basic_stacktrace&& __other) noexcept
@@ -326,13 +327,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   : _M_alloc(__alloc)
   {
if constexpr (_Allocator::is_always_equal::value)
- {
-   _M_impl = std::__exchange(__other._M_impl, {});
- }
+ _M_impl = std::__exchange(__other._M_impl, {});
else if (_M_alloc == __other._M_alloc)
- {
-   _M_impl = std::__exchange(__other._M_impl, {});
- }
+ _M_impl = std::__exchange(__other._M_impl, {});
+   else if (const auto __s = __other._M_impl._M_size)
+ if (auto __f = _M_impl._M_allocate(_M_alloc, __s))
+   {
+ std::uninitialized_copy_n(__other.begin(), __s, __f);
+ _M_impl._M_size = __s;
+   }
   }
 
   basic_stacktrace&
@@ -361,9 +364,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Need to allocate new storage.
_M_clear();
 
-   // Use the allocator we will have after this function returns.
-   auto& __alloc = __pocca ? __other._M_alloc : _M_alloc;
-   if (auto __f = _M_impl._M_allocate(__alloc, __s))
+   if constexpr (__pocca)
+ _M_alloc = __other._M_alloc;
+
+   if (auto __f = _M_impl._M_allocate(_M_alloc, __s))
  {
std::uninitialized_copy_n(__other.begin(), __s, __f);
_M_impl._M_size = __s;
@@ -376,10 +380,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __to = std::copy_n(__other.begin(), __s, begin());
std::destroy(__to, end());
_M_impl._M_size = __s;
- }
 
-   if constexpr (__pocca)
- _M_alloc = __other._M_alloc;
+   if constexpr (__pocca)
+ _M_alloc = __other._M_alloc;
+ }
 
return *this;
   }
@@ -397,19 +401,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  std::swap(_M_impl, __other._M_impl);
else if (_M_alloc == __other._M_alloc)
  std::swap(_M_impl, __other._M_impl);
-   else
+   else if constexpr (__pocma)
  {
-   const auto __s = __other.size();
+   // Free current storage and take ownership of __other's storage.
+   _M_clear();
+   _M_impl = std::__exchange(__other._M_impl, {});
+ }
+   else // Allocators are unequal and don't propagate.
+ {
+   const size_type __s = __other.size();
 
-   if constexpr (__pocma || _M_impl._M_capacity < __s)
+   if (_M_impl._M_capacity < __s)
  {
// Need to allocate new storage.
_M_clear();
 
-   // Use the allocator we will have after this function returns.
-   auto& __alloc = __pocma ? __other._M_alloc : _M_alloc;
-   if (auto __f = _M_impl._M_allocate(__alloc, __s))
- std::uninitialized_copy_n(__other.begin(), __s, __f);
+   if (auto __f = _M_impl._M_allocate(_M_alloc, __s))
+ {
+   std::uninitialized_copy_n(__other.begin(), __s, __f);
+   _M_impl._M_size = __s;
+ }
  }
else
  {
@@ -420,8 +431,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   

Re: [PATCH] builtins: Fix up expand_builtin_int_roundingfn_2 [PR105211]

2022-04-11 Thread Richard Biener via Gcc-patches



> Am 11.04.2022 um 18:45 schrieb Jakub Jelinek via Gcc-patches 
> :
> 
> Hi!
> 
> The expansion of __builtin_iround{,f,l} etc. builtins in some cases
> emits calls to a different fallback builtin.  To locate the right builtin
> it uses mathfn_built_in_1 with the type of the first argument.
> If its TYPE_MAIN_VARIANT is {float,double,long_double}_type_node, all is
> fine, but on the following testcase, because GIMPLE considers scalar
> float conversions between types with the same mode as useless,
> TYPE_MAIN_VARIANT of the arg's type is float32_type_node and because there
> isn't __builtin_lroundf32 returns NULL and we ICE.
> 
> This patch will first try the type of the argument and as fallback the
> type of the first argument of the builtin (which can't be 100% trusted
> either if user incorrectly prototypes it), and if neither works, doesn't
> fallback.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Richard 

> Though, perhaps it would be better to prefer the TREE_VALUE (TYPE_ARG_TYPES)
> type and only use TREE_TYPE (arg) as fallback, so that say on
> a TYPE_MODE (double_type_mode) == TYPE_MODE (long_double_type_mode)
> target we decide based on what builtin the user actually called rather than
> whether the argument has been converted from the other type earlier.

Possibly yes

> 2022-04-11  Jakub Jelinek  
> 
>PR rtl-optimization/105211
>* builtins.cc (expand_builtin_int_roundingfn_2): If mathfn_built_in_1
>fails for TREE_TYPE (arg), retry it with
>TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) and if even that
>fails, emit call normally.
> 
>* gcc.dg/pr105211.c: New test.
> 
> --- gcc/builtins.cc.jj2022-03-14 10:34:34.122924399 +0100
> +++ gcc/builtins.cc2022-04-11 11:57:31.178251743 +0200
> @@ -2968,15 +2968,25 @@ expand_builtin_int_roundingfn_2 (tree ex
> a call to lround in the hope that the target provides at least some
> C99 functions.  This should result in the best user experience for
> not full C99 targets.  */
> -  tree fallback_fndecl = mathfn_built_in_1
> -(TREE_TYPE (arg), as_combined_fn (fallback_fn), 0);
> +  tree fallback_fndecl
> += mathfn_built_in_1 (TREE_TYPE (arg), as_combined_fn (fallback_fn), 0);
> +  /* As scalar float conversions with same mode are useless in GIMPLE,
> + we can end up e.g. with _Float32 argument passed to float builtin,
> + try to get the type from the builtin prototype instead.  */
> +  if (fallback_fndecl == NULL_TREE)
> +if (tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
> +  fallback_fndecl
> += mathfn_built_in_1 (TREE_VALUE (argtypes),
> + as_combined_fn (fallback_fn), 0);
> +  if (fallback_fndecl)
> +{
> +  exp = build_call_nofold_loc (EXPR_LOCATION (exp),
> +   fallback_fndecl, 1, arg);
> 
> -  exp = build_call_nofold_loc (EXPR_LOCATION (exp),
> -   fallback_fndecl, 1, arg);
> -
> -  target = expand_call (exp, NULL_RTX, target == const0_rtx);
> -  target = maybe_emit_group_store (target, TREE_TYPE (exp));
> -  return convert_to_mode (mode, target, 0);
> +  target = expand_call (exp, NULL_RTX, target == const0_rtx);
> +  target = maybe_emit_group_store (target, TREE_TYPE (exp));
> +  return convert_to_mode (mode, target, 0);
> +}
> }
> 
>   return expand_call (exp, target, target == const0_rtx);
> --- gcc/testsuite/gcc.dg/pr105211.c.jj2022-04-11 11:48:17.369946248 +0200
> +++ gcc/testsuite/gcc.dg/pr105211.c2022-04-11 11:48:09.924049700 +0200
> @@ -0,0 +1,11 @@
> +/* PR rtl-optimization/105211 */
> +/* { dg-do compile } */
> +/* { dg-options "-Os -ffast-math" } */
> +/* { dg-add-options float32 } */
> +/* { dg-require-effective-target float32 } */
> +
> +short
> +foo (_Float32 f)
> +{
> +  return __builtin_roundf (f);
> +}
> 
>Jakub
> 


Re: [PATCH v4] libgo: Don't use pt_regs member in mcontext_t

2022-04-11 Thread Sören Tempel via Gcc-patches
Hi,

Any updates on this?

Sorry I keep bothering you with this but we are quite literally only a
few lines away from having the go-signal.c code compile on PPC64 musl :)

Let me know if you need more information to get this fixed.

Greetings,
Sören

Sören Tempel  wrote:
> Ian Lance Taylor  wrote:
> > Sorry, I guess I misread your patch.
> 
> No problem, I think this stuff is hard to get right and understand in
> general since it is so poorly documented.
> 
> > What is the right standalone code for the PPC64 musl case?  Thanks.
> 
> In order to have the current code (i.e. current gofrontend HEAD with
> your patch) compile and work with PPC64 musl it would be sufficient to
> just include asm/ptrace.h, as proposed in the v1 of my patch [1]:
> 
>   // On PowerPC, ucontext.h uses a pt_regs struct as an incomplete
>   // type. This type must be completed by including asm/ptrace.h.
>   #ifdef __PPC__
>   #include <asm/ptrace.h>
>   #endif
> 
> Technically, this should also be needed for using .regs on glibc since
> it also declares pt_regs as an incomplete type [5]. As such, adding the
> include may be the easiest way to resolve this issue.
> 
> However, based on your feedback [2] and feedback by Rich Felker [3], I
> rewrote the go-signal.c PowerPC register handling code to not use .regs
> ("Having pt_regs appear at all in code not using ptrace is a serious
> code smell"). See the v4 of my patch for details [4]. If you don't want
> to use .regs on PPC64 musl the right standalone code should be:
> 
>   ((ucontext_t*)(context))->uc_mcontext.gp_regs;
> 
> Unfortunately, this code only works on PPC64 musl and PPC64 glibc not on
> PPC32 glibc and PPC32 musl, thus I added a case distinction in the v4 of
> my patch [4]. For my personal needs it would be very much sufficient to
> just add an include of asm/ptrace.h to go-signal.c to make the current
> code (i.e. your patch) also work with PPC64 musl.
> 
> Greetings,
> Sören
> 
> [1]: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587520.html
> [2]: https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590668.html
> [3]: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591257.html
> [4]: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591593.html
> [5]: 
> https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/powerpc/sys/ucontext.h;hb=6ff3c7714900529b8f5ca64b58d5da9cd5d5b345#l33


Re: [PATCH v4] libgo: Don't use pt_regs member in mcontext_t

2022-04-11 Thread Ian Lance Taylor via Gcc-patches
On Mon, Apr 11, 2022 at 10:26 AM Sören Tempel  wrote:
>
> Any updates on this?
>
> Sorry I keep bothering you with this but we are quite literally only a
> few lines away from having the go-signal.c code compile on PPC64 musl :)
>
> Let me know if you need more information to get this fixed.

What I was hoping from my earlier question was that you could tell me
the exact lines to write in the current sources that will compile on
MUSL.  Don't include <asm/ptrace.h>, don't refer to earlier patches as
that is what I tried to do earlier but failed, don't add new #define
macros, just add #ifdef and appropriate lines.  Thanks.  If the new
lines also work on glibc using register indexes rather than names,
that would be a bonus.

Ian




> Sören Tempel  wrote:
> > Ian Lance Taylor  wrote:
> > > Sorry, I guess I misread your patch.
> >
> > No problem, I think this stuff is hard to get right and understand in
> > general since it is so poorly documented.
> >
> > > What is the right standalone code for the PPC64 musl case?  Thanks.
> >
> > In order to have the current code (i.e. current gofrontend HEAD with
> > your patch) compile and work with PPC64 musl it would be sufficient to
> > just include asm/ptrace.h, as proposed in the v1 of my patch [1]:
> >
> >   // On PowerPC, ucontext.h uses a pt_regs struct as an incomplete
> >   // type. This type must be completed by including asm/ptrace.h.
> >   #ifdef __PPC__
> >   #include <asm/ptrace.h>
> >   #endif
> >
> > Technically, this should also be needed for using .regs on glibc since
> > it also declares pt_regs as an incomplete type [5]. As such, adding the
> > include may be the easiest way to resolve this issue.
> >
> > However, based on your feedback [2] and feedback by Rich Felker [3], I
> > rewrote the go-signal.c PowerPC register handling code to not use .regs
> > ("Having pt_regs appear at all in code not using ptrace is a serious
> > code smell"). See the v4 of my patch for details [4]. If you don't want
> > to use .regs on PPC64 musl the right standalone code should be:
> >
> >   ((ucontext_t*)(context))->uc_mcontext.gp_regs;
> >
> > Unfortunately, this code only works on PPC64 musl and PPC64 glibc not on
> > PPC32 glibc and PPC32 musl, thus I added a case distinction in the v4 of
> > my patch [4]. For my personal needs it would be very much sufficient to
> > just add an include of asm/ptrace.h to go-signal.c to make the current
> > code (i.e. your patch) also work with PPC64 musl.
> >
> > Greetings,
> > Sören
> >
> > [1]: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587520.html
> > [2]: https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590668.html
> > [3]: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591257.html
> > [4]: https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591593.html
> > [5]: 
> > https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/powerpc/sys/ucontext.h;hb=6ff3c7714900529b8f5ca64b58d5da9cd5d5b345#l33


Re: [PATCH v4] libgo: Don't use pt_regs member in mcontext_t

2022-04-11 Thread Sören Tempel via Gcc-patches
Ian Lance Taylor  wrote:
> What I was hoping from my earlier question was that you could tell me
> the exact lines to write in the current sources that will compile on
> MUSL. Don't include <asm/ptrace.h>, don't refer to earlier patches as
> that is what I tried to do earlier but failed, don't add new #define
> macros, just add #ifdef and appropriate lines.  Thanks.  If the new
> lines also work on glibc using register indexes rather than names,
> that would be a bonus.

Sorry, my bad. Here you go:

diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c
index 9c919e15..454da75e 100644
--- a/libgo/runtime/go-signal.c
+++ b/libgo/runtime/go-signal.c
@@ -233,8 +233,11 @@ getSiginfo(siginfo_t *info, void *context 
__attribute__((unused)))
 #elif defined(__PPC__) && defined(__linux__)
// For some reason different libc implementations use
// different names.
-#if defined(__PPC64__) || defined(__GLIBC__)
+#if defined(__GLIBC__)
ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.regs->nip;
+#elif defined(__PPC64__)
+   // Assumed to be ppc64 musl.
+   ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gp_regs[32];
 #else
// Assumed to be ppc32 musl.
ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gregs[32];
@@ -354,7 +357,7 @@ dumpregs(siginfo_t *info __attribute__((unused)), void 
*context __attribute__((u
mcontext_t *m = &((ucontext_t*)(context))->uc_mcontext;
int i;
 
-#if defined(__PPC64__) || defined(__GLIBC__)
+#if defined(__GLIBC__)
for (i = 0; i < 32; i++)
runtime_printf("r%d %X\n", i, m->regs->gpr[i]);
runtime_printf("pc  %X\n", m->regs->nip);
@@ -363,6 +366,15 @@ dumpregs(siginfo_t *info __attribute__((unused)), void 
*context __attribute__((u
runtime_printf("lr  %X\n", m->regs->link);
runtime_printf("ctr %X\n", m->regs->ctr);
runtime_printf("xer %X\n", m->regs->xer);
+#elif defined(__PPC64__)
+   for (i = 0; i < 32; i++)
+   runtime_printf("r%d %X\n", i, m->gp_regs[i]);
+   runtime_printf("pc  %X\n", m->gp_regs[32]);
+   runtime_printf("msr %X\n", m->gp_regs[33]);
+   runtime_printf("cr  %X\n", m->gp_regs[38]);
+   runtime_printf("lr  %X\n", m->gp_regs[36]);
+   runtime_printf("ctr %X\n", m->gp_regs[35]);
+   runtime_printf("xer %X\n", m->gp_regs[37]);
 #else
for (i = 0; i < 32; i++)
runtime_printf("r%d %X\n", i, m->gregs[i]);



Re: [PATCH] i386: Fix ICE caused by ix86_emit_i387_log1p [PR105214]

2022-04-11 Thread Uros Bizjak via Gcc-patches
On Mon, Apr 11, 2022 at 6:50 PM Jakub Jelinek  wrote:
>
> Hi!
>
> The following testcase ICEs, because ix86_emit_i387_log1p attempts to
> emit something like
>   if (cond)
> some_code1;
>   else
> some_code2;
> and emits a conditional jump using emit_jump_insn (standard way in
> the file) and an unconditional jump using emit_jump.
> The problem with that is that if there is pending stack adjustment,
> it isn't emitted before the conditional jump, but is before the
> unconditional jump and therefore stack is adjusted only conditionally
> (at the end of some_code1 above), which makes dwarf2 pass unhappy about it
> but is a serious wrong-code even if it doesn't ICE.
>
> This can be fixed either by emitting pending stack adjust before the
> conditional jump as the following patch does, or by not using
>   emit_jump (label2);
> and instead hand inlining what that function does except for the
> pending stack adjustment, like:
>   emit_jump_insn (targetm.gen_jump (label2));
>   emit_barrier ();
> In that case there will be no stack adjustment in the sequence and
> it will be done later on somewhere else.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> Or do you prefer the other version?

No, this looks like the correct approach to me. Perhaps a small
comment should be added, since the reason to call
do_pending_stack_adjust is not that obvious.

> 2022-04-11  Jakub Jelinek  
>
> PR target/105214
> * config/i386/i386-expand.cc (ix86_emit_i387_log1p): Call
> do_pending_stack_adjust.
>
> * gcc.dg/asan/pr105214.c: New test.

OK with the comment.

Thanks,
Uros.

>
> --- gcc/config/i386/i386-expand.cc.jj   2022-04-03 21:50:36.001635947 +0200
> +++ gcc/config/i386/i386-expand.cc  2022-04-11 15:17:43.943430658 +0200
> @@ -17291,6 +17291,8 @@ void ix86_emit_i387_log1p (rtx op0, rtx
>rtx cst, cstln2, cst1;
>rtx_insn *insn;
>
> +  do_pending_stack_adjust ();
> +
>cst = const_double_from_real_value
>  (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), 
> XFmode);
>cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
> --- gcc/testsuite/gcc.dg/asan/pr105214.c.jj 2022-04-11 15:21:05.467608711 
> +0200
> +++ gcc/testsuite/gcc.dg/asan/pr105214.c2022-04-11 15:22:10.559697224 
> +0200
> @@ -0,0 +1,16 @@
> +/* PR target/105214 */
> +/* { dg-do compile } */
> +/* { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
> +/* { dg-options "-Ofast -fnon-call-exceptions -fexceptions 
> -fstack-check=generic -fsanitize=address -fno-finite-math-only 
> -fsignaling-nans -fno-associative-math" } */
> +
> +float f;
> +void bar (int *);
> +
> +void
> +foo (void)
> +{
> +  int a[1600], b[1];
> +  f += __builtin_log1pf (f);
> +  bar (a);
> +  bar (b);
> +}
>
> Jakub
>


Re: [PATCH] i386: i386-expand formatting fixes

2022-04-11 Thread Uros Bizjak via Gcc-patches
On Mon, Apr 11, 2022 at 6:52 PM Jakub Jelinek  wrote:
>
> Hi!
>
> While working on the PR105214 patch, I've noticed incorrect formatting
> for a bunch of functions where the function names aren't at the start of
> lines.
>
> The following patch fixes it, though of course it isn't a regression.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk or
> ok for GCC 13 once stage1 reopens?
>
> 2022-04-11  Jakub Jelinek  
>
> * config/i386/i386-expand.cc (ix86_emit_i387_sinh, 
> ix86_emit_i387_cosh,
> ix86_emit_i387_tanh, ix86_emit_i387_asinh, ix86_emit_i387_acosh,
> ix86_emit_i387_atanh, ix86_emit_i387_log1p, ix86_emit_i387_round,
> ix86_emit_swdivsf, ix86_emit_swsqrtsf,
> ix86_expand_atomic_fetch_op_loop, ix86_expand_cmpxchg_loop):
> Formatting fix.
> * config/i386/i386.cc (warn_once_call_ms2sysv_xlogues): Likewise.

This has a very low probability of a regression, so OK for the trunk.

Thanks,
Uros.

> --- gcc/config/i386/i386-expand.cc.jj   2022-04-11 15:17:43.943430658 +0200
> +++ gcc/config/i386/i386-expand.cc  2022-04-11 15:29:54.226204466 +0200
> @@ -17036,7 +17036,8 @@ ix86_emit_fp_unordered_jump (rtx label)
>
>  /* Output code to perform an sinh XFmode calculation.  */
>
> -void ix86_emit_i387_sinh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_sinh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17084,7 +17085,8 @@ void ix86_emit_i387_sinh (rtx op0, rtx o
>
>  /* Output code to perform an cosh XFmode calculation.  */
>
> -void ix86_emit_i387_cosh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_cosh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17106,7 +17108,8 @@ void ix86_emit_i387_cosh (rtx op0, rtx o
>
>  /* Output code to perform an tanh XFmode calculation.  */
>
> -void ix86_emit_i387_tanh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_tanh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17152,7 +17155,8 @@ void ix86_emit_i387_tanh (rtx op0, rtx o
>
>  /* Output code to perform an asinh XFmode calculation.  */
>
> -void ix86_emit_i387_asinh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_asinh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17204,7 +17208,8 @@ void ix86_emit_i387_asinh (rtx op0, rtx
>
>  /* Output code to perform an acosh XFmode calculation.  */
>
> -void ix86_emit_i387_acosh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_acosh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17230,7 +17235,8 @@ void ix86_emit_i387_acosh (rtx op0, rtx
>
>  /* Output code to perform an atanh XFmode calculation.  */
>
> -void ix86_emit_i387_atanh (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_atanh (rtx op0, rtx op1)
>  {
>rtx e1 = gen_reg_rtx (XFmode);
>rtx e2 = gen_reg_rtx (XFmode);
> @@ -17281,7 +17287,8 @@ void ix86_emit_i387_atanh (rtx op0, rtx
>
>  /* Output code to perform a log1p XFmode calculation.  */
>
> -void ix86_emit_i387_log1p (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_log1p (rtx op0, rtx op1)
>  {
>rtx_code_label *label1 = gen_label_rtx ();
>rtx_code_label *label2 = gen_label_rtx ();
> @@ -17322,7 +17329,8 @@ void ix86_emit_i387_log1p (rtx op0, rtx
>  }
>
>  /* Emit code for round calculation.  */
> -void ix86_emit_i387_round (rtx op0, rtx op1)
> +void
> +ix86_emit_i387_round (rtx op0, rtx op1)
>  {
>machine_mode inmode = GET_MODE (op1);
>machine_mode outmode = GET_MODE (op0);
> @@ -17436,7 +17444,8 @@ void ix86_emit_i387_round (rtx op0, rtx
>  /* Output code to perform a Newton-Rhapson approximation of a single 
> precision
> floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
>  */
>
> -void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
> +void
> +ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
>  {
>rtx x0, x1, e0, e1;
>
> @@ -17487,7 +17496,8 @@ void ix86_emit_swdivsf (rtx res, rtx a,
>  /* Output code to perform a Newton-Rhapson approximation of a
> single precision floating point [reciprocal] square root.  */
>
> -void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
> +void
> +ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
>  {
>rtx x0, e0, e1, e2, e3, mthree, mhalf;
>REAL_VALUE_TYPE r;
> @@ -23242,9 +23252,10 @@ ix86_expand_divmod_libfunc (rtx libfunc,
>*rem_p = rem;
>  }
>
> -void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
> -  enum rtx_code code, bool after,
> -  bool doubleword)
> +void
> +ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
> + enum rtx_code code, bool after,
> + bool doubleword)
>  {
>rtx old_reg, new_reg, old_mem, suc

Re: [PATCH] c, c++: attribute format on a ctor with a vbase [PR101833, PR47634]

2022-04-11 Thread Jason Merrill via Gcc-patches

On 4/8/22 15:21, Marek Polacek wrote:

On Wed, Apr 06, 2022 at 04:55:54PM -0400, Jason Merrill wrote:

On 4/1/22 15:14, Marek Polacek wrote:

Attribute format takes three arguments: archetype, string-index, and
first-to-check.  The last two specify the position in the function
parameter list.  r63030 clarified that "Since non-static C++ methods have
an implicit this argument, the arguments of such methods should be counted
from two, not one, when giving values for string-index and first-to-check."
Therefore one has to write

struct D {
  D(const char *, ...) __attribute__((format(printf, 2, 3)));
};

However -- and this is the problem in this PR -- ctors with virtual
bases also get two additional parameters: the in-charge parameter and
the VTT parameter (added in maybe_retrofit_in_chrg).  In fact we'll end up
with two clones of the ctor: an in-charge and a not-in-charge version (see
build_cdtor_clones).  That means that the argument position the user
specified in the attribute argument will refer to different arguments,
depending on which constructor we're currently dealing with.  This can
cause a range of problems: wrong errors, confusing warnings, or crashes.

This patch corrects that; for C we don't have to do anything, and in C++
we can use num_artificial_parms_for.  It would be wrong to rewrite the
attributes the user supplied, so I've added an extra parameter called
adjust_pos.

Attribute format_arg is not affected, because it requires that the
function returns "const char *" which will never be the case for cdtors.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/101833
PR c++/47634

gcc/c-family/ChangeLog:

* c-attribs.cc (positional_argument): Add new argument adjust_pos,
use it.
* c-common.cc (check_function_arguments): Pass fndecl to
check_function_format.
* c-common.h (check_function_format): Adjust declaration.
(maybe_adjust_arg_pos_for_attribute): Add.
(positional_argument): Adjust declaration.
* c-format.cc (decode_format_attr): Add fndecl argument.  Pass it to
maybe_adjust_arg_pos_for_attribute.  Adjust calls to get_constant.


I wonder about, instead of adding another parameter, allowing the current
fntype parameter to be the fndecl when we have one.

And then that gets passed down into positional_argument, so we can call
maybe_adjust_arg_pos_for_attribute there, and adjust the return value
appropriately so we don't need the extra adjustment in get_constant?


Unfortunately I can't do that.  positional_argument can't return the
adjusted position, because get_constant returns it and in decode_format_attr
it's used to rewrite the arguments in the attribute list:

   tree *format_num_expr = &TREE_VALUE (TREE_CHAIN (args));
   tree *first_arg_num_expr = &TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
   ...
 if (tree val = get_constant (fntype, atname, *format_num_expr,
2, &info->format_num, 0, validated_p,
adjust_pos))
 *format_num_expr = val;


Could we not do that?  Currently isn't it just overwriting the value 
with the same value after default_conversion?  Maybe do that conversion 
directly in decode_format_attr instead?



Replacing the arguments in the attribute list would lead to problems, because
when we're processing the constructor clone without the additional parameters,
the adjusted argument position would be out of whack at this point.

I've attempted to reduce the number of parameters, but it hardly seemed like
a win, here's the patch I came up with:

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 6e17847ec9e..972476fbdf4 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -594,7 +594,7 @@ attribute_takes_identifier_p (const_tree attr_id)
  }
  
  /* Verify that argument value POS at position ARGNO to attribute NAME

-   applied to function TYPE refers to a function parameter at position
+   applied to function FNTYPE refers to a function parameter at position
 POS and the expected type CODE.  Treat CODE == INTEGER_TYPE as
 matching all C integral types except bool.  If successful, return
 POS after default conversions, if any.  Otherwise, issue appropriate
diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 6f08b55d4a7..ffa36673ec0 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -6069,7 +6069,7 @@ check_function_arguments (location_t loc, const_tree 
fndecl, const_tree fntype,
/* Check for errors in format strings.  */
  
if (warn_format || warn_suggest_attribute_format)

-check_function_format (fntype, fndecl, TYPE_ATTRIBUTES (fntype), nargs,
+check_function_format (fndecl, TYPE_ATTRIBUTES (fntype), nargs,
   argarray, arglocs);
  
if (warn_format)

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index db6ff07db37..b68dc8f7d69 10

[PATCH] rs6000: Fix unpack for no-direct-move (PR103623)

2022-04-11 Thread Segher Boessenkool
The _dm alternative works fine for soft-float, but the _nodm variant
pattern is missing that alternative.  So, let's add that.

There probably should be an r,r,i alternative as well (or we can make it
rm,r,i), but that is for later.

Tested on powerpc64-linux {-m32,-m64}.  Pushed to trunk.


Segher


2022-04-11  Segher Boessenkool  

PR target/105213
PR target/103623
* config/rs6000/rs6000.md (unpack_nodm): Add m,r,i alternative.
---
 gcc/config/rs6000/rs6000.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index fdfbc6566a5c..f05b8358ba0a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -14580,10 +14580,10 @@ (define_insn_and_split "unpack_dm"
   [(set_attr "type" "fp,fpstore,mtvsr,mfvsr,store")])
 
 (define_insn_and_split "unpack_nodm"
-  [(set (match_operand: 0 "nonimmediate_operand" "=d,m")
+  [(set (match_operand: 0 "nonimmediate_operand" "=d,m,m")
(unspec:
-[(match_operand:FMOVE128 1 "register_operand" "d,d")
- (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
+[(match_operand:FMOVE128 1 "register_operand" "d,d,r")
+ (match_operand:QI 2 "const_0_to_1_operand" "i,i,i")]
 UNSPEC_UNPACK_128BIT))]
   "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (mode)"
   "#"
@@ -14600,7 +14600,7 @@ (define_insn_and_split "unpack_nodm"
 
   operands[3] = gen_rtx_REG (mode, fp_regno);
 }
-  [(set_attr "type" "fp,fpstore")])
+  [(set_attr "type" "fp,fpstore,store")])
 
 (define_insn_and_split "pack"
   [(set (match_operand:FMOVE128 0 "register_operand" "=&d")
-- 
1.8.3.1



Re: [PATCH] rs6000: Handle pcrel sibcalls to longcall functions [PR104894]

2022-04-11 Thread Segher Boessenkool
On Wed, Apr 06, 2022 at 02:33:52PM -0500, Peter Bergner wrote:
> On 4/5/22 10:33 PM, Peter Bergner via Gcc-patches wrote:
> > On 4/5/22 5:32 PM, Segher Boessenkool wrote:
> >> On Tue, Apr 05, 2022 at 05:06:50PM -0500, Peter Bergner wrote:
> So the updated change looks like below with the ChangeLog entry and tests 
> being the same:

Please don't send patches in existing threads, it confuses things.

> Is this better and ok for trunk?

Assuming you write a good changelog for this, it is fine.  Thanks!

Btw.  This code now is harder to read and understand and change than it
has to be, because you want to make the code (or the changes to the
code) as small as possible.  This is not a good tradeoff.


Segher


Re: [committed] libstdc++: Improve behaviour of std::stacktrace::current

2022-04-11 Thread Jonathan Wakely via Gcc-patches
On Mon, 11 Apr 2022 at 18:03, Jonathan Wakely via Libstdc++
 wrote:
> // Precondition: _M_frames == nullptr
> pointer
> _M_allocate(allocator_type& __alloc, size_type __n) noexcept
> {
>   __try
> {
> - _M_frames = __n ? __alloc.allocate(__n) : nullptr;
> - _M_capacity = __n;
> + if (0 < __n && __n <= _S_max_size(__alloc)) [[unlikely]]

I originally changed this to return early if the size isn't OK:

  if (unlikely condition)
return nullptr;

but in the version I pushed it's:

  if (likely condition)
// do allocation

but I forgot to change the attribute to match.

Fixed by the attached. Tested x86_64-linux and pushed to trunk.

I have further improvements to stacktrace::current coming tomorrow.
commit b1124648ff8f655307f264d7b353fd68e3b03e9c
Author: Jonathan Wakely 
Date:   Mon Apr 11 20:13:44 2022

libstdc++: Fix incorrect branch prediction hint in std::stacktrace

libstdc++-v3/ChangeLog:

* include/std/stacktrace (basic_stacktrace::_Impl::_M_allocate):
Change [[unlikely]] attribute to [[likely]].

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index dd78c71c5dc..79038e803f2 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -579,7 +579,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
  __try
{
- if (0 < __n && __n <= _S_max_size(__alloc)) [[unlikely]]
+ if (0 < __n && __n <= _S_max_size(__alloc)) [[likely]]
{
  _M_frames = __alloc.allocate(__n);
  _M_capacity = __n;


[pushed] c++: operator new lookup [PR98249]

2022-04-11 Thread Jason Merrill via Gcc-patches
The standard says, as we quote in the comment just above, that if we don't
find operator new in the allocated type, it should be looked up in the
global scope.  This is specifically ::, not just any namespace, and we
already give an error for an operator new declared in any other namespace.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/98249

gcc/cp/ChangeLog:

* call.cc (build_operator_new_call): Just look in ::.

gcc/testsuite/ChangeLog:

* g++.dg/lookup/new3.C: New test.
---
 gcc/cp/call.cc |  3 +--
 gcc/testsuite/g++.dg/lookup/new3.C | 10 ++
 2 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/lookup/new3.C

diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 73fede5a3df..3a8d7e4b131 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -4899,8 +4899,7 @@ build_operator_new_call (tree fnname, vec 
**args,
up in the global scope.
 
  we disregard block-scope declarations of "operator new".  */
-  fns = lookup_name (fnname, LOOK_where::NAMESPACE);
-  fns = lookup_arg_dependent (fnname, fns, *args);
+  fns = lookup_qualified_name (global_namespace, fnname);
 
   if (align_arg)
 {
diff --git a/gcc/testsuite/g++.dg/lookup/new3.C 
b/gcc/testsuite/g++.dg/lookup/new3.C
new file mode 100644
index 000..36afb5b48e2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lookup/new3.C
@@ -0,0 +1,10 @@
+// PR c++/98249
+
+#include 
+struct Incomplete;
+template struct Holder { T t; };
+Holder *p;
+void test() {
+::new (p) int;
+new (p) int;
+}

base-commit: b1124648ff8f655307f264d7b353fd68e3b03e9c
-- 
2.27.0



Re: -Wformat-overflow handling for %b and %B directives in C2X standard

2022-04-11 Thread Marek Polacek via Gcc-patches
On Thu, Apr 07, 2022 at 02:10:48AM +0500, Frolov Daniil wrote:
> Hello! Thanks for your feedback. I've tried to take into account your
> comments. New patch applied to the letter.

Thanks.
 
> The only thing I have not removed is the check_std_c2x () function. From my
> point of view -Wformat-overflow shouldn't be thrown if the standard < C2X.
> So it's protection for false triggering.

Sorry but I still think that is the wrong behavior.  If you want to warn
about C2X constructs in pre-C2X modes, use -Wpedantic.  But if you want
to use %b/%B as an extension in older dialects, that's OK too, so I don't
know why users would want -Wformat-overflow disabled in that case.  But
perhaps other people disagree with me.

> сб, 2 апр. 2022 г. в 01:15, Marek Polacek :
> 
> > On Sat, Apr 02, 2022 at 12:19:47AM +0500, Frolov Daniil via Gcc-patches
> > wrote:
> > > Hello, I've noticed that -Wformat-overflow doesn't handle %b and %B
> > > directives in the sprintf function. I've added a relevant issue in
> > bugzilla
> > > (bug #105129).
> > > I attach a patch with a possible solution to the letter.
> >
> > Thanks for the patch.  Support for C2X %b, %B formats is relatively new
> > (Oct 2021) so it looks like gimple-ssa-sprintf.cc hasn't caught up.
> >
> > This is not a regression, so should probably wait till GCC 13.  Anyway...
> >
> > > From 2051344e9500651f6e94c44cbc7820715382b957 Mon Sep 17 00:00:00 2001
> > > From: Frolov Daniil 
> > > Date: Fri, 1 Apr 2022 00:47:03 +0500
> > > Subject: [PATCH] Support %b, %B for -Wformat-overflow (sprintf, snprintf)
> > >
> > > testsuite: add tests to check -Wformat-overflow on %b.
> > > Wformat-overflow1.c is compiled using -std=c2x so warning has to
> > > be throwed
> > >
> > > Wformat-overflow2.c doesn't throw warnings cause c2x std isn't
> > > used
> > >
> > > gcc/ChangeLog:
> > >
> > >   * gimple-ssa-sprintf.cc
> > > (check_std_c2x): New function
> > >   (fmtresult::type_max_digits): add base == 2 handling
> > >   (tree_digits): add handle for base == 2
> > >   (format_integer): now handle %b and %B using base = 2
> > >   (parse_directive): add cases to handle %b and %B directives
> > >   (compute_format_length): add handling for base = 2
> >
> > The descriptions should start with a capital letter and end with a period,
> > like "Handle base == 2."
> >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.dg/Wformat-overflow1.c: New test. (using -std=c2x)
> > >   * gcc.dg/Wformat-overflow2.c: New test. (-std=c11 no warning)
> >
> > You can just say "New test."
> >
> > > ---
> > >  gcc/gimple-ssa-sprintf.cc| 42 
> > >  gcc/testsuite/gcc.dg/Wformat-overflow1.c | 28 
> > >  gcc/testsuite/gcc.dg/Wformat-overflow2.c | 16 +
> > >  3 files changed, 79 insertions(+), 7 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/Wformat-overflow1.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/Wformat-overflow2.c
> > >
> > > diff --git a/gcc/gimple-ssa-sprintf.cc b/gcc/gimple-ssa-sprintf.cc
> > > index c93f12f90b5..7f68c2b6e51 100644
> > > --- a/gcc/gimple-ssa-sprintf.cc
> > > +++ b/gcc/gimple-ssa-sprintf.cc
> > > @@ -107,6 +107,15 @@ namespace {
> > >
> > >  static int warn_level;
> > >
> > > +/* b_overflow_flag depends on the current standart when using gcc */
> >
> > "standard"
> >
> > /* Comments should be formatted like this.  */
> >
> > > +static bool b_overflow_flag;
> > > +
> > > +/* check is current standart version equals C2X*/
> > > +static bool check_std_c2x ()
> > > +{
> > > +  return !strcmp (lang_hooks.name, "GNU C2X");
> > > +}
> >
> > Is this really needed?  ISTM that this new checking shouldn't depend on
> > -std=c2x.  If not using C2X, you only get a warning if -Wpedantic.  So
> > I think you should remove b_overflow_flag.
> >
> > >  /* The minimum, maximum, likely, and unlikely maximum number of bytes
> > > of output either a formatting function or an individual directive
> > > can result in.  */
> > > @@ -535,6 +544,8 @@ fmtresult::type_max_digits (tree type, int base)
> > >unsigned prec = TYPE_PRECISION (type);
> > >switch (base)
> > >  {
> > > +case 2:
> > > +  return prec;
> > >  case 8:
> > >return (prec + 2) / 3;
> > >  case 10:
> > > @@ -857,11 +868,11 @@ tree_digits (tree x, int base, HOST_WIDE_INT prec,
> > bool plus, bool prefix)
> > >
> > >/* Adjust a non-zero value for the base prefix, either hexadecimal,
> > >   or, unless precision has resulted in a leading zero, also octal.
> > */
> > > -  if (prefix && absval && (base == 16 || prec <= ndigs))
> > > +  if (prefix && absval && (base == 2 || base == 16 || prec <= ndigs))
> > >  {
> > >if (base == 8)
> > >   res += 1;
> > > -  else if (base == 16)
> > > +  else if (base == 16 || base == 2) /*0x...(0X...) and
> > 0b...(0B...)*/
> > >   res += 2;
> > >  }
> > >
> > > @@ -1229,6 +1240,10 @@ format_integer (const directive &dir, t

Re: [PATCH] rs6000: Handle pcrel sibcalls to longcall functions [PR104894]

2022-04-11 Thread Peter Bergner via Gcc-patches
On 4/11/22 4:13 PM, Segher Boessenkool wrote:
> On Wed, Apr 06, 2022 at 02:33:52PM -0500, Peter Bergner wrote:
>> On 4/5/22 10:33 PM, Peter Bergner via Gcc-patches wrote:
>>> On 4/5/22 5:32 PM, Segher Boessenkool wrote:
>>>> On Tue, Apr 05, 2022 at 05:06:50PM -0500, Peter Bergner wrote:
>> So the updated change looks like below with the ChangeLog entry and tests 
>> being the same:
> 
> Please don't send patches in existing threads, it confuses things.

It wasn't really meant as a patch, but more of a confirmation I made
the changes you wanted...and a ping. :-) 




>> Is this better and ok for trunk?
> 
> Assuming you write a good changelog for this, it is fine.  Thanks!

Done and pushed.  Thanks!   We need this on GCC11 and GCC10 as well.
With GCC11 due soon, I'd like this in there.  Ok for backports after
a day or so of trunk burn-in?

Peter



Re: [PATCH, V3] Eliminate power8 fusion options, use power8 tuning, PR target/102059

2022-04-11 Thread Segher Boessenkool
On Thu, Apr 07, 2022 at 07:59:27PM -0400, Michael Meissner wrote:
> I have tested this patch on a little endian power10 system.  I have tested
> previous versions on little endian power9 and big endian power8 systems.

Please test on at least p8 as well.

> I will want to backport the patch to GCC 10 and GCC 11 in a few days.
> Note this patch can't be used directly for those backports due to the
> other changes for PR target/102059 that have gone into the master branch
> but were not back ported.

Then send a new patch for there.  If you cannot backport, you cannot
backport.

>   (rs6000_opt_masks): Allow #pragma target and attribute target to set
>   power8-fusion, but these no longer represent an option that the
>   user can set.

Do you have evidence that anything used this in the wild?  Nothing
should have.  Please try to remove this in GCC 13 at least.

>   (rs6000_print_options_internal): Skip printing nop options.

It is not clear what you mean here (not in the code either).  Please
rephrase.

>   * config/rs6000/rs6000.opt (-mpower8-fusion): Recognize the option but
>   ignore the no form and warn that the option was removed for the regular
>   form.

If it is ignored, you should not warn for it.

>   (-mpower8-fusion-sign): Warn that the option has been removed.

And no one could correctly have used this ever, so don't warn anything
about this in any case.

> -/* For now, don't provide an embedded version of ISA 2.07.  Do not set power8
> -   fusion here, instead set it in rs6000.cc if we are tuning for a power8
> -   system.  */

You should keep part of this comment.  Maybe just "We don't implement an
embedded version of ISA 2.07."?

> +  /* The Power8 fusion option was removed.  We ignore using it in #pragma and
> + attribute target.  Users may have used the options to suppress errors if
> + they declare an inline function to be specifically power8 and the 
> function
> + was included by power9 or power10 which turned off the power8 fusion
> + support.  */
> +  { "power8-fusion", 0,  false, true  },

This sounds very hypothetical.  Does this actually happen?  If so,
rephrase the comment; if not, remove the whole thing.

> @@ -24687,6 +24657,10 @@ rs6000_print_options_internal (FILE *file,
>HOST_WIDE_INT mask = opts[i].mask;
>size_t len = comma_len + prefix_len + strlen (name);
>  
> +  /* Don't print NOP options.  */
> +  if (!mask)
> + continue;

It is not clear at all what "NOP options" means.  I can read the code
to understand the comment, but that makes the code less readable than
without comment.  Please fix this.

> +/* Power8 has special fusion operations that are enabled if we are tuning for
> +   power8.  This used to be settable with an option (-mpower8-fusion), but 
> that
> +   option has been removed.  */
> +#define TARGET_P8_FUSION (rs6000_tune == PROCESSOR_POWER8)

That comment does not belong here.

> +/* Power8 fusion does not fuse loads with sign extends.  If we are doing 
> higher
> +   optimization levels, split loads with sign extension to loads with zero
> +   extension and an explicit sign extend operation, so that the zero 
> extending
> +   load can be fused.  */
> +#define TARGET_P8_FUSION_SIGN(TARGET_P8_FUSION   
> \
> +  && optimize_function_for_speed_p (cfun) \
> +  && optimize >= 3)

This should not depend on -O3 or sneaky ways to disable optimising for
speed.  It does not have to anyway.  Just remove this whole thing, use
TARGET_P8_FUSION instead everywhere?

> +; The -mpower8-fusion option existed in the past, but it has been removed.
> +; Some users in the past needed to use the -mno-power8-fusion option when 
> they
> +; had inline functions that were specified as generating power8 code and the
> +; functions were included by power9 or power10 function.  Using
> +; -mno-power8-fusion prevented an error in this case.  We allow the
> +; -mno-power8-fusion option without a warning.

Please don't say all these things.  It should say what all our other
removed options say: a single half-line comment, and nothing more.

>  mpower8-fusion
> -Target Mask(P8_FUSION) Var(rs6000_isa_flags)
> -Fuse certain integer operations together for better performance on power8.
> +Target RejectNegative Undocumented WarnRemoved

> +mno-power8-fusion
> +Target RejectNegative Undocumented

It also should have "Ignore".

The "no-" variant (the always active one!) goes first.  Both share that
one short comment.

> +; The -mpower8-fusion-sign option was an undocumented option that modified 
> the
> +; -mpower8-fusion option.  It has been removed.
>  mpower8-fusion-sign
> -Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
> -Allow sign extension in fusion operations.
> +Target Undocumented WarnRemoved

Don't WarnRemoved.  Do Ignore.  Just remove the whole thing please.

> --- /dev

Re: [PATCH] rs6000: Handle pcrel sibcalls to longcall functions [PR104894]

2022-04-11 Thread Segher Boessenkool
On Mon, Apr 11, 2022 at 05:08:04PM -0500, Peter Bergner wrote:
> Done and pushed.  Thanks!   We need this on GCC11 and GCC10 as well.
> With GCC11 due soon, I'd like this in there.  Ok for backports after
> a day or so of trunk burn-in?

Yes, thanks!  Please make sure you have tested things very thoroughly if
you do quick backports like this (although the only thing that seems to
be in danger are longcall things, and does anyone use those?) (don't
answer that :-) )


Segher


Re: [PATCH] rs6000: Handle pcrel sibcalls to longcall functions [PR104894]

2022-04-11 Thread Segher Boessenkool
On Tue, Apr 05, 2022 at 10:33:14PM -0500, Peter Bergner wrote:
> On 4/5/22 5:32 PM, Segher Boessenkool wrote:
> >> +  gcc_assert (rs6000_pcrel_p ());
> >> +  func_desc = rs6000_longcall_ref (func_desc, tlsarg);
> >> +}
> >> +  else
> >> +gcc_assert (INTVAL (cookie) == 0);
> > 
> > So in the old code the cookie could *only* contain the CALL_LONG flag,
> > now it can contain any others as long as it has that flag as well.
> > Please fix.
> 
> No, the old code only allowed INTVAL (cookie) == 0, which means no
> attributes are allowed.

>  The new code now allows the CALL_LONG attribute
> iff the function is a SYMBOL_REF.  This is only allowed for pcrel calls
> though.

Ah, tricky.

>  I debated on whether to do a gcc_assert on rs6000_pcrel_p() or
> fold the rs6000_pcrel_p() into the if () and let the original assert
> on INTVAL (cookie) == 0 catch the illegal uses.  It's up to you on
> what you prefer.

For future changes, likely it is best if you split the pcrel and
non-pcrel paths further.

> > Not every LONG_CALL needs a TOC restore though?  
> 
> I believe if the function we're calling has the CALL_LONG attribute
> set, we have to assume that the TOC needs to be restored.

Not if we know the called function is in the same object?  If we are
doing long calls anyway there isn't much point in optimising anything
anymore, but don't say "have to" or such then :-)

> > You probably should have the same condition here as actually doing a
> > longcall as well, something involving SYMBOL_REF_FUNCTION_P?
> 
> I believe if we're here in rs6000_sibcall_aix() and func_desc is a
> SYMBOL_REF, then it also must be SYMBOL_REF_FUNCTION_P, correct?
> Otherwise, why would we be attempting to do a sibcall to it?

It doesn't hurt to be a bit defensive in programming.  It helps make
future changes easier, too :-)

Maybe we should have a utility function for this?  That helps prevent
microoptimisations as well :-P


Segher


Re: Ping^2 [PATCH, rs6000] Correct match pattern in pr56605.c

2022-04-11 Thread Alexandre Oliva via Gcc-patches
On Apr  7, 2022, HAO CHEN GUI via Gcc-patches  wrote:

>   Gentle ping this:
>https://gcc.gnu.org/pipermail/gcc-patches/2022-February/590958.html
> Thanks

>> On 28/2/2022 上午 11:17, HAO CHEN GUI wrote:

>>> This patch corrects the match pattern in pr56605.c. The former pattern
>>> is wrong and test case fails with GCC11. It should match following insn on
>>> each subtarget after mode promotion is disabled. The patch need to be
>>> backported to GCC11.

>>> -/* { dg-final { scan-rtl-dump-times {\(compare:CC 
>>> \((?:and|zero_extend):(?:DI) \((?:sub)?reg:[SD]I} 1 "combine" } } */
>>> +/* { dg-final { scan-rtl-dump-times {\(compare:CC \(and:SI \(subreg:SI 
>>> \(reg:DI} 1 "combine" } } */


How about this less strict change instead?


ppc: testsuite: PROMOTE_MODE fallout pr56605 [PR102146]

The test expects a compare of DImode values, but after the removal of
PROMOTE_MODE from rs6000/, we get SImode.  Adjust the expectations.

Tested with gcc-11 targeting ppc64-vx7r2.  Ok to install?


for  gcc/testsuite/ChangeLog

PR target/102146
* gcc.target/powerpc/pr56605.c: Accept SImode compare operand.
---
 gcc/testsuite/gcc.target/powerpc/pr56605.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr56605.c 
b/gcc/testsuite/gcc.target/powerpc/pr56605.c
index fdedbfc573dd8..7695f87db6f66 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr56605.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr56605.c
@@ -11,5 +11,5 @@ void foo (short* __restrict sb, int* __restrict ia)
 ia[i] = (int) sb[i];
 }
 
-/* { dg-final { scan-rtl-dump-times {\(compare:CC \((?:and|zero_extend):(?:DI) 
\((?:sub)?reg:[SD]I} 1 "combine" } } */
+/* { dg-final { scan-rtl-dump-times {\(compare:CC 
\((?:and|zero_extend):(?:[SD]I) \((?:sub)?reg:[SD]I} 1 "combine" } } */
 



-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


Re: rustc_codegen_gcc and libgccjit for GCC 12 ?

2022-04-11 Thread David Malcolm via Gcc-patches
On Fri, 2022-04-08 at 16:37 -0400, Antoni Boucher wrote:
> On Fri, 2022-04-08 at 15:36 -0400, David Malcolm wrote:

[...snip...]

> 
> 

> > So I think I'm waiting on an updated version of the sized-integer-
> > types
> > patch, and some nit-fixes for the other patches (but am
> > disappearing
> > on
> > vacation on 18th - 22nd).
> 
> I'll update the patches to address your review over the weekend.
> Thanks!

Thanks.

I've been working through them today, fixing things up so they commit
cleanly, and fixing a few more nits, but it's clear I'm not going to be
done tonight.

I hope to push the 5 jit patches to trunk for GCC 12 tomorrow (assuming
my tests pass).

Dave
> 



[PATCH] ppc: testsuite: skip pr60203 on no ldbl128

2022-04-11 Thread Alexandre Oliva via Gcc-patches


If neither 128-bit long double format is available, skip pr60203.c.

Tested with gcc-11 targeting ppc64-vx7r2, with neither long double
format enabled.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/pr60203.c: Skip on no 128-bit long double.
---
 gcc/testsuite/gcc.target/powerpc/pr60203.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/pr60203.c 
b/gcc/testsuite/gcc.target/powerpc/pr60203.c
index 7ada64a32db45..53f58c8e05a41 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr60203.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr60203.c
@@ -1,5 +1,6 @@
 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-skip-if "no 128-bit long double" { ! { long_double_ibm128 || 
long_double_ieee128 } } } */
 /* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8 -O3" } */
 


-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[PATCH] ppc: testsuite: test for arch_pwr7 with -mvsx in fold-vec-insert-double

2022-04-11 Thread Alexandre Oliva via Gcc-patches


gcc.target/powerpc/fold-vec-insert-double.c is compiled with -mvsx,
while the expected asm output depends on target has_arch_pwr7, which
is tested for without -mvsx.

In some of our configurations, that have altivec and vsx disabled by
default, the former defines up to _ARCH_PWR7, while the latter defines
only up to _ARCH_PWR4, i.e., we compile for power7, and test for
non-power7.

This patch, admittedly ugly, enables us to test for asm output
according to the actual compile target given the explicitly specified
flag.

I suppose it may be possible to turn this "magic" into a reusable proc
that sets a named variable to the result of a wrapped scan test, but
I'm not sure I'm up to the task, with the need to deal with additional
scoping, so I'm hoping this can be acceptable as is.

Tested with gcc-11 targeting ppc64-vx7r2.  Ok to install?


for  gcc/testsuite/ChangeLog

* gcc.target/powerpc/fold-vec-insert-double.c: Test for asm
according to the arch selected by -mvsx.
---
 .../gcc.target/powerpc/fold-vec-insert-double.c|   48 
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
index afd7f7e9924e8..b95f0b33d6c07 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-insert-double.c
@@ -18,21 +18,51 @@ testd_cst (double d, vector double vd)
 {
   return vec_insert (d, vd, 1);
 }
+
+/* The expected asm output varies depending on target has_arch_pwr7, but the
+   -mvsx option used for the test may implicitly enable the macro that target
+   has_arch_pwr7 tests for, while target has_arch_pwr7 doesn't take the option,
+   so we may end up compiling for one target variant and testing for another.
+   The following dejagnu magic sets $macro_is_defined to 1 or 0 depending on
+   whether ARCH_PWR7_is_defined appears in the assembly output.  */
+#ifdef _ARCH_PWR7
+int ARCH_PWR7_is_defined = 1;
+/* { dg-final { set asm_pattern_to_search_for "ARCH_PWR7_is_defined" } } */
+#endif
+
+/* { dg-final { global macro_is_defined } } */
+/* { dg-final { set macro_is_defined -1 } } */
+/* { dg-final { rename pass macro-save-pass } } */
+/* { dg-final { rename fail macro-save-fail } } */
+/* { dg-final { proc pass { args } { global macro_is_defined; set 
macro_is_defined 1 } } } */
+/* { dg-final { proc fail { args } { global macro_is_defined; set 
macro_is_defined 0 } } } */ 
+/* { dg-final { scan-assembler "$asm_pattern_to_search_for" } } */
+/* { dg-final { rename pass macro-dropme-pass } } */
+/* { dg-final { rename macro-save-pass pass } } */
+/* { dg-final { rename fail macro-dropme-fail } } */
+/* { dg-final { rename macro-save-fail fail } } */
+/* { dg-final { if { $macro_is_defined < 0 } { fail "macro detection" } } } */
+/* { dg-final { if { $macro_is_defined < 0 } { return } } } */
+
+/* { dg-final { set has_arch_pwr7 $macro_is_defined } } */
+/* This is the end of the magic.
+   We can now run tests conditionally on $has_arch_pwr7.  */
+
 /* The number of xxpermdi instructions varies between
  P7,P8,P9, ensure at least one hit. */
 /* { dg-final { scan-assembler {\mxxpermdi\M} } } */
 
 /* { dg-final { scan-assembler-times {\mrldic\M|\mrlwinm\M} 1 } } */
 
-/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 { 
target { ! has_arch_pwr7 } } } } */
-/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 1 { target { ! 
has_arch_pwr7 } } } } */
-/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 { target { 
! has_arch_pwr7 } } } } */
+/* { dg-final { if { ! $has_arch_pwr7 } { scan-assembler-times 
{\mstxvd2x\M|\mstxv\M|\mstvx\M} 1 } } } */
+/* { dg-final { if { ! $has_arch_pwr7 } { scan-assembler-times 
{\mstfdx\M|\mstfd\M} 1 } } } */
+/* { dg-final { if { ! $has_arch_pwr7 } { scan-assembler-times 
{\mlxvd2x\M|\mlxv\M|\mlvx\M} 1 } } } */
 
-/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { 
target { has_arch_pwr7 && lp64 } } } } */
-/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 0 { target { 
has_arch_pwr7 && lp64 } } } } */
+/* { dg-final { if { $has_arch_pwr7 } { scan-assembler-times 
{\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { target { lp64 } } } } } */
+/* { dg-final { if { $has_arch_pwr7 } { scan-assembler-times 
{\mstfdx\M|\mstfd\M} 0 { target { lp64 } } } } } */
 
-/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target { 
has_arch_pwr7 && lp64 } } } } */
-/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target { 
has_arch_pwr7 && ilp32 } } } } */
-/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M|\mstvx\M} 0 { 
target { has_arch_pwr7 && ilp32 } } } } */
-/* { dg-final { scan-assembler-times {\mstfdx\M|\mstfd\M} 0 { target { 
has_arch_pwr7 && ilp32 } } } } */
+/* { dg-final { if { $has_arch_pwr7 } { scan-assembler-times 
{\mlxvd2x\M|\mlxv\M|\mlvx\M} 0 { target { lp64 } } 

Re: [PATCH] rs6000: Guard bifs {un, }pack_{longdouble, ibm128} under hard float [PR103623]

2022-04-11 Thread Kewen.Lin via Gcc-patches
on 2022/4/11 11:42 PM, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Apr 11, 2022 at 04:29:40PM +0800, Kewen.Lin wrote:
>> on 2022/4/9 1:31 AM, Segher Boessenkool wrote:
>>> On Fri, Apr 08, 2022 at 10:09:44AM +0800, Kewen.Lin wrote:
>>> For me it fails during combine: the unspec suddenly doesn't recog
>>> anymore.  That might be that "d" thing yes, that is problematical.
>>>
>>
>> I must be missing something: I found that in the combine pass we still have
>> the insn_code unpacktf_nodm (recog-ed).
> 
> That is recognised many passes earlier though.  When combine runs it
> will ICE because recog failed (for powerpc64-linux anyway, everything
> default, no -mcpu= etc.).
> 

OK, I guess that's why I didn't hit this ICE, as I always specified -mcpu=
when testing.

>>> Or, try this patch?
>>>
>>> ===
>>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>>> index fdfbc6566a5c..f05b8358ba0a 100644
>>> --- a/gcc/config/rs6000/rs6000.md
>>> +++ b/gcc/config/rs6000/rs6000.md
>>> @@ -14580,10 +14580,10 @@ (define_insn_and_split "unpack_dm"
>>>[(set_attr "type" "fp,fpstore,mtvsr,mfvsr,store")])
>>>  
>>>  (define_insn_and_split "unpack_nodm"
>>> -  [(set (match_operand: 0 "nonimmediate_operand" "=d,m")
>>> +  [(set (match_operand: 0 "nonimmediate_operand" "=d,m,m")
>>> (unspec:
>>> -[(match_operand:FMOVE128 1 "register_operand" "d,d")
>>> - (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
>>> +[(match_operand:FMOVE128 1 "register_operand" "d,d,r")
>>> + (match_operand:QI 2 "const_0_to_1_operand" "i,i,i")]
>>>  UNSPEC_UNPACK_128BIT))]
>>>"(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P 
>>> (mode)"
>>>"#"
>>> @@ -14600,7 +14600,7 @@ (define_insn_and_split "unpack_nodm"
>>>  
>>>operands[3] = gen_rtx_REG (mode, fp_regno);
>>>  }
>>> -  [(set_attr "type" "fp,fpstore")])
>>> +  [(set_attr "type" "fp,fpstore,store")])
>>>  
>>>  (define_insn_and_split "pack"
>>>[(set (match_operand:FMOVE128 0 "register_operand" "=&d")
>>> ===
>>>
>>>
>>
>> Nice, I confirmed this makes the ICE go away.  I've filed a new PR,
>> PR105213, for further tracking in GCC13 and associated this patch with it.
> 
> Cool, I'll commit it later today then (after a final regstrap).  The
> _nodm pattern just missed the alternative for no FP regs (the _dm
> pattern has it, so just an oversight).
> 

Thanks!  So can it be counted as a regression fix instead of tiny
feature work?  Maybe we also need a bif documentation change and a
gcc12 changes html update (as the bif behavior changes), or is it
too small to need that?

BR,
Kewen


[pushed] c++: using operator= [PR105223]

2022-04-11 Thread Jason Merrill via Gcc-patches
In a template class A we normally add an implicit using A::operator= as a
placeholder for the implicitly declared operator whose signature we don't
know yet.  In my patch for PR92918 I stopped doing that if the class has an
explicit operator=, but that was wrong; an operator= taking an unrelated
type doesn't prevent the implicit declaration.

When I was working on that patch, the change was necessary to avoid another
regression, but apparently it is no longer needed.

PR c++/105223
PR c++/92918

gcc/cp/ChangeLog:

* class.cc (finish_struct): Always using op=.

gcc/testsuite/ChangeLog:

* g++.dg/template/using31.C: New test.
---
 gcc/cp/class.cc | 19 ---
 gcc/testsuite/g++.dg/template/using31.C | 16 
 2 files changed, 24 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/using31.C

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 40e17140db5..bfda0065bf4 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -7723,17 +7723,14 @@ finish_struct (tree t, tree attributes)
 lookup not to fail or recurse into bases.  This isn't added
 to the template decl list so we drop this at instantiation
 time.  */
-  if (!get_class_binding_direct (t, assign_op_identifier, false))
-   {
- tree ass_op = build_lang_decl (USING_DECL, assign_op_identifier,
-NULL_TREE);
- DECL_CONTEXT (ass_op) = t;
- USING_DECL_SCOPE (ass_op) = t;
- DECL_DEPENDENT_P (ass_op) = true;
- DECL_ARTIFICIAL (ass_op) = true;
- DECL_CHAIN (ass_op) = TYPE_FIELDS (t);
- TYPE_FIELDS (t) = ass_op;
-   }
+  tree ass_op = build_lang_decl (USING_DECL, assign_op_identifier,
+NULL_TREE);
+  DECL_CONTEXT (ass_op) = t;
+  USING_DECL_SCOPE (ass_op) = t;
+  DECL_DEPENDENT_P (ass_op) = true;
+  DECL_ARTIFICIAL (ass_op) = true;
+  DECL_CHAIN (ass_op) = TYPE_FIELDS (t);
+  TYPE_FIELDS (t) = ass_op;
 
   TYPE_SIZE (t) = bitsize_zero_node;
   TYPE_SIZE_UNIT (t) = size_zero_node;
diff --git a/gcc/testsuite/g++.dg/template/using31.C 
b/gcc/testsuite/g++.dg/template/using31.C
new file mode 100644
index 000..bfeb94f2788
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/using31.C
@@ -0,0 +1,16 @@
+// PR c++/105223
+
+struct ServiceReferenceBase {
+  void operator=(int);
+};
+
+template
+struct ServiceReference : ServiceReferenceBase {
+  void foo() { operator=(0); }
+  using ServiceReferenceBase::operator=;
+};
+
+int main() {
+  ServiceReference sr;
+  sr.foo();
+}

base-commit: 6afb21b824dabf17c79e7b0a4230572f091307ec
-- 
2.27.0



[pushed] c++: rodata and defaulted ctor [PR104142]

2022-04-11 Thread Jason Merrill via Gcc-patches
Trivial initialization shouldn't bump a variable out of .rodata; if the
result of build_aggr_init is an empty STATEMENT_LIST, throw it away.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/104142

gcc/cp/ChangeLog:

* decl.cc (check_initializer): Check TREE_SIDE_EFFECTS.

gcc/testsuite/ChangeLog:

* g++.dg/opt/const7.C: New test.
---
 gcc/cp/decl.cc| 4 
 gcc/testsuite/g++.dg/opt/const7.C | 7 +++
 2 files changed, 11 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/opt/const7.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index c136dbbba1a..31cae4d1d36 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -7444,6 +7444,10 @@ check_initializer (tree decl, tree init, int flags, 
vec **cleanups)
   if (init && init != error_mark_node)
 init_code = build2 (INIT_EXPR, type, decl, init);
 
+  if (init_code && !TREE_SIDE_EFFECTS (init_code)
+  && init_code != error_mark_node)
+init_code = NULL_TREE;
+
   if (init_code)
 {
   /* We might have set these in cp_finish_decl.  */
diff --git a/gcc/testsuite/g++.dg/opt/const7.C 
b/gcc/testsuite/g++.dg/opt/const7.C
new file mode 100644
index 000..5bcf94897a8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/const7.C
@@ -0,0 +1,7 @@
+// PR c++/104142
+// { dg-do compile { target c++11 } }
+// { dg-additional-options -Wunused-variable }
+
+struct B { B()=default; };
+static const B b_var;  //  { dg-bogus "" }
+// { dg-final { scan-assembler-symbol-section {b_var} 
{^\.(const|rodata)|\[RO\]} } }

base-commit: 4195fced8a13422db94e179404588d9d887a036a
-- 
2.27.0



[Committed] IBM zSystems: Add support for z16 as CPU name.

2022-04-11 Thread Andreas Krebbel via Gcc-patches
So far z16 was identified as arch14. After the machine has been
announced we can now add the real name.

gcc/ChangeLog:

* common/config/s390/s390-common.cc: Rename PF_ARCH14 to PF_Z16.
* config.gcc: Add z16 as march/mtune switch.
* config/s390/driver-native.cc (s390_host_detect_local_cpu):
Recognize z16 with -march=native.
* config/s390/s390-opts.h (enum processor_type): Rename
PROCESSOR_ARCH14 to PROCESSOR_3931_Z16.
* config/s390/s390.cc (PROCESSOR_ARCH14): Rename to ...
(PROCESSOR_3931_Z16): ... throughout the file.
(s390_processor processor_table): Add z16 as cpu string.
* config/s390/s390.h (enum processor_flags): Rename PF_ARCH14 to
PF_Z16.
(TARGET_CPU_ARCH14): Rename to ...
(TARGET_CPU_Z16): ... this.
(TARGET_CPU_ARCH14_P): Rename to ...
(TARGET_CPU_Z16_P): ... this.
(TARGET_ARCH14): Rename to ...
(TARGET_Z16): ... this.
(TARGET_ARCH14_P): Rename to ...
(TARGET_Z16_P): ... this.
* config/s390/s390.md (cpu_facility): Rename arch14 to z16 and
check TARGET_Z16 instead of TARGET_ARCH14.
* config/s390/s390.opt: Add z16 to processor_type.
* doc/invoke.texi: Document z16 and arch14.
---
 gcc/common/config/s390/s390-common.cc |  4 ++--
 gcc/config.gcc|  2 +-
 gcc/config/s390/driver-native.cc  |  6 +-
 gcc/config/s390/s390-opts.h   |  2 +-
 gcc/config/s390/s390.cc   | 14 --
 gcc/config/s390/s390.h| 16 
 gcc/config/s390/s390.md   |  6 +++---
 gcc/config/s390/s390.opt  |  5 -
 gcc/doc/invoke.texi   |  3 ++-
 9 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/gcc/common/config/s390/s390-common.cc 
b/gcc/common/config/s390/s390-common.cc
index caec2f14c6c..72a5ef47eaa 100644
--- a/gcc/common/config/s390/s390-common.cc
+++ b/gcc/common/config/s390/s390-common.cc
@@ -50,10 +50,10 @@ EXPORTED_CONST int processor_flags_table[] =
 /* z15 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15,
-/* arch14 */ PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
+/* z16 */PF_IEEE_FLOAT | PF_ZARCH | PF_LONG_DISPLACEMENT
 | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 | PF_ZEC12 | PF_TX
 | PF_Z13 | PF_VX | PF_VXE | PF_Z14 | PF_VXE2 | PF_Z15
-| PF_NNPA | PF_ARCH14
+| PF_NNPA | PF_Z16
   };
 
 /* Change optimizations to be performed, depending on the
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 48a5bbcf787..c5064dd3766 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5532,7 +5532,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
-   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | arch5 | arch6 | arch7 | arch8 | arch9 | arch10 | 
arch11 | arch12 | arch13 | arch14 )
+   "" | native | z900 | z990 | z9-109 | z9-ec | z10 | z196 
| zEC12 | z13 | z14 | z15 | z16 | arch5 | arch6 | arch7 | arch8 | arch9 | 
arch10 | arch11 | arch12 | arch13 | arch14 )
# OK
;;
*)
diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 48524c49251..b5eb222872d 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -123,8 +123,12 @@ s390_host_detect_local_cpu (int argc, const char **argv)
case 0x8562:
  cpu = "z15";
  break;
+   case 0x3931:
+   case 0x3932:
+ cpu = "z16";
+ break;
default:
- cpu = "arch14";
+ cpu = "z16";
  break;
}
}
diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h
index 1ec84631a5f..4ef82ac5d34 100644
--- a/gcc/config/s390/s390-opts.h
+++ b/gcc/config/s390/s390-opts.h
@@ -38,7 +38,7 @@ enum processor_type
   PROCESSOR_2964_Z13,
   PROCESSOR_3906_Z14,
   PROCESSOR_8561_Z15,
-  PROCESSOR_ARCH14,
+  PROCESSOR_3931_Z16,
   PROCESSOR_NATIVE,
   PROCESSOR_max
 };
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..1342a2e7db0 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -337,7 +337,7 @@ const struct s390_processor processor_table[] =
   { "z13","z13",PROCESSOR_2964_Z13,&zEC12_cost,  11 },
   { "z14","arch12", PROCESSOR_3906_Z14,&zEC12_cost,  12 },
   { "z15","arch13", PROCESSOR_8561_Z15,&zEC12_cost,  13 },
-  { "arch14", "arch14", PROCESSOR_ARCH14,  &zEC12_cost,  14 },
+  { "z16","arch14", PROCESSOR_3931_Z16,