[PATCH v2] i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW

2024-05-21 Thread Haochen Jiang
Hi all,

This is the v2 patch to fix PR115069. The new testcase has passed.

Changes in v2:
  - Added a testcase.
  - Change the comment for the early exit.

Thx,
Haochen

Since vpermq is really slow, we should avoid using it for permutation
when vpmovwb is not available (needs AVX512BW) for ix86_expand_vecop_qihi2
and fall back to ix86_expand_vecop_qihi.

gcc/ChangeLog:

PR target/115069
* config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
Do not enable the optimization when AVX512BW is not enabled.

gcc/testsuite/ChangeLog:

PR target/115069
* gcc.target/i386/pr115069.c: New.
---
 gcc/config/i386/i386-expand.cc   |  7 +++
 gcc/testsuite/gcc.target/i386/pr115069.c | 78 
 2 files changed, 85 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115069.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a6132911e6a..f7939761879 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24323,6 +24323,13 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, 
rtx op1, rtx op2)
   bool op2vec = GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT;
   bool uns_p = code != ASHIFTRT;
 
+  /* Without VPMOVWB (provided by AVX512BW ISA), the expansion uses the
+ generic permutation to merge the data back into the right place.  This
+ permutation results in VPERMQ, which is slow, so better fall back to
+ ix86_expand_vecop_qihi.  */
+  if (!TARGET_AVX512BW)
+return false;
+
   if ((qimode == V16QImode && !TARGET_AVX2)
   || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
   /* There are no V64HImode instructions.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c 
b/gcc/testsuite/gcc.target/i386/pr115069.c
new file mode 100644
index 000..c4b48b602ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115069.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-not "vpermq" } } */
+
+#include 
+#include 
+#include 
+#include 
+
+typedef int8_t  stress_vint8_t  __attribute__ ((vector_size (16)));
+
+#define OPS(a, b, c, s, v23, v3) \
+do {   \
+   a += b; \
+   a |= b; \
+   a -= b; \
+   a &= ~b;\
+   a *= c; \
+   a = ~a; \
+   a *= s; \
+   a ^= c; \
+   a <<= 1;\
+   b >>= 1;\
+   b += c; \
+   a %= v23;   \
+   c /= v3;\
+   b = b ^ c;  \
+   c = b ^ c;  \
+   b = b ^ c;  \
+} while (0)
+
+volatile uint8_t csum8_put;
+
+void stress_vecmath(void)
+{
+   const stress_vint8_t v23_8 = { 
+   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 
+   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17 
+   };
+   const stress_vint8_t v3_8 = {
+   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
+   };
+   stress_vint8_t a8 = {
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+   };
+   stress_vint8_t b8 = {
+   0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+   0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78
+   };
+   stress_vint8_t c8 = {
+   0x01, 0x02, 0x03, 0x02, 0x01, 0x02, 0x03, 0x02,
+   0x03, 0x02, 0x01, 0x02, 0x03, 0x02, 0x01, 0x02
+   };
+   stress_vint8_t s8 = {
+   0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
+   0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02,
+   };
+   const uint8_t csum8_val =  (uint8_t)0x1b;
+   int i;
+   uint8_t csum8;
+
+   for (i = 1000; i; i--) {
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   OPS(a8, b8, c8, s8, v23_8, v3_8);
+   }
+
+   csum8 = a8[0]  ^ a8[1]  ^ a8[2]  ^ a8[3]  ^
+   a8[4]  ^ a8[5]  ^ a8[6]  ^ a8[7]  ^
+   a8[8]  ^ a8[9]  ^ a8[10] ^ a8[11] ^
+   a8[12] ^ a8[13] ^ a8[14] ^ a8[15];
+   csum8_put = csum8;
+}
-- 
2.31.1



Re: [PATCH v2] i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW

2024-05-21 Thread Hongtao Liu
On Tue, May 21, 2024 at 3:14 PM Haochen Jiang  wrote:
>
> Hi all,
>
> This is the v2 patch to fix PR115069. The new testcase has passed.
>
> Changes in v2:
>   - Added a testcase.
>   - Change the comment for the early exit.
>
> Thx,
> Haochen
>
> Since vpermq is really slow, we should avoid using it for permutation
> when vpmovwb is not available (needs AVX512BW) for ix86_expand_vecop_qihi2
> and fall back to ix86_expand_vecop_qihi.
>
> gcc/ChangeLog:
>
> PR target/115069
> * config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
> Do not enable the optimization when AVX512BW is not enabled.
>
> gcc/testsuite/ChangeLog:
>
> PR target/115069
> * gcc.target/i386/pr115069.c: New.
> ---
>  gcc/config/i386/i386-expand.cc   |  7 +++
>  gcc/testsuite/gcc.target/i386/pr115069.c | 78 
>  2 files changed, 85 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115069.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index a6132911e6a..f7939761879 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -24323,6 +24323,13 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx 
> dest, rtx op1, rtx op2)
>bool op2vec = GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT;
>bool uns_p = code != ASHIFTRT;
>
> +  /* Without VPMOVWB (provided by AVX512BW ISA), the expansion uses the
> + generic permutation to merge the data back into the right place.  This
> + permutation results in VPERMQ, which is slow, so better fall back to
> + ix86_expand_vecop_qihi.  */
> +  if (!TARGET_AVX512BW)
> +return false;
> +
>if ((qimode == V16QImode && !TARGET_AVX2)
>|| (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
>/* There are no V64HImode instructions.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c 
> b/gcc/testsuite/gcc.target/i386/pr115069.c
> new file mode 100644
> index 000..c4b48b602ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115069.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler-not "vpermq" } } */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +typedef int8_t  stress_vint8_t  __attribute__ ((vector_size (16)));
No need for such big testcase,

typedef char v16qi __attribute__((vector_size(16)));
v16qi
foo (v16qi a, v16qi b)
{
return a * b;
}

should be enough, with -mavx2 -mno-avx512f
> +
> +#define OPS(a, b, c, s, v23, v3) \
> +do {   \
> +   a += b; \
> +   a |= b; \
> +   a -= b; \
> +   a &= ~b;\
> +   a *= c; \
> +   a = ~a; \
> +   a *= s; \
> +   a ^= c; \
> +   a <<= 1;\
> +   b >>= 1;\
> +   b += c; \
> +   a %= v23;   \
> +   c /= v3;\
> +   b = b ^ c;  \
> +   c = b ^ c;  \
> +   b = b ^ c;  \
> +} while (0)
> +
> +volatile uint8_t csum8_put;
> +
> +void stress_vecmath(void)
> +{
> +   const stress_vint8_t v23_8 = {
> +   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
> +   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
> +   };
> +   const stress_vint8_t v3_8 = {
> +   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
> +   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
> +   };
> +   stress_vint8_t a8 = {
> +   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
> +   };
> +   stress_vint8_t b8 = {
> +   0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
> +   0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78
> +   };
> +   stress_vint8_t c8 = {
> +   0x01, 0x02, 0x03, 0x02, 0x01, 0x02, 0x03, 0x02,
> +   0x03, 0x02, 0x01, 0x02, 0x03, 0x02, 0x01, 0x02
> +   };
> +   stress_vint8_t s8 = {
> +   0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
> +   0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02,
> +   };
> +   const uint8_t csum8_val =  (uint8_t)0x1b;
> +   int i;
> +   uint8_t csum8;
> +
> +   for (i = 1000; i; i--) {
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> +   }
> +
> +   csum8 = a8[0]  ^ a8[1]  ^ a8[2]  ^ a8[3]  ^
> +   a8[4]  ^ a8[5]  ^ a8[6]  ^ a8[7]  ^
> +   a8[8]  ^ a8[9]  ^ a8[10] ^ a8[11] ^
> +   a8[12] 

Re: [Patch] Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

2024-05-21 Thread rep . dot . nop
On 20 May 2024 02:31:27 CEST, Sandra Loosemore  wrote:
>On 5/19/24 02:01, Tobias Burnus wrote:
>> I noticed that gfortran's coarray support did not link to the 
>> http://www.opencoarrays.org/ >
>> [snip]
>> 
>> diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
>> index 40e8e4a7cdd..78a2910b8d8 100644
>> --- a/gcc/fortran/invoke.texi
>> +++ b/gcc/fortran/invoke.texi
>> @@ -1753,7 +1753,10 @@ Single-image mode, i.e. @code{num_images()} is always 
>> one.
>>   @item @samp{lib}
>>  Library-based coarray parallelization; a suitable GNU Fortran coarray
>> -library needs to be linked.
>> +library needs to be linked such as @url{http://opencoarrays.org}.
>
>This would read better as
>
>library such as @url{http://opencoarrays.org} needs to be linked.

Maybe use https?

thanks

>
>> +Alternatively, GCC's @code{libcaf_single} library can be linked,
>> +albeit it only supports a single image.
>> +
>>  @end table
>
>OK with that tweak.
>
>-Sandra
>
>
>



[COMMITTED 02/31] ada: Follow-up fix to previous change for Text_Ptr

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

The variable would be saved and restored while still uninitialized.

gcc/ada/

* err_vars.ads (Error_Msg_Sloc): Initialize to No_Location.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/err_vars.ads | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/err_vars.ads b/gcc/ada/err_vars.ads
index 113dd936db6..838217b95f4 100644
--- a/gcc/ada/err_vars.ads
+++ b/gcc/ada/err_vars.ads
@@ -107,7 +107,7 @@ package Err_Vars is
 
--  WARNING: There is a matching C declaration of these variables in fe.h
 
-   Error_Msg_Sloc : Source_Ptr;
+   Error_Msg_Sloc : Source_Ptr := No_Location;
--  Source location for # insertion character in message
 
Error_Msg_Name_1 : Name_Id;
-- 
2.43.2



[COMMITTED 04/31] ada: Remove conversion from String_Id to String and back to String_Id

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

Code cleanup; semantics is unaffected.

gcc/ada/

* exp_put_image.adb (Build_Record_Put_Image_Procedure): Remove
useless conversions.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_put_image.adb | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index f5141a56626..09fbfa75eeb 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -44,7 +44,6 @@ with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
 with Snames; use Snames;
 with Stand;
-with Stringt;use Stringt;
 with Tbuild; use Tbuild;
 with Ttypes; use Ttypes;
 with Uintp;  use Uintp;
@@ -832,9 +831,8 @@ package body Exp_Put_Image is
 Parameter_Associations => New_List
   (Make_Identifier (Loc, Name_S),
Make_String_Literal (Loc,
- To_String
-   (Fully_Qualified_Name_String
-  (Btyp, Append_NUL => False));
+ Fully_Qualified_Name_String
+   (Btyp, Append_NUL => False);
  end if;
   elsif Is_Null_Record_Type (Btyp, Ignore_Privacy => True) then
 
-- 
2.43.2



[COMMITTED 07/31] ada: Fix index entry for an implemented AI feature

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

Fix inconsistent reference with "05" in the name of AI.

gcc/ada/

* doc/gnat_rm/implementation_of_ada_2012_features.rst
(AI-0216): Fix index reference.
* gnat_rm.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst | 2 +-
 gcc/ada/gnat_rm.texi| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst 
b/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
index 2825362c616..d7f1fea01f3 100644
--- a/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
+++ b/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
@@ -1243,7 +1243,7 @@ Supported Aspect Source
 
   RM References:  B.01 (17)   B.03 (62)   B.03 (71.1/2)
 
-.. index:: AI05-0216 (Ada 2012 feature)
+.. index:: AI-0216 (Ada 2012 feature)
 
 * *AI-0216 No_Task_Hierarchy forbids local tasks (-00-00)*
 
diff --git a/gcc/ada/gnat_rm.texi b/gcc/ada/gnat_rm.texi
index 4ff1de42db2..0d38b1a4bc6 100644
--- a/gcc/ada/gnat_rm.texi
+++ b/gcc/ada/gnat_rm.texi
@@ -28603,7 +28603,7 @@ non-portable.
 RM References:  B.01 (17)   B.03 (62)   B.03 (71.1/2)
 @end itemize
 
-@geindex AI05-0216 (Ada 2012 feature)
+@geindex AI-0216 (Ada 2012 feature)
 
 
 @itemize *
-- 
2.43.2



[COMMITTED 03/31] ada: Remove trailing NUL in minimal expansion of Put_Image attribute

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

When the procedure that implements the Put_Image attribute emits the type
name, this name was wrongly followed by a NUL character.

gcc/ada/

* exp_put_image.adb (Build_Record_Put_Image_Procedure): Remove
trailing NUL from the fully qualified type name.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_put_image.adb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index c23b4e24354..f5141a56626 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -832,7 +832,9 @@ package body Exp_Put_Image is
 Parameter_Associations => New_List
   (Make_Identifier (Loc, Name_S),
Make_String_Literal (Loc,
- To_String (Fully_Qualified_Name_String (Btyp));
+ To_String
+   (Fully_Qualified_Name_String
+  (Btyp, Append_NUL => False));
  end if;
   elsif Is_Null_Record_Type (Btyp, Ignore_Privacy => True) then
 
-- 
2.43.2



[COMMITTED 12/31] ada: Add elaboration switch tags to info messages

2024-05-21 Thread Marc Poulhiès
From: Viljar Indus 

Add the ?$? insertion characters to elaboration info
messages so that they are marked with the [-gnatel]
tag. Note that these insertion characters were
not added for SPARK elaboration messages.

gcc/ada/

* sem_elab.adb: Add missing elaboration insertion
characters to info messages.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_elab.adb | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/ada/sem_elab.adb b/gcc/ada/sem_elab.adb
index 9205f4cef82..4d6e14cc49c 100644
--- a/gcc/ada/sem_elab.adb
+++ b/gcc/ada/sem_elab.adb
@@ -4920,7 +4920,7 @@ package body Sem_Elab is
and then not New_In_State.Suppress_Info_Messages
  then
 Error_Msg_NE
-  ("info: access to & during elaboration", Attr, Subp_Id);
+  ("info: access to & during elaboration?$?", Attr, Subp_Id);
  end if;
 
  --  Warnings are suppressed when a prior scenario is already in that
@@ -5027,7 +5027,7 @@ package body Sem_Elab is
and then not New_In_State.Suppress_Info_Messages
  then
 Error_Msg_NE
-  ("info: activation of & during elaboration", Call, Obj_Id);
+  ("info: activation of & during elaboration?$?", Call, Obj_Id);
  end if;
 
  --  Nothing to do when the call activates a task whose type is defined
@@ -6461,7 +6461,7 @@ package body Sem_Elab is
 if In_SPARK then
return " in SPARK";
 else
-   return "";
+   return "?$?";
 end if;
  end Suffix;
 
@@ -8277,7 +8277,9 @@ package body Sem_Elab is
Error_Msg_Name_1 := Prag_Nam;
Error_Msg_Qual_Level := Nat'Last;
 
-   Error_Msg_NE ("info: missing pragma % for unit &", N, Unit_Id);
+   Error_Msg_NE
+ ("info: missing pragma % for unit &?$?", N,
+  Unit_Id);
Error_Msg_Qual_Level := 0;
 end if;
  end Info_Missing_Pragma;
@@ -8406,7 +8408,8 @@ package body Sem_Elab is
Error_Msg_Qual_Level := Nat'Last;
 
Error_Msg_NE
- ("info: implicit pragma % generated for unit &", N, Unit_Id);
+ ("info: implicit pragma % generated for unit &?$?",
+   N, Unit_Id);
 
Error_Msg_Qual_Level := 0;
Output_Active_Scenarios (N, In_State);
-- 
2.43.2



[COMMITTED 09/31] ada: Fix formatting in list of implemented Ada 2012 features

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

Fix formatting; meaning is unaffected.

gcc/ada/

* doc/gnat_rm/implementation_of_ada_2012_features.rst:
Fix formatting.
* gnat_rm.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst | 6 +++---
 gcc/ada/gnat_rm.texi| 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst 
b/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
index 706de492301..9708e15de8d 100644
--- a/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
+++ b/gcc/ada/doc/gnat_rm/implementation_of_ada_2012_features.rst
@@ -255,7 +255,7 @@ http://www.ada-auth.org/ai05-summary.html.
 
 * *AI-0039 Stream attributes cannot be dynamic (-00-00)*
 
-  The RM permitted the use of dynamic expressions (such as ``ptr.all``)`
+  The RM permitted the use of dynamic expressions (such as ``ptr.all``)
   for stream attributes, but these were never useful and are now illegal. GNAT
   has always regarded such expressions as illegal.
 
@@ -555,7 +555,7 @@ http://www.ada-auth.org/ai05-summary.html.
   This AI clarifies that 'needs finalization' is part of dynamic semantics,
   and therefore depends on the run-time characteristics of an object (i.e. its
   tag) and not on its nominal type. As the AI indicates: "we do not expect
-  this to affect any implementation''.
+  this to affect any implementation".
 
   RM References:  7.06.01 (6)   7.06.01 (7)   7.06.01 (8)   7.06.01 (9/2)
 
@@ -812,7 +812,7 @@ http://www.ada-auth.org/ai05-summary.html.
 
   The new syntax for iterating over arrays and containers is now implemented.
   Iteration over containers is for now limited to read-only iterators. Only
-  default iterators are supported, with the syntax:  ``for Elem of C``.
+  default iterators are supported, with the syntax: ``for Elem of C``.
 
   RM References:  5.05
 
diff --git a/gcc/ada/gnat_rm.texi b/gcc/ada/gnat_rm.texi
index df6969f98b7..776dd4a4afc 100644
--- a/gcc/ada/gnat_rm.texi
+++ b/gcc/ada/gnat_rm.texi
@@ -26913,7 +26913,7 @@ RM References:  A.10.05 (37)   A.10.07 (8/1)   A.10.07 
(10)   A.10.07 (12)   A.1
 @item 
 `AI-0039 Stream attributes cannot be dynamic (-00-00)'
 
-The RM permitted the use of dynamic expressions (such as @code{ptr.all})`
+The RM permitted the use of dynamic expressions (such as @code{ptr.all})
 for stream attributes, but these were never useful and are now illegal. GNAT
 has always regarded such expressions as illegal.
 
@@ -27358,7 +27358,7 @@ RM References:  3.10.01 (6)   3.10.01 (9.2/2)
 This AI clarifies that ‘needs finalization’ is part of dynamic semantics,
 and therefore depends on the run-time characteristics of an object (i.e. its
 tag) and not on its nominal type. As the AI indicates: “we do not expect
-this to affect any implementation’’.
+this to affect any implementation”.
 
 RM References:  7.06.01 (6)   7.06.01 (7)   7.06.01 (8)   7.06.01 (9/2)
 @end itemize
@@ -27730,7 +27730,7 @@ RM References:  A.04.11
 
 The new syntax for iterating over arrays and containers is now implemented.
 Iteration over containers is for now limited to read-only iterators. Only
-default iterators are supported, with the syntax:  @code{for Elem of C}.
+default iterators are supported, with the syntax: @code{for Elem of C}.
 
 RM References:  5.05
 @end itemize
-- 
2.43.2



[COMMITTED 05/31] ada: Do not leak tagged type names when Discard_Names is enabled

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

When both pragmas Discard_Names and No_Tagged_Streams apply to a tagged
type, the intended behavior is to prevent type names from leaking into
object code, as documented in GNAT RM.

However, while Discard_Names can be used as a configuration pragma,
No_Tagged_Streams must be applied to each type separately. This patch
enables the use of restriction No_Streams, which can be activated
globally, instead of No_Tagged_Streams on individual types.

When no tagged stream object can be created and allocated, then routines
that make use of the External_Tag won't be used.

gcc/ada/

* doc/gnat_rm/implementation_defined_pragmas.rst
(No_Tagged_Streams): Document how to avoid exposing entity names
for the entire partition.
* exp_disp.adb (Make_DT): Make use of restriction No_Streams.
* exp_put_image.adb (Build_Record_Put_Image_Procedure): Respect
Discard_Names in the generated Put_Image procedure.
* gnat_rm.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../implementation_defined_pragmas.rst|  6 
 gcc/ada/exp_disp.adb  |  5 +--
 gcc/ada/exp_put_image.adb | 34 ++-
 gcc/ada/gnat_rm.texi  |  6 
 4 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/gcc/ada/doc/gnat_rm/implementation_defined_pragmas.rst 
b/gcc/ada/doc/gnat_rm/implementation_defined_pragmas.rst
index 0661670e047..7e4dd935342 100644
--- a/gcc/ada/doc/gnat_rm/implementation_defined_pragmas.rst
+++ b/gcc/ada/doc/gnat_rm/implementation_defined_pragmas.rst
@@ -4000,6 +4000,12 @@ applied to a tagged type its Expanded_Name and 
External_Tag are initialized
 with empty strings. This is useful to avoid exposing entity names at binary
 level but has a negative impact on the debuggability of tagged types.
 
+Alternatively, when pragmas ``Discard_Names`` and ``Restrictions (No_Streams)``
+simultaneously apply to a tagged type, its Expanded_Name and External_Tag are
+also initialized with empty strings. In particular, both these pragmas can be
+applied as configuration pragmas to avoid exposing entity names at binary
+level for the entire partition.
+
 Pragma Normalize_Scalars
 
 
diff --git a/gcc/ada/exp_disp.adb b/gcc/ada/exp_disp.adb
index 601d463a8b0..66be77c9ffc 100644
--- a/gcc/ada/exp_disp.adb
+++ b/gcc/ada/exp_disp.adb
@@ -4600,8 +4600,9 @@ package body Exp_Disp is
   --streams.
 
   Discard_Names : constant Boolean :=
-Present (No_Tagged_Streams_Pragma (Typ))
-  and then
+(Present (No_Tagged_Streams_Pragma (Typ))
+   or else Restriction_Active (No_Streams))
+  and then
 (Global_Discard_Names or else Einfo.Entities.Discard_Names (Typ));
 
   --  The following name entries are used by Make_DT to generate a number
diff --git a/gcc/ada/exp_put_image.adb b/gcc/ada/exp_put_image.adb
index 09fbfa75eeb..94299e39661 100644
--- a/gcc/ada/exp_put_image.adb
+++ b/gcc/ada/exp_put_image.adb
@@ -44,6 +44,7 @@ with Sinfo.Nodes;use Sinfo.Nodes;
 with Sinfo.Utils;use Sinfo.Utils;
 with Snames; use Snames;
 with Stand;
+with Stringt;use Stringt;
 with Tbuild; use Tbuild;
 with Ttypes; use Ttypes;
 with Uintp;  use Uintp;
@@ -825,14 +826,31 @@ package body Exp_Put_Image is
   Make_Raise_Program_Error (Loc,
   Reason => PE_Explicit_Raise));
  else
-Append_To (Stms,
-  Make_Procedure_Call_Statement (Loc,
-Name => New_Occurrence_Of (RTE (RE_Put_Image_Unknown), Loc),
-Parameter_Associations => New_List
-  (Make_Identifier (Loc, Name_S),
-   Make_String_Literal (Loc,
- Fully_Qualified_Name_String
-   (Btyp, Append_NUL => False);
+declare
+   Type_Name : String_Id;
+begin
+   --  If aspect Discard_Names is enabled the intention is to
+   --  prevent type names from leaking into object file. Instead,
+   --  we emit string that is different from the ones from the
+   --  default implementations of the Put_Image attribute.
+
+   if Global_Discard_Names or else Discard_Names (Typ) then
+  Start_String;
+  Store_String_Chars ("(DISCARDED TYPE NAME)");
+  Type_Name := End_String;
+   else
+  Type_Name :=
+Fully_Qualified_Name_String (Btyp, Append_NUL => False);
+   end if;
+
+   Append_To (Stms,
+ Make_Procedure_Call_Statement (Loc,
+   Name => New_Occurrence_Of (RTE (RE_Put_Image_Unknown), Loc),
+   Parameter_Associations => New_List
+ (Make_Identifier (Loc, Name_S),

[COMMITTED 06/31] ada: Update documentation of warning messages

2024-05-21 Thread Marc Poulhiès
From: Viljar Indus 

Update the documentation of warning switches that only
emit info messages to clearly reflect that they emit
info messages and not warning messages.

gcc/ada/

* doc/gnat_ugn/building_executable_programs_with_gnat.rst:
Update the documentation of -gnatw.n and -gnatw.l
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../building_executable_programs_with_gnat.rst   | 14 +++---
 gcc/ada/gnat_ugn.texi| 16 
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst 
b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
index 21e277d5916..2f63d02daf7 100644
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -3415,7 +3415,7 @@ of the pragma in the :title:`GNAT_Reference_manual`).
 .. index:: -gnatw.l  (gcc)
 
 :switch:`-gnatw.l`
-  *List inherited aspects.*
+  *List inherited aspects as info messages.*
 
   This switch causes the compiler to list inherited invariants,
   preconditions, and postconditions from Type_Invariant'Class, Invariant'Class,
@@ -3425,7 +3425,7 @@ of the pragma in the :title:`GNAT_Reference_manual`).
 .. index:: -gnatw.L  (gcc)
 
 :switch:`-gnatw.L`
-  *Suppress listing of inherited aspects.*
+  *Suppress listing of inherited aspects as info messages.*
 
   This switch suppresses listing of inherited aspects.
 
@@ -3495,20 +3495,20 @@ of the pragma in the :title:`GNAT_Reference_manual`).
 .. index:: Atomic Synchronization, warnings
 
 :switch:`-gnatw.n`
-  *Activate warnings on atomic synchronization.*
+  *Activate info messages on atomic synchronization.*
 
-  This switch actives warnings when an access to an atomic variable
+  This switch activates info messages when an access to an atomic variable
   requires the generation of atomic synchronization code. These
-  warnings are off by default.
+  info messages are off by default.
 
 .. index:: -gnatw.N  (gcc)
 
 :switch:`-gnatw.N`
-  *Suppress warnings on atomic synchronization.*
+  *Suppress info messages on atomic synchronization.*
 
   .. index:: Atomic Synchronization, warnings
 
-  This switch suppresses warnings when an access to an atomic variable
+  This switch suppresses info messages when an access to an atomic variable
   requires the generation of atomic synchronization code.
 
 
diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
index 43251ba3f1c..2df2a780ec7 100644
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -11646,7 +11646,7 @@ This switch suppresses warnings for possible 
elaboration problems.
 
 @item @code{-gnatw.l}
 
-`List inherited aspects.'
+`List inherited aspects as info messages.'
 
 This switch causes the compiler to list inherited invariants,
 preconditions, and postconditions from Type_Invariant’Class, Invariant’Class,
@@ -11660,7 +11660,7 @@ Pre’Class, and Post’Class aspects. Also list inherited 
subtype predicates.
 
 @item @code{-gnatw.L}
 
-`Suppress listing of inherited aspects.'
+`Suppress listing of inherited aspects as info messages.'
 
 This switch suppresses listing of inherited aspects.
 @end table
@@ -11755,11 +11755,11 @@ use of @code{-gnatg}.
 
 @item @code{-gnatw.n}
 
-`Activate warnings on atomic synchronization.'
+`Activate info messages on atomic synchronization.'
 
-This switch actives warnings when an access to an atomic variable
+This switch activates info messages when an access to an atomic variable
 requires the generation of atomic synchronization code. These
-warnings are off by default.
+info messages are off by default.
 @end table
 
 @geindex -gnatw.N (gcc)
@@ -11769,12 +11769,12 @@ warnings are off by default.
 
 @item @code{-gnatw.N}
 
-`Suppress warnings on atomic synchronization.'
+`Suppress info messages on atomic synchronization.'
 
 @geindex Atomic Synchronization
 @geindex warnings
 
-This switch suppresses warnings when an access to an atomic variable
+This switch suppresses info messages when an access to an atomic variable
 requires the generation of atomic synchronization code.
 @end table
 
@@ -29645,8 +29645,8 @@ to permit their use in free software.
 
 @printindex ge
 
-@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 @anchor{d1}@w{  }
+@anchor{gnat_ugn/gnat_utility_programs switches-related-to-project-files}@w{   
   }
 
 @c %**end of body
 @bye
-- 
2.43.2



[COMMITTED 11/31] ada: Simplify management of scopes while inlining

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

Code cleanup; semantics is unaffected.

gcc/ada/

* inline.adb (Add_Scope_To_Clean): Use Append_Unique_Elmt.
(Analyze_Inlined_Bodies): Refine type of a local counter;
remove extra whitespace.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/inline.adb | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb
index a628a59e145..17b3099e6a6 100644
--- a/gcc/ada/inline.adb
+++ b/gcc/ada/inline.adb
@@ -845,19 +845,8 @@ package body Inline is

 
procedure Add_Scope_To_Clean (Scop : Entity_Id) is
-  Elmt : Elmt_Id;
-
begin
-  Elmt := First_Elmt (To_Clean);
-  while Present (Elmt) loop
- if Node (Elmt) = Scop then
-return;
- end if;
-
- Next_Elmt (Elmt);
-  end loop;
-
-  Append_Elmt (Scop, To_Clean);
+  Append_Unique_Elmt (Scop, To_Clean);
end Add_Scope_To_Clean;
 
--
@@ -915,7 +904,7 @@ package body Inline is
 
procedure Analyze_Inlined_Bodies is
   Comp_Unit : Node_Id;
-  J : Int;
+  J : Nat;
   Pack  : Entity_Id;
   Subp  : Subp_Index;
   S : Succ_Index;
@@ -2569,8 +2558,8 @@ package body Inline is
(Proc_Id   : out Entity_Id;
 Decl_List : out List_Id)
  is
-Formals   : constant List_Id   := New_List;
-Subp_Name : constant Name_Id   := New_Internal_Name ('F');
+Formals   : constant List_Id := New_List;
+Subp_Name : constant Name_Id := New_Internal_Name ('F');
 
 Body_Decls : List_Id := No_List;
 Decl   : Node_Id;
-- 
2.43.2



[COMMITTED 21/31] ada: Remove unused dependencies from gnatbind object list

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

The gnatbind executable does not depend on aspects, SCIL, style checks,
etc. Also, these dependencies are not needed to actually build the
executable. Cleanup.

gcc/ada/

* gcc-interface/Make-lang.in (GNATBIND_OBJS): Remove unused
dependencies.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/Make-lang.in | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/ada/gcc-interface/Make-lang.in 
b/gcc/ada/gcc-interface/Make-lang.in
index f6404c0b1eb..4f1b310fb84 100644
--- a/gcc/ada/gcc-interface/Make-lang.in
+++ b/gcc/ada/gcc-interface/Make-lang.in
@@ -572,7 +572,6 @@ GNATBIND_OBJS = \
  ada/ali-util.o   \
  ada/ali.o\
  ada/alloc.o  \
- ada/aspects.o\
  ada/atree.o  \
  ada/bcheck.o \
  ada/binde.o  \
@@ -602,12 +601,10 @@ GNATBIND_OBJS = \
  ada/exit.o   \
  ada/final.o  \
  ada/fmap.o   \
- ada/fname-uf.o   \
  ada/fname.o  \
  ada/gnatbind.o   \
  ada/gnatvsn.o\
  ada/hostparm.o   \
- ada/krunch.o \
  ada/lib.o\
  ada/link.o   \
  ada/namet.o  \
@@ -618,7 +615,6 @@ GNATBIND_OBJS = \
  ada/output.o \
  ada/rident.o \
  ada/scans.o  \
- ada/scil_ll.o\
  ada/scng.o   \
  ada/sdefault.o   \
  ada/seinfo.o\
@@ -631,7 +627,6 @@ GNATBIND_OBJS = \
  ada/snames.o \
  ada/stand.o  \
  ada/stringt.o\
- ada/style.o  \
  ada/styleg.o \
  ada/stylesw.o\
  ada/switch-b.o   \
-- 
2.43.2



[COMMITTED 01/31] ada: Add new Mingw task priority mapping

2024-05-21 Thread Marc Poulhiès
From: Justin Squirek 

This patch adds a new mapping (Non_FIFO_Underlying_Priorities) for dynamically
setting task priorities in Windows when pragma Task_Dispatching_Policy
(FIFO_Within_Priorities) is not present. Additionally, it documents the
requirement to specify the pragma in order to use Set_Priority in the general
case.

gcc/ada/

* doc/gnat_ugn/platform_specific_information.rst: Add note about
different priority level granularities under different policies in
Windows and move POSIX related info into new section.
* libgnarl/s-taprop.ads: Add note about Task_Dispatching_Policy.
* libgnarl/s-taprop__mingw.adb:
(Set_Priority): Add use of Non_FIFO_Underlying_Priorities.
* libgnat/system-mingw.ads: Add documentation for modifying
priority mappings and add alternative mapping
Non_FIFO_Underlying_Priorities.
* gnat_ugn.texi: Regenerate.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 .../platform_specific_information.rst | 117 +++---
 gcc/ada/gnat_ugn.texi | 359 +-
 gcc/ada/libgnarl/s-taprop.ads |   9 +
 gcc/ada/libgnarl/s-taprop__mingw.adb  |   5 +-
 gcc/ada/libgnat/system-mingw.ads  |  27 +-
 5 files changed, 289 insertions(+), 228 deletions(-)

diff --git a/gcc/ada/doc/gnat_ugn/platform_specific_information.rst 
b/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
index 3744b742f8e..7eeb6c2c396 100644
--- a/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
+++ b/gcc/ada/doc/gnat_ugn/platform_specific_information.rst
@@ -171,57 +171,6 @@ Selecting another run-time library temporarily can be
 achieved by using the :switch:`--RTS` switch, e.g., :switch:`--RTS=sjlj`
 
 
-.. _Choosing_the_Scheduling_Policy:
-
-.. index:: SCHED_FIFO scheduling policy
-.. index:: SCHED_RR scheduling policy
-.. index:: SCHED_OTHER scheduling policy
-
-Choosing the Scheduling Policy
---
-
-When using a POSIX threads implementation, you have a choice of several
-scheduling policies: ``SCHED_FIFO``, ``SCHED_RR`` and ``SCHED_OTHER``.
-
-Typically, the default is ``SCHED_OTHER``, while using ``SCHED_FIFO``
-or ``SCHED_RR`` requires special (e.g., root) privileges.
-
-.. index:: pragma Time_Slice
-.. index:: -T0 option
-.. index:: pragma Task_Dispatching_Policy
-
-
-By default, GNAT uses the ``SCHED_OTHER`` policy. To specify
-``SCHED_FIFO``,
-you can use one of the following:
-
-* ``pragma Time_Slice (0.0)``
-* the corresponding binder option :switch:`-T0`
-* ``pragma Task_Dispatching_Policy (FIFO_Within_Priorities)``
-
-
-To specify ``SCHED_RR``,
-you should use ``pragma Time_Slice`` with a
-value greater than 0.0, or else use the corresponding :switch:`-T`
-binder option.
-
-
-To make sure a program is running as root, you can put something like
-this in a library package body in your application:
-
-  .. code-block:: ada
-
- function geteuid return Integer;
- pragma Import (C, geteuid, "geteuid");
- Ignore : constant Boolean :=
-   (if geteuid = 0 then True else raise Program_Error with "must be root");
-
-It gets the effective user id, and if it's not 0 (i.e. root), it raises
-Program_Error. Note that if you re running the code in a container, this may
-not be sufficient, as you may have sufficient priviledge on the container,
-but not on the host machine running the container, so check that you also
-have sufficient priviledge for running the container image.
-
 .. index:: Linux
 .. index:: GNU/Linux
 
@@ -296,6 +245,55 @@ drop the :samp:`-no-pie` workaround, you'll need to get 
the identified
 dependencies rebuilt with PIE enabled (compiled with :samp:`-fPIE`
 and linked with :samp:`-pie`).
 
+.. _Choosing_the_Scheduling_Policy_With_GNU_Linux:
+
+.. index:: SCHED_FIFO scheduling policy
+.. index:: SCHED_RR scheduling policy
+.. index:: SCHED_OTHER scheduling policy
+
+Choosing the Scheduling Policy with GNU/Linux
+-
+
+When using a POSIX threads implementation, you have a choice of several
+scheduling policies: ``SCHED_FIFO``, ``SCHED_RR`` and ``SCHED_OTHER``.
+
+Typically, the default is ``SCHED_OTHER``, while using ``SCHED_FIFO``
+or ``SCHED_RR`` requires special (e.g., root) privileges.
+
+.. index:: pragma Time_Slice
+.. index:: -T0 option
+.. index:: pragma Task_Dispatching_Policy
+
+
+By default, GNAT uses the ``SCHED_OTHER`` policy. To specify
+``SCHED_FIFO``,
+you can use one of the following:
+
+* ``pragma Time_Slice (0.0)``
+* the corresponding binder option :switch:`-T0`
+* ``pragma Task_Dispatching_Policy (FIFO_Within_Priorities)``
+
+To specify ``SCHED_RR``,
+you should use ``pragma Time_Slice`` with a
+value greater than 0.0, or else use the corresponding :switch:`-T`
+binder option.
+
+To make sure a program is running as root, you can put something like
+this in a library package body in your application:
+
+  .. code-block:: ada
+
+ functi

[COMMITTED 13/31] ada: Remove useless trampolines caused by Unchecked_Conversion

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

The partial solution implemented in Validate_Unchecked_Conversion to support
unchecked conversions between addresses and pointers to subprograms, for the
platforms where pointers to subprograms do not all have the same size, turns
out to be counter-productive for others because it may cause the creation of
useless trampolines, which in turn makes the stack executable.

gcc/ada/

* sem_ch13.adb (Validate_Unchecked_Conversion): Restrict forcing the
Can_Use_Internal_Rep flag to platforms that require unnesting.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/sem_ch13.adb | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
index 59c80022c20..4cf6fc9a645 100644
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -18132,20 +18132,23 @@ package body Sem_Ch13 is
  Set_No_Strict_Aliasing (Implementation_Base_Type (Target));
   end if;
 
-  --  If the unchecked conversion is between Address and an access
-  --  subprogram type, show that we shouldn't use an internal
-  --  representation for the access subprogram type.
+  --  For code generators that do not support nested subprograms, if the
+  --  unchecked conversion is between Address and an access subprogram
+  --  type, show that we shouldn't use an internal representation for the
+  --  access subprogram type.
 
-  if Is_Access_Subprogram_Type (Target)
-and then Is_Descendant_Of_Address (Source)
-and then In_Same_Source_Unit (Target, N)
-  then
- Set_Can_Use_Internal_Rep (Base_Type (Target), False);
-  elsif Is_Access_Subprogram_Type (Source)
-and then Is_Descendant_Of_Address (Target)
-and then In_Same_Source_Unit (Source, N)
-  then
- Set_Can_Use_Internal_Rep (Base_Type (Source), False);
+  if Unnest_Subprogram_Mode then
+ if Is_Access_Subprogram_Type (Target)
+   and then Is_Descendant_Of_Address (Source)
+   and then In_Same_Source_Unit (Target, N)
+ then
+Set_Can_Use_Internal_Rep (Base_Type (Target), False);
+ elsif Is_Access_Subprogram_Type (Source)
+   and then Is_Descendant_Of_Address (Target)
+   and then In_Same_Source_Unit (Source, N)
+ then
+Set_Can_Use_Internal_Rep (Base_Type (Source), False);
+ end if;
   end if;
 
   --  Generate N_Validate_Unchecked_Conversion node for back end in case
-- 
2.43.2



[COMMITTED 16/31] ada: Missing constraint check for initial value of object with address clause

2024-05-21 Thread Marc Poulhiès
From: Steve Baird 

In some cases where an object is declared with an initial value that is
an aggregate and also with a specified Address (either via an
aspect_specification or via an attribute_definition_clause), the
check that the initial value satisfies the constraints of the object's
subtype was incorrectly omitted.

gcc/ada/

* exp_util.adb (Remove_Side_Effects): Make_Reference assumes that
the referenced object satisfies the constraints of the designated
subtype of the access type. Ensure that this assumption holds by
introducing a qualified expression if needed (and then ensuring
that checking associated with evaluation of the qualified
expression is not suppressed).

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/exp_util.adb | 29 +
 1 file changed, 29 insertions(+)

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
index b71f7739481..654ea7d9124 100644
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -12772,6 +12772,35 @@ package body Exp_Util is
 --  since we know it cannot be null and we don't want a check.
 
 else
+   --  Make_Reference assumes that the referenced
+   --  object satisfies the constraints of the designated
+   --  subtype of the access type. Ensure that this assumption
+   --  holds by introducing a qualified expression if needed.
+
+   if not Analyzed (Exp)
+ and then Nkind (Exp) = N_Aggregate
+ and then (Is_Array_Type (Exp_Type)
+   or else Has_Discriminants (Exp_Type))
+ and then Is_Constrained (Exp_Type)
+   then
+  --  Do not suppress checks associated with the qualified
+  --  expression we are about to introduce (unless those
+  --  checks were already suppressed when Remove_Side_Effects
+  --  was called).
+
+  if Is_Array_Type (Exp_Type) then
+ Scope_Suppress.Suppress (Length_Check)
+   := Svg_Suppress.Suppress (Length_Check);
+  else
+ Scope_Suppress.Suppress (Discriminant_Check)
+   := Svg_Suppress.Suppress (Discriminant_Check);
+  end if;
+
+  E := Make_Qualified_Expression (Loc,
+ Subtype_Mark => New_Occurrence_Of (Exp_Type, Loc),
+ Expression => E);
+   end if;
+
New_Exp := Make_Reference (Loc, E);
Set_Is_Known_Non_Null (Def_Id);
 end if;
-- 
2.43.2



[COMMITTED 10/31] ada: Remove some explicit yields in tasking run-time

2024-05-21 Thread Marc Poulhiès
From: Ronan Desplanques 

This patch removes three places where tasking run-time subprograms
yielded control shortly before conditional calls to Sleep. The yields
were meant to make those calls to Sleep unnecessary more often, as an
optimization on systems where calls to Sleep are costly, in particular
VMS.

A problem was that two of the yields contained data races that were
reported by thread sanitizing tools on some platforms, and that's the
motivation for removing them.

gcc/ada/

* libgnarl/s-taenca.adb (Wait_For_Completion): Remove call to
Yield.
* libgnarl/s-tasren.adb (Timed_Selective_Wait, Wait_For_Call):
Remove calls to Yield.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/libgnarl/s-taenca.adb | 12 
 gcc/ada/libgnarl/s-tasren.adb | 24 
 2 files changed, 36 deletions(-)

diff --git a/gcc/ada/libgnarl/s-taenca.adb b/gcc/ada/libgnarl/s-taenca.adb
index cd9c53b19fe..1dc8ec518bd 100644
--- a/gcc/ada/libgnarl/s-taenca.adb
+++ b/gcc/ada/libgnarl/s-taenca.adb
@@ -410,18 +410,6 @@ package body System.Tasking.Entry_Calls is
 
   Self_Id.Common.State := Entry_Caller_Sleep;
 
-  --  Try to remove calls to Sleep in the loop below by letting the caller
-  --  a chance of getting ready immediately, using Unlock & Yield.
-  --  See similar action in Wait_For_Call & Timed_Selective_Wait.
-
-  STPO.Unlock (Self_Id);
-
-  if Entry_Call.State < Done then
- STPO.Yield;
-  end if;
-
-  STPO.Write_Lock (Self_Id);
-
   loop
  Check_Pending_Actions_For_Entry_Call (Self_Id, Entry_Call);
 
diff --git a/gcc/ada/libgnarl/s-tasren.adb b/gcc/ada/libgnarl/s-tasren.adb
index d65b9f011b0..6face7ef8d4 100644
--- a/gcc/ada/libgnarl/s-tasren.adb
+++ b/gcc/ada/libgnarl/s-tasren.adb
@@ -1317,18 +1317,6 @@ package body System.Tasking.Rendezvous is
 
 Self_Id.Common.State := Acceptor_Delay_Sleep;
 
---  Try to remove calls to Sleep in the loop below by letting the
---  caller a chance of getting ready immediately, using Unlock
---  Yield. See similar action in Wait_For_Completion/Wait_For_Call.
-
-Unlock (Self_Id);
-
-if Self_Id.Open_Accepts /= null then
-   Yield;
-end if;
-
-Write_Lock (Self_Id);
-
 --  Check if this task has been aborted while the lock was released
 
 if Self_Id.Pending_ATC_Level < Self_Id.ATC_Nesting_Level then
@@ -1510,18 +1498,6 @@ package body System.Tasking.Rendezvous is
begin
   Self_Id.Common.State := Acceptor_Sleep;
 
-  --  Try to remove calls to Sleep in the loop below by letting the caller
-  --  a chance of getting ready immediately, using Unlock & Yield.
-  --  See similar action in Wait_For_Completion & Timed_Selective_Wait.
-
-  Unlock (Self_Id);
-
-  if Self_Id.Open_Accepts /= null then
- Yield;
-  end if;
-
-  Write_Lock (Self_Id);
-
   --  Check if this task has been aborted while the lock was released
 
   if Self_Id.Pending_ATC_Level < Self_Id.ATC_Nesting_Level then
-- 
2.43.2



[COMMITTED 28/31] ada: Fix strict aliasing violation in parameter passing (continued)

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

This fixes another long-standing (implicit) violation of the strict aliasing
rules that occurs when the result of a value conversion is directly passed
as an actual parameter in a call to a subprogram and the passing mechanism
is by reference.  In this case, the reference passed to the subprogram may
be to a type that is too different from the type of the underlying object,
which is the definition of such a violation.

The change reworks and strengthens the previous fix as follows: first, the
detection of these violations is moved into a dedicated predicate; second,
an assertion is added to check that none of them has been missed; it is
enabled by either -fchecking or -fstrict-aliasing, like the closely related
assertion present in relate_alias_sets.

The assertion uncovered two internal sources of violations: implementation
types for packed array types with peculiar index types and interface types,
which are fixed by propagating alias sets in the first case and resorting to
universal aliasing in the second case.

Finally, an unconditional warning is implemented to inform the user that the
temporary is created and to suggest a possible solution to prevent that.

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) <E_Array_Type>: For a
packed type implemented specially, temporarily save the XUA type as
equivalent to the entity before processing the implementation type.
For this implementation type, if its component type is the same as
that of the original type, copy the alias set from the latter.
: Resort to universal aliasing for all interface types.
* gcc-interface/trans.cc (Call_to_gnu): Add GNU_ACTUAL_TYPE local
variable and rename existing one to GNU_UNPADDED_ACTUAL_TYPE.
If the formal is passed by reference and the actual is a conversion,
call aliasable_p to detect aliasing violations, issue a warning upon
finding one and create the temporary in the target type.
Add an assertion that no such violation has been missed above.
(addressable_p): Revert latest changes.
(aliasable_p): New predicate.
* gcc-interface/utils2.cc (build_binary_op) : When
creating a new array type on the fly, preserve the alias set of the
operation type.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc   |  48 ++---
 gcc/ada/gcc-interface/trans.cc  | 167 +++-
 gcc/ada/gcc-interface/utils2.cc |   6 +-
 3 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index ab54d2ccf13..6e40a157734 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -2119,6 +2119,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
 case E_Array_Type:
   {
+   const Entity_Id OAT = Original_Array_Type (gnat_entity);
const Entity_Id PAT = Packed_Array_Impl_Type (gnat_entity);
const bool convention_fortran_p
  = (Convention (gnat_entity) == Convention_Fortran);
@@ -2392,14 +2393,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
  set_typeless_storage_on_aggregate_type (tem);
  }
 
-   /* If this is a packed type implemented specially, then process the
-  implementation type so it is elaborated in the proper scope.  */
-   if (Present (PAT))
- gnat_to_gnu_entity (PAT, NULL_TREE, false);
-
-   /* Otherwise, if an alignment is specified, use it if valid and, if
-  the alignment was requested with an explicit clause, state so.  */
-   else if (Known_Alignment (gnat_entity))
+   /* If an alignment is specified for an array that is not a packed type
+  implemented specially, use the alignment if it is valid and, if it
+  was requested with an explicit clause, preserve the information.  */
+   if (Known_Alignment (gnat_entity) && No (PAT))
  {
SET_TYPE_ALIGN (tem,
validate_alignment (Alignment (gnat_entity),
@@ -2418,7 +2415,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
 
TYPE_BIT_PACKED_ARRAY_TYPE_P (tem)
  = (Is_Packed_Array_Impl_Type (gnat_entity)
-? Is_Bit_Packed_Array (Original_Array_Type (gnat_entity))
+? Is_Bit_Packed_Array (OAT)
 : Is_Bit_Packed_Array (gnat_entity));
 
if (Treat_As_Volatile (gnat_entity))
@@ -2447,8 +2444,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
  TYPE_ARRAY_MAX_SIZE (tem) = gnu_max_size;
 
/* See the above description for the rationale.  */
-   create_type_decl (create_concat_name (gnat_entity, "XUA"), tem,
- artificial_p, debug_info_p, gnat_entity);
+   tree gnu_tmp_decl
+ = create_type_decl (create_con

[COMMITTED 15/31] ada: Fix layout in a list of aspects

2024-05-21 Thread Marc Poulhiès
From: Piotr Trojanek 

Code cleanup; semantics is unaffected.

gcc/ada/

* aspects.ads (Nonoverridable_Aspect_Id): Fix layout.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/aspects.ads | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/aspects.ads b/gcc/ada/aspects.ads
index ce393d4f602..3cc62de3411 100644
--- a/gcc/ada/aspects.ads
+++ b/gcc/ada/aspects.ads
@@ -237,14 +237,17 @@ package Aspects is
--  Aspect_Id's excluding No_Aspect
 
subtype Nonoverridable_Aspect_Id is Aspect_Id with
- Static_Predicate => Nonoverridable_Aspect_Id in
-   Aspect_Default_Iterator | Aspect_Iterator_Element |
-   Aspect_Implicit_Dereference | Aspect_Constant_Indexing |
-   Aspect_Variable_Indexing | Aspect_Aggregate |
-   Aspect_Max_Entry_Queue_Length
-| Aspect_No_Controlled_Parts
-   --  ??? No_Controlled_Parts not yet in Aspect_Id enumeration
-   ;  --  see RM 13.1.1(18.7)
+ Static_Predicate =>
+   Nonoverridable_Aspect_Id in Aspect_Aggregate
+ | Aspect_Constant_Indexing
+ | Aspect_Default_Iterator
+ | Aspect_Implicit_Dereference
+ | Aspect_Iterator_Element
+ | Aspect_Max_Entry_Queue_Length
+ | Aspect_No_Controlled_Parts
+ | Aspect_Variable_Indexing;
+   --  ??? No_Controlled_Parts not yet in Aspect_Id enumeration see RM
+   --  13.1.1(18.7).
 
--  The following array indicates aspects that accept 'Class
 
-- 
2.43.2



[COMMITTED 20/31] ada: Fix assembler error for gigantic library-level object on 64-bit Windows

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

Most small 64-bit code models have a limit of 2 GB on the span of binaries,
so we also use the limit for the size of the largest statically allocatable
object by the compiler.  If the limit is topped, the compiler switches over
to a dynamic allocation (if not forbidden) after giving a warning.

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : Give a
warning for a statically allocated object whose size is constant,
valid but too large.
(allocatable_size_p): In the static case, return false for a size
that is constant, valid but too large.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 27 +--
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index 41d5c29a17c..e16ee6edac5 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -1415,10 +1415,22 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
 false);
  }
 
-   if (TREE_CODE (TYPE_SIZE_UNIT (gnu_alloc_type)) == INTEGER_CST
-   && !valid_constant_size_p (TYPE_SIZE_UNIT (gnu_alloc_type)))
- post_error ("??Storage_Error will be raised at run time!",
- gnat_entity);
+   /* Give a warning if the size is constant but too large.  */
+   if (TREE_CODE (TYPE_SIZE_UNIT (gnu_alloc_type)) == INTEGER_CST)
+ {
+   if (valid_constant_size_p (TYPE_SIZE_UNIT (gnu_alloc_type)))
+ {
+   post_error
+ ("??too large object cannot be allocated statically",
+  gnat_entity);
+   post_error ("\\?dynamic allocation will be used 
instead",
+   gnat_entity);
+ }
+
+   else
+ post_error ("??Storage_Error will be raised at run time!",
+ gnat_entity);
+ }
 
gnu_expr
  = build_allocator (gnu_alloc_type, gnu_expr, gnu_type,
@@ -6822,9 +6834,12 @@ constructor_address_p (tree gnu_expr)
 static bool
 allocatable_size_p (tree gnu_size, bool static_p)
 {
-  /* We can allocate a fixed size if it is a valid for the middle-end.  */
+  /* We can allocate a fixed size if it is a valid for the middle-end but, for
+ a static allocation, we do not allocate more than 2 GB because this would
+ very likely be unintended and problematic for usual code models.  */
   if (TREE_CODE (gnu_size) == INTEGER_CST)
-return valid_constant_size_p (gnu_size);
+return valid_constant_size_p (gnu_size)
+  && (!static_p || tree_to_uhwi (gnu_size) <= INT_MAX);
 
   /* We can allocate a variable size if this isn't a static allocation.  */
   else
-- 
2.43.2



Re: [Patch] contrib/gcc-changelog/git_update_version.py: Improve diagnostic

2024-05-21 Thread Tobias Burnus

Hi Jakub,

Jakub Jelinek wrote:

On Mon, May 20, 2024 at 08:31:02AM +0200, Tobias Burnus wrote:

Hmm, there were now two daily bumps: [...] I really wonder why.

Because I've done it by hand.


Okay, that explains it.

I still do not understand why it slipped through in the first place; I 
tried old versions down to r12-709-g772e5e82e3114f and it still FAILs for 
the invalid commit ("ERR: cannot find a ChangeLog location in message").


Thus, I wonder whether the commit hook is active at all?!?


I have in ~gccadmin a gcc-changelog copy and adjusted update_version_git
script which doesn't use contrib/gcc-changelog subdirectory from the
checkout it makes but from the ~gccadmin directory,

[...]

I'm already using something similar in
my hack (just was doing it for even successful commits, but I think your
patch is better).
And, I think best would be if update_version_git script simply
accepted a list of ignored commits from the command line too,
passed it to the git_update_version.py script and that one
added those to IGNORED_COMMITS.


Updated version:

* Uses my diagnostic

* Adds an -i/--ignore argument for commits. It permits '-i hash1 -i 
hash2' as well as '-i hash1,hash2' or '-i "hash1 hash2"'.


* I changed the global variable to lower case as Python's style guide 
states that all-uppercase names are for constants.


* The '=None' matches one of the current usages (no argument passed); 
hence, it is now explicit and 'pylint' is happy.


OK for mainline?

Tobias

PS: I have not updated the hashes. If needed/wanted, I leave that to 
you, Jakub.
contrib/gcc-changelog/git_update_version.py: Improve diagnostic

contrib/ChangeLog:

	* gcc-changelog/git_update_version.py: Add '-i'/'--ignore' argument
	to add to-be-ignored commits via the command line.
	(ignored_commits): Rename from IGNORED_COMMITS and change
	type from tuple to set.
	(prepend_to_changelog_files): Show git hash if errors occurred.
	(update_current_branch): Mark argument as optional by defaulting
	to None.

 contrib/gcc-changelog/git_update_version.py | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/contrib/gcc-changelog/git_update_version.py b/contrib/gcc-changelog/git_update_version.py
index 24f6c43d0b2..c69a3a6897a 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -22,6 +22,7 @@ import argparse
 import datetime
 import logging
 import os
+import re
 
 from git import Repo
 
@@ -30,7 +31,7 @@ from git_repository import parse_git_revisions
 current_timestamp = datetime.datetime.now().strftime('%Y%m%d\n')
 
 # Skip the following commits, they cannot be correctly processed
-IGNORED_COMMITS = (
+ignored_commits = {
 'c2be82058fb40f3ae891c68d185ff53e07f14f45',
 '04a040d907a83af54e0a98bdba5bfabc0ef4f700',
 '2e96b5f14e4025691b57d2301d71aa6092ed44bc',
@@ -41,7 +42,7 @@ IGNORED_COMMITS = (
 '040e5b0edbca861196d9e2ea2af5e805769c8d5d',
 '8057f9aa1f7e70490064de796d7a8d42d446caf8',
 '109f1b28fc94c93096506e3df0c25e331cef19d0',
-'39f81924d88e3cc197fc3df74204c9b5e01e12f7')
+'39f81924d88e3cc197fc3df74204c9b5e01e12f7'}
 
 FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT,
@@ -58,6 +59,7 @@ def read_timestamp(path):
 
 def prepend_to_changelog_files(repo, folder, git_commit, add_to_git):
 if not git_commit.success:
+logging.info(f"While processing {git_commit.info.hexsha}:")
 for error in git_commit.errors:
 logging.info(error)
 raise AssertionError()
@@ -93,13 +95,15 @@ parser.add_argument('-d', '--dry-mode',
  ' is expected')
 parser.add_argument('-c', '--current', action='store_true',
 help='Modify current branch (--push argument is ignored)')
+parser.add_argument('-i', '--ignore', action='append',
+help='list of commits to ignore')
 args = parser.parse_args()
 
 repo = Repo(args.git_path)
 origin = repo.remotes['origin']
 
 
-def update_current_branch(ref_name):
+def update_current_branch(ref_name=None):
 commit = repo.head.commit
 commit_count = 1
 while commit:
@@ -123,7 +127,7 @@ def update_current_branch(ref_name):
 head = head.parents[1]
 commits = parse_git_revisions(args.git_path, '%s..%s'
   % (commit.hexsha, head.hexsha), ref_name)
-commits = [c for c in commits if c.info.hexsha not in IGNORED_COMMITS]
+commits = [c for c in commits if c.info.hexsha not in ignored_commits]
 for git_commit in reversed(commits):
 prepend_to_changelog_files(repo, args.git_path, git_commit,
not args.dry_mode)
@@ -153,6 +157,9 @@ def update_current_branch(ref_name):
 else:
 logging.info('DATESTAMP unchanged')
 
+if args.ignore is not None:
+for item in args.ignore:
+ignored_com

[COMMITTED 14/31] ada: Remove duplicate statement

2024-05-21 Thread Marc Poulhiès
From: Ronan Desplanques 

This patch removes a duplicate statement that was useless and could
be misleading to the reader by suggesting that there are multiple
global variables named Style_Check, while there is just one.

gcc/ada/

* frontend.adb (Frontend): Remove duplicate statement.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/frontend.adb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/ada/frontend.adb b/gcc/ada/frontend.adb
index bd0f0c44ff4..ece0e728e4a 100644
--- a/gcc/ada/frontend.adb
+++ b/gcc/ada/frontend.adb
@@ -158,7 +158,6 @@ begin
   --  intended -gnatg or -gnaty compilations. We also disconnect checking
   --  for maximum line length.
 
-  Opt.Style_Check := False;
   Style_Check := False;
 
   --  Capture current suppress options, which may get modified
-- 
2.43.2



[COMMITTED 19/31] ada: Fix crash on aliased constant with packed array type and -g switch

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

The problem is that we build a template whose array field is not an array
in the case of an aliased object with nominal unconstrained array subtype.

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : For an
array allocated with its bounds, make sure to have an array type
to build the template.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index ca174bff009..41d5c29a17c 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -939,6 +939,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
&& !type_annotate_only)
  {
tree gnu_array = gnat_to_gnu_type (Base_Type (gnat_type));
+   /* Make sure to have an array type for the template.  */
+   if (TYPE_IS_PADDING_P (gnu_type))
+ gnu_type = TREE_TYPE (TYPE_FIELDS (gnu_type));
gnu_type
  = build_unc_object_type_from_ptr (TREE_TYPE (gnu_array),
gnu_type,
-- 
2.43.2



[COMMITTED 18/31] ada: Fix small inaccuracy for Size attribute applied to objects

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

This reverts a change made some time ago in lvalue_required_for_attribute_p
whereby the Size attribute applied to objects would no longer be considered
as requiring an lvalue.

While not wrong in principle, this turns out to be problematic because the
implementation in Attribute_to_gnu needs to look at the translated prefix
to spot particular cases and not only at the actual type of its value.

This of course requires a small adjustment in gnat_to_gnu to compensate.

gcc/ada/

* gcc-interface/trans.cc (access_attribute_p): New predicate.
(lvalue_required_for_attribute_p): Return again 1 for Size and add
the missing terminating call to gcc_unreachable.
(gnat_to_gnu): Return the result unmodified for a reference to an
unconstrained array only if it is the prefix of an access attribute.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/trans.cc | 28 +---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc
index 8c7ffbf5687..6f761766559 100644
--- a/gcc/ada/gcc-interface/trans.cc
+++ b/gcc/ada/gcc-interface/trans.cc
@@ -745,6 +745,26 @@ build_raise_check (int check, enum exception_info_kind 
kind)
   return result;
 }
 
+/* Return true if GNAT_NODE, which is an N_Attribute_Reference, is one of the
+   access attributes.  */
+
+static bool
+access_attribute_p (Node_Id gnat_node)
+{
+  switch (Get_Attribute_Id (Attribute_Name (gnat_node)))
+{
+case Attr_Access:
+case Attr_Unchecked_Access:
+case Attr_Unrestricted_Access:
+  return true;
+
+default:
+  return false;
+}
+
+  gcc_unreachable ();
+}
+
 /* Return a positive value if an lvalue is required for GNAT_NODE, which is
an N_Attribute_Reference.  */
 
@@ -760,7 +780,6 @@ lvalue_required_for_attribute_p (Node_Id gnat_node)
 case Attr_Range_Length:
 case Attr_Length:
 case Attr_Object_Size:
-case Attr_Size:
 case Attr_Value_Size:
 case Attr_Component_Size:
 case Attr_Descriptor_Size:
@@ -786,11 +805,14 @@ lvalue_required_for_attribute_p (Node_Id gnat_node)
 case Attr_First_Bit:
 case Attr_Last_Bit:
 case Attr_Bit:
+case Attr_Size:
 case Attr_Asm_Input:
 case Attr_Asm_Output:
 default:
   return 1;
 }
+
+  gcc_unreachable ();
 }
 
 /* Return a positive value if an lvalue is required for GNAT_NODE.  GNU_TYPE
@@ -8472,7 +8494,7 @@ gnat_to_gnu (Node_Id gnat_node)
  return slot optimization in this case.
 
5. If this is a reference to an unconstrained array which is used either
- as the prefix of an attribute reference that requires an lvalue or in
+ as the prefix of an attribute reference for an access attribute or in
  a return statement without storage pool, return the result unmodified
  because we want to return the original bounds.
 
@@ -8539,7 +8561,7 @@ gnat_to_gnu (Node_Id gnat_node)
   else if (TREE_CODE (TREE_TYPE (gnu_result)) == UNCONSTRAINED_ARRAY_TYPE
   && Present (Parent (gnat_node))
   && ((Nkind (Parent (gnat_node)) == N_Attribute_Reference
-   && lvalue_required_for_attribute_p (Parent (gnat_node)))
+   && access_attribute_p (Parent (gnat_node)))
   || (Nkind (Parent (gnat_node)) == N_Simple_Return_Statement
   && No (Storage_Pool (Parent (gnat_node))
 ;
-- 
2.43.2



[COMMITTED 17/31] ada: Fix oversight in previous change

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

In rare cases, types using structural equality may reach relate_alias_sets.

gcc/ada/

* gcc-interface/utils.cc (relate_alias_sets): Restore previous code
when the type uses structural equality.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils.cc | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc
index b628481335d..ae520542ace 100644
--- a/gcc/ada/gcc-interface/utils.cc
+++ b/gcc/ada/gcc-interface/utils.cc
@@ -1867,8 +1867,11 @@ relate_alias_sets (tree new_type, tree old_type, enum 
alias_set_op op)
  && TYPE_NONALIASED_COMPONENT (new_type)
 != TYPE_NONALIASED_COMPONENT (old_type)));
 
-  /* The alias set always lives on the TYPE_CANONICAL.  */
-  TYPE_ALIAS_SET (TYPE_CANONICAL (new_type)) = get_alias_set (old_type);
+  /* The alias set is a property of the TYPE_CANONICAL if it exists.  */
+  if (TYPE_STRUCTURAL_EQUALITY_P (new_type))
+   TYPE_ALIAS_SET (new_type) = get_alias_set (old_type);
+  else
+   TYPE_ALIAS_SET (TYPE_CANONICAL (new_type)) = get_alias_set (old_type);
   break;
 
 case ALIAS_SET_SUBSET:
-- 
2.43.2



[COMMITTED 22/31] ada: Avoid temporary for conditional expression of discriminated record type

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

This just aligns the definite case (discriminants with default) with the
indefinite case (discriminants without default), the latter case having
been properly handled for a long time.  In the former case, the maximum size is
used so a temporary can be much larger than the actual data it contains.

gcc/ada/

* gcc-interface/utils2.cc (build_cond_expr): Use the indirect path
for all types containing a placeholder.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index a953b070ed8..fb0ccf59224 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -1715,8 +1715,8 @@ build_cond_expr (tree result_type, tree condition_operand,
  then dereference the result.  Likewise if the result type is passed by
  reference, because creating a temporary of this type is not allowed.  */
   if (TREE_CODE (result_type) == UNCONSTRAINED_ARRAY_TYPE
-  || TYPE_IS_BY_REFERENCE_P (result_type)
-  || CONTAINS_PLACEHOLDER_P (TYPE_SIZE (result_type)))
+  || type_contains_placeholder_p (result_type)
+  || TYPE_IS_BY_REFERENCE_P (result_type))
 {
   result_type = build_pointer_type (result_type);
   true_operand = build_unary_op (ADDR_EXPR, result_type, true_operand);
-- 
2.43.2



[COMMITTED 24/31] ada: Minor typo fix in comment

2024-05-21 Thread Marc Poulhiès
gcc/ada/

* gcc-interface/decl.cc: Fix typo in comment.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index e16ee6edac5..0987d534e69 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -5629,7 +5629,7 @@ gnat_to_gnu_param (Entity_Id gnat_param, tree 
gnu_param_type, bool first,
   by_ref = true;
 }
 
-  /* If we were requested or muss pass by reference, do so.
+  /* If we were requested or must pass by reference, do so.
  If we were requested to pass by copy, do so.
  Otherwise, for foreign conventions, pass In Out or Out parameters
  or aggregates by reference.  For COBOL and Fortran, pass all
-- 
2.43.2



[COMMITTED 27/31] ada: Make detection of useless copy for return more robust

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

In the return-by-invisible-reference case, the return object of an extended
return statement is allocated directly on the return stack and, therefore,
the copy operation on return is useless.  The code detecting this was not
robust enough and missed some complex cases.

gcc/ada/

* gcc-interface/trans.cc (gnat_to_gnu) <N_Simple_Return_Statement>:
In the return-by-invisible-reference case, remove conversions before
looking for a dereference in the return values and building the test
protecting against a useless copy operation.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/trans.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc
index a6b86ec8b51..4ae599b8b4c 100644
--- a/gcc/ada/gcc-interface/trans.cc
+++ b/gcc/ada/gcc-interface/trans.cc
@@ -7767,11 +7767,12 @@ gnat_to_gnu (Node_Id gnat_node)
gnu_result = build2 (INIT_EXPR, void_type_node,
 gnu_ret_deref, gnu_ret_val);
/* Avoid a useless copy with __builtin_return_slot.  */
-   if (INDIRECT_REF_P (gnu_ret_val))
+   tree gnu_inner_val = remove_conversions (gnu_ret_val, false);
+   if (INDIRECT_REF_P (gnu_inner_val))
  gnu_result
= build3 (COND_EXPR, void_type_node,
  fold_build2 (NE_EXPR, boolean_type_node,
-  TREE_OPERAND (gnu_ret_val, 0),
+  TREE_OPERAND (gnu_inner_val, 0),
   gnu_ret_obj),
  gnu_result, NULL_TREE);
add_stmt_with_node (gnu_result, gnat_node);
-- 
2.43.2



[COMMITTED 29/31] ada: Fix internal error on discriminated record with Atomic aspect in Ada 2022

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

It occurs in build_load_modify_store where the pattern matching logic cannot
find the atomic load that is present in the tree because it has been wrapped
in a SAVE_EXPR by gnat_protect_expr, which is unnecessary.

gcc/ada/

* gcc-interface/utils2.cc (gnat_protect_expr): Deal specifically
with atomic loads. Document the relationship with gnat_save_expr.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index c1346cfadeb..8fb86ab29e3 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -2887,7 +2887,11 @@ gnat_save_expr (tree exp)
 
 /* Protect EXP for immediate reuse.  This is a variant of gnat_save_expr that
is optimized under the assumption that EXP's value doesn't change before
-   its subsequent reuse(s) except through its potential reevaluation.  */
+   its subsequent reuse(s) except potentially through its reevaluation.
+
+   gnat_protect_expr guarantees that multiple evaluations of the expression
+   will not generate multiple side effects, whereas gnat_save_expr further
+   guarantees that all evaluations will yield the same result.  */
 
 tree
 gnat_protect_expr (tree exp)
@@ -2932,6 +2936,13 @@ gnat_protect_expr (tree exp)
 return build3 (code, type, gnat_protect_expr (TREE_OPERAND (exp, 0)),
   TREE_OPERAND (exp, 1), NULL_TREE);
 
+  /* An atomic load is an INDIRECT_REF of its first argument, so apply the
+ same transformation as in the INDIRECT_REF case above.  */
+  if (code == CALL_EXPR && call_is_atomic_load (exp))
+return build_call_expr (TREE_OPERAND (CALL_EXPR_FN (exp), 0), 2,
+   gnat_protect_expr (CALL_EXPR_ARG (exp, 0)),
+   CALL_EXPR_ARG (exp, 1));
+
   /* If this is a COMPONENT_REF of a fat pointer, save the entire fat pointer.
  This may be more efficient, but will also allow us to more easily find
  the match for the PLACEHOLDER_EXPR.  */
-- 
2.43.2



[COMMITTED 23/31] ada: Follow-up adjustment to earlier fix in Build_Allocate_Deallocate_Proc

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

The deallocation call of the return and secondary stacks no longer matches
the profile built in Exp_Util.Build_Allocate_Deallocate_Proc, so this just
removes the code as unreachable and adds an assertion to that effect.

gcc/ada/

* gcc-interface/utils2.cc (build_call_alloc_dealloc_proc): Add an
assertion that this is not a deallocation of the return or secondary
stack and remove subsequent unreachable code.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index fb0ccf59224..64712cb9962 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -2187,15 +2187,16 @@ build_call_alloc_dealloc_proc (tree gnu_obj, tree 
gnu_size, tree gnu_type,
= Etype (Next_Formal (First_Formal (gnat_proc)));
   tree gnu_size_type = gnat_to_gnu_type (gnat_size_type);
 
+  /* Deallocation is not supported for return and secondary stacks.  */
+  gcc_assert (!gnu_obj);
+
   gnu_size = convert (gnu_size_type, gnu_size);
   gnu_align = convert (gnu_size_type, gnu_align);
 
   if (DECL_BUILT_IN_CLASS (gnu_proc) == BUILT_IN_FRONTEND
  && DECL_FE_FUNCTION_CODE (gnu_proc) == BUILT_IN_RETURN_SLOT)
{
- /* This must be an allocation of the return stack in a function that
-returns by invisible reference.  */
- gcc_assert (!gnu_obj);
+ /* This must be a function that returns by invisible reference.  */
  gcc_assert (current_function_decl
  && TREE_ADDRESSABLE (TREE_TYPE (current_function_decl)));
  tree gnu_ret_size;
@@ -2221,11 +,6 @@ build_call_alloc_dealloc_proc (tree gnu_obj, tree 
gnu_size, tree gnu_type,
 N_Raise_Program_Error));
}
 
-  /* The first arg is the address of the object, for a deallocator,
-then the size.  */
-  else if (gnu_obj)
-   gnu_call = build_call_n_expr (gnu_proc, 2, gnu_obj, gnu_size);
-
   else
gnu_call = build_call_n_expr (gnu_proc, 2, gnu_size, gnu_align);
 }
-- 
2.43.2



[COMMITTED 31/31] ada: Streamline implementation of simple nonbinary modular operations

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

They are implemented by the nonbinary_modular_operation routine, which is
complex and, in particular, creates a signed type each time a subtraction is
generated and a type with partial precision each time a multiplication is
generated.  Both are unnecessary, and a simple approach even generates better
code for the subtraction on architectures with conditional moves.

gcc/ada/

* gcc-interface/utils2.cc (nonbinary_modular_operation): Rewrite.
Do not create signed types for subtraction, do not create types with
partial precision, call fold_convert instead of convert throughout.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 91 ++---
 1 file changed, 28 insertions(+), 63 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index 8fb86ab29e3..4b7e2739f6a 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -535,85 +535,50 @@ compare_fat_pointers (location_t loc, tree result_type, 
tree p1, tree p2)
 }
 
 /* Compute the result of applying OP_CODE to LHS and RHS, where both are of
-   type TYPE.  We know that TYPE is a modular type with a nonbinary
-   modulus.  */
+   TYPE.  We know that TYPE is a modular type with a nonbinary modulus.  */
 
 static tree
 nonbinary_modular_operation (enum tree_code op_code, tree type, tree lhs,
  tree rhs)
 {
   tree modulus = TYPE_MODULUS (type);
-  unsigned int needed_precision = tree_floor_log2 (modulus) + 1;
-  unsigned int precision;
-  bool unsignedp = true;
-  tree op_type = type;
-  tree result;
+  unsigned precision = tree_floor_log2 (modulus) + 1;
+  tree op_type, result;
 
-  /* If this is an addition of a constant, convert it to a subtraction
- of a constant since we can do that faster.  */
-  if (op_code == PLUS_EXPR && TREE_CODE (rhs) == INTEGER_CST)
-{
-  rhs = fold_build2 (MINUS_EXPR, type, modulus, rhs);
-  op_code = MINUS_EXPR;
-}
-
-  /* For the logical operations, we only need PRECISION bits.  For
- addition and subtraction, we need one more and for multiplication we
- need twice as many.  But we never want to make a size smaller than
- our size. */
+  /* For the logical operations, we only need PRECISION bits.  For addition and
+ subtraction, we need one more, and for multiplication twice as many.  */
   if (op_code == PLUS_EXPR || op_code == MINUS_EXPR)
-needed_precision += 1;
+precision += 1;
   else if (op_code == MULT_EXPR)
-needed_precision *= 2;
-
-  precision = MAX (needed_precision, TYPE_PRECISION (op_type));
+precision *= 2;
 
-  /* Unsigned will do for everything but subtraction.  */
-  if (op_code == MINUS_EXPR)
-unsignedp = false;
-
-  /* If our type is the wrong signedness or isn't wide enough, make a new
- type and convert both our operands to it.  */
-  if (TYPE_PRECISION (op_type) < precision
-  || TYPE_UNSIGNED (op_type) != unsignedp)
+  /* If the type is not wide enough, make a new type of the needed precision
+ and convert modulus and operands to it.  Use a type with full precision
+ for its mode since operations are ultimately performed in the mode.  */
+  if (TYPE_PRECISION (type) < precision)
 {
-  /* Copy the type so we ensure it can be modified to make it modular.  */
-  op_type = copy_type (gnat_type_for_size (precision, unsignedp));
-  modulus = convert (op_type, modulus);
-  SET_TYPE_MODULUS (op_type, modulus);
-  TYPE_MODULAR_P (op_type) = 1;
-  lhs = convert (op_type, lhs);
-  rhs = convert (op_type, rhs);
+  const scalar_int_mode m = smallest_int_mode_for_size (precision);
+  op_type = gnat_type_for_mode (m, 1);
+  modulus = fold_convert (op_type, modulus);
+  lhs = fold_convert (op_type, lhs);
+  rhs = fold_convert (op_type, rhs);
 }
+  else
+op_type = type;
 
   /* Do the operation, then we'll fix it up.  */
   result = fold_build2 (op_code, op_type, lhs, rhs);
 
-  /* For multiplication, we have no choice but to do a full modulus
- operation.  However, we want to do this in the narrowest
- possible size.  */
-  if (op_code == MULT_EXPR)
-{
-  /* Copy the type so we ensure it can be modified to make it modular.  */
-  tree div_type = copy_type (gnat_type_for_size (needed_precision, 1));
-  modulus = convert (div_type, modulus);
-  SET_TYPE_MODULUS (div_type, modulus);
-  TYPE_MODULAR_P (div_type) = 1;
-  result = convert (op_type,
-   fold_build2 (TRUNC_MOD_EXPR, div_type,
-convert (div_type, result), modulus));
-}
+  /* Unconditionally add the modulus to the result for a subtraction, this gets
+ rid of all its peculiarities by cancelling out the addition of the binary
+ modulus in the case where the subtraction wraps around in OP_TYPE, and may
+ even generate better code on architectures 

[COMMITTED 25/31] ada: Fix crash with aliased array and if expression

2024-05-21 Thread Marc Poulhiès
From: Ronan Desplanques 

The way if expressions were translated led the gimplifying phase
to attempt to create a temporary of a variable-sized type in some
cases. This patch fixes this by adding an address indirection layer
in those cases.

gcc/ada/

* gcc-interface/utils2.cc (build_cond_expr): Also apply an
indirection when the result type is variable-sized.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/utils2.cc | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index 64712cb9962..161f0f11e5c 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -1711,11 +1711,13 @@ build_cond_expr (tree result_type, tree 
condition_operand,
   true_operand = convert (result_type, true_operand);
   false_operand = convert (result_type, false_operand);
 
-  /* If the result type is unconstrained, take the address of the operands and
- then dereference the result.  Likewise if the result type is passed by
- reference, because creating a temporary of this type is not allowed.  */
+  /* If the result type is unconstrained or variable-sized, take the address
+ of the operands and then dereference the result.  Likewise if the result
+ type is passed by reference, because creating a temporary of this type is
+ not allowed.  */
   if (TREE_CODE (result_type) == UNCONSTRAINED_ARRAY_TYPE
   || type_contains_placeholder_p (result_type)
+  || !TREE_CONSTANT (TYPE_SIZE (result_type))
   || TYPE_IS_BY_REFERENCE_P (result_type))
 {
   result_type = build_pointer_type (result_type);
-- 
2.43.2



[COMMITTED 26/31] ada: Fix strict aliasing violation in parameter passing

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

This fixes a long-standing (implicit) violation of the strict aliasing rules
that occurs when the result of a call to an instance of Unchecked_Conversion
is directly passed as an actual parameter in a call to a subprogram and the
passing mechanism is by reference.  In this case, the reference passed to
the subprogram may be to a type that has nothing to do with the type of the
underlying object, which is the definition of such a violation.

This implements the following two-pronged approach: first, the problematic
cases are detected and a reference to a temporary is passed instead of the
direct reference to the underlying object; second, the implementation of
pragma Universal_Aliasing is enhanced so that it is propagated from the
component type of an array type to the array type itself, or else can be
applied to the array type directly, and may therefore be used to prevent
the violation from occurring in the first place, when the array type is
involved in the Unchecked_Conversion.

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity) : Set
TYPE_TYPELESS_STORAGE on the array types if Universal_Aliasing is
set on the type or its component type.
: Likewise.
For other aggregate types, set TYPE_TYPELESS_STORAGE in this case.
(set_typeless_storage_on_aggregate_type): New function.
(set_universal_aliasing_on_type): Likewise.
* gcc-interface/trans.cc (Call_to_gnu): Add const to local variable.
Adjust comment.  Pass GNAT_NAME in the call to addressable_p and add
a bypass for atomic types in case it returns false.
(addressable_p): Add GNAT_EXPR third parameter with default value
and add a default value to the existing second parameter.
: Return false if the expression comes from a
function call and if the alias sets of source and target types are
both distinct from zero and each other.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc  | 40 ++-
 gcc/ada/gcc-interface/trans.cc | 60 --
 2 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index 0987d534e69..ab54d2ccf13 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -205,6 +205,8 @@ static Entity_Id Gigi_Cloned_Subtype (Entity_Id);
 static tree gnu_ext_name_for_subprog (Entity_Id, tree);
 static void set_nonaliased_component_on_array_type (tree);
 static void set_reverse_storage_order_on_array_type (tree);
+static void set_typeless_storage_on_aggregate_type (tree);
+static void set_universal_aliasing_on_type (tree);
 static bool same_discriminant_p (Entity_Id, Entity_Id);
 static bool array_type_has_nonaliased_component (tree, Entity_Id);
 static bool compile_time_known_address_p (Node_Id);
@@ -2385,6 +2387,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
  set_reverse_storage_order_on_array_type (tem);
if (array_type_has_nonaliased_component (tem, gnat_entity))
  set_nonaliased_component_on_array_type (tem);
+   if (Universal_Aliasing (gnat_entity)
+   || Universal_Aliasing (Component_Type (gnat_entity)))
+ set_typeless_storage_on_aggregate_type (tem);
  }
 
/* If this is a packed type implemented specially, then process the
@@ -2790,6 +2795,9 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
set_reverse_storage_order_on_array_type (gnu_type);
  if (array_type_has_nonaliased_component (gnu_type, gnat_entity))
set_nonaliased_component_on_array_type (gnu_type);
+ if (Universal_Aliasing (gnat_entity)
+ || Universal_Aliasing (Component_Type (gnat_entity)))
+   set_typeless_storage_on_aggregate_type (gnu_type);
 
  /* Clear the TREE_OVERFLOW flag, if any, for null arrays.  */
  if (gnu_null_ranges[index])
@@ -4757,7 +4765,17 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree 
gnu_expr, bool definition)
 
  /* Record whether a pragma Universal_Aliasing was specified.  */
  if (Universal_Aliasing (gnat_entity) && !TYPE_IS_DUMMY_P (gnu_type))
-   TYPE_UNIVERSAL_ALIASING_P (gnu_type) = 1;
+   {
+ /* Set TYPE_TYPELESS_STORAGE if this is an aggregate type and
+TYPE_UNIVERSAL_ALIASING_P otherwise, since the former is not
+available in the latter case  Both will effectively put alias
+set 0 on the type, but the former is more robust because it
+will be streamed in LTO mode.  */
+ if (AGGREGATE_TYPE_P (gnu_type))
+   set_typeless_storage_on_aggregate_type (gnu_type);
+ else
+   set_universal_aliasing_on_type (gnu_type);
+   }
 
   

[COMMITTED 30/31] ada: Simplify test for propagation of attributes to subtypes

2024-05-21 Thread Marc Poulhiès
From: Eric Botcazou 

This changes the test to use the Is_Base_Type predicate and also removes the
superfluous call to Base_Type before First_Subtype.  No functional changes.

gcc/ada/

* gcc-interface/decl.cc (gnat_to_gnu_entity): Use the Is_Base_Type
predicate and remove superfluous calls to Base_Type.

Tested on x86_64-pc-linux-gnu, committed on master.

---
 gcc/ada/gcc-interface/decl.cc | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc
index 6e40a157734..f6a4c0631b6 100644
--- a/gcc/ada/gcc-interface/decl.cc
+++ b/gcc/ada/gcc-interface/decl.cc
@@ -506,11 +506,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, 
bool definition)
   /* Machine_Attributes on types are expected to be propagated to
 subtypes.  The corresponding Gigi_Rep_Items are only attached
 to the first subtype though, so we handle the propagation here.  */
-  if (Base_Type (gnat_entity) != gnat_entity
+  if (!Is_Base_Type (gnat_entity)
  && !Is_First_Subtype (gnat_entity)
- && Has_Gigi_Rep_Item (First_Subtype (Base_Type (gnat_entity
-   prepend_attributes (&attr_list,
-   First_Subtype (Base_Type (gnat_entity)));
+ && Has_Gigi_Rep_Item (First_Subtype (gnat_entity)))
+   prepend_attributes (&attr_list, First_Subtype (gnat_entity));
 
   /* Compute a default value for the size of an elementary type.  */
   if (Known_Esize (gnat_entity) && Is_Elementary_Type (gnat_entity))
-- 
2.43.2



Re: [Patch] contrib/gcc-changelog/git_update_version.py: Improve diagnostic

2024-05-21 Thread Jakub Jelinek
On Tue, May 21, 2024 at 09:36:05AM +0200, Tobias Burnus wrote:
> Jakub Jelinek wrote:
> > On Mon, May 20, 2024 at 08:31:02AM +0200, Tobias Burnus wrote:
> > > Hmm, there were now two daily bumps: [...] I really wonder why.
> > Because I've done it by hand.
> 
> Okay, that explains it.
> 
> I still do not understand why it slipped through in the first place; I tried
> old versions down to r12-709-g772e5e82e3114f and they still FAIL on the
> invalid commit ("ERR: cannot find a ChangeLog location in message").
> 
> Thus, I wonder whether the commit hook is active at all?!?

They are.  But
https://github.com/AdaCore/git-hooks/blob/master/hooks/updates/__init__.py#L836
with
https://github.com/AdaCore/git-hooks/blob/master/hooks/updates/commits.py#L230
bypasses all commits which contain just 3 magic words in a row.
And because that part is owned by the AdaCore hooks, not the GCC customizations,
I'm not sure what to do about that.

> > I have in ~gccadmin a gcc-changelog copy and adjusted update_version_git
> > script which doesn't use contrib/gcc-changelog subdirectory from the
> > checkout it makes but from the ~gccadmin directory,
> [...]
> > I'm already using something similar in
> > my hack (just was doing it for even successful commits, but I think your
> > patch is better).
> > And, I think best would be if update_version_git script simply
> > accepted a list of ignored commits from the command line too,
> > passed it to the git_update_version.py script and that one
> > added those to IGNORED_COMMITS.
> 
> Updated version:
> 
> * Uses my diagnostic
> 
> * Adds an -i/--ignore argument for commits. Permits using '-i hash1 -i
> hash2' but also '-i hash1,hash2' or '-i "hash1 hash2"'
> 
> * I changed the global variable to lower case as Python's style guide states
> that all-uppercase names are for constants.
> 
> * The '=None' matches one of the current usages (no argument passed); hence,
> it is now explicit and 'pylint' is happy.
> 
> OK for mainline?

Yes, thanks.

> PS: I have not updated the hashes. If needed/wanted, I leave that to you,
> Jakub.

Once some commit is ignored, we won't be processing it anymore, so I think
the -i option is all we need.

Jakub



Re: [Patch] Fortran: invoke.texi - link to OpenCoarrays.org + mention libcaf_single

2024-05-21 Thread Tobias Burnus

Hi Bernhard,

rep.dot@gmail.com wrote:

library such as @url{http://opencoarrays.org} needs to be linked.

Maybe use https?


Works, but as the certificate is not valid, it requires ignoring the 
errors in a browser, which is a worse user experience.


The error is, e.g.,

"curl: (60) SSL certificate problem: self-signed certificate"

Or at 
https://www.ssllabs.com/ssltest/analyze.html?d=www.opencoarrays.org&ignoreMismatch=on&latest


"Common names: invalid-sni.invalid / Issuer: invalid-sni.invalid  
(Self-signed)"


@Damian: Can you fix the server to actually have a valid certificate?

Tobias


Re: [PATCH v5 1/5] Improve must tail in RTL backend

2024-05-21 Thread Richard Biener
On Mon, May 20, 2024 at 6:53 AM Andi Kleen  wrote:
>
> On Tue, May 14, 2024 at 04:15:08PM +0200, Richard Biener wrote:
> > On Sun, May 5, 2024 at 8:16 PM Andi Kleen  wrote:
> > >
> > > - Give error messages for all causes of non sibling call generation
> > > - Don't override choices of other non sibling call checks with
> > > must tail. This causes ICEs. The must tail attribute now only
> > > overrides flag_optimize_sibling_calls locally.
> > > - Error out when tree-tailcall failed to mark a must-tail call
> > > sibcall. In this case it doesn't know the true reason and only gives
> > > a vague message (this could be improved, but it's already useful without
> > > that) tree-tailcall usually fails without optimization, so must
> > > adjust the existing must-tail plugin test to specify -O2.
> > >
> > > PR83324
> > >
> > > gcc/ChangeLog:
> > >
> > > * calls.cc (expand_call): Fix mustcall implementation.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/plugin/must-tail-call-1.c: Adjust.
> > > ---
> > >  gcc/calls.cc  | 30 ---
> > >  .../gcc.dg/plugin/must-tail-call-1.c  |  1 +
> > >  2 files changed, 21 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/gcc/calls.cc b/gcc/calls.cc
> > > index 21d78f9779fe..a6b8ee44cc29 100644
> > > --- a/gcc/calls.cc
> > > +++ b/gcc/calls.cc
> > > @@ -2650,7 +2650,9 @@ expand_call (tree exp, rtx target, int ignore)
> > >/* The type of the function being called.  */
> > >tree fntype;
> > >bool try_tail_call = CALL_EXPR_TAILCALL (exp);
> > > -  bool must_tail_call = CALL_EXPR_MUST_TAIL_CALL (exp);
> > > +  /* tree-tailcall decided not to do tail calls. Error for the musttail 
> > > case.  */
> > > +  if (!try_tail_call)
> > > +  maybe_complain_about_tail_call (exp, "other reasons");
> > >int pass;
> > >
> > >/* Register in which non-BLKmode value will be returned,
> > > @@ -3022,10 +3024,22 @@ expand_call (tree exp, rtx target, int ignore)
> > >   pushed these optimizations into -O2.  Don't try if we're already
> > >   expanding a call, as that means we're an argument.  Don't try if
> > >   there's cleanups, as we know there's code to follow the call.  */
> > > -  if (currently_expanding_call++ != 0
> > > -  || (!flag_optimize_sibling_calls && !CALL_FROM_THUNK_P (exp))
> > > -  || args_size.var
> > > -  || dbg_cnt (tail_call) == false)
> > > +  if (currently_expanding_call++ != 0)
> > > +{
> > > +  maybe_complain_about_tail_call (exp, "inside another call");
> > > +  try_tail_call = 0;
> > > +}
> > > +  if (!flag_optimize_sibling_calls
> > > +   && !CALL_FROM_THUNK_P (exp)
> > > +   && !CALL_EXPR_MUST_TAIL_CALL (exp))
> > > +try_tail_call = 0;
> > > +  if (args_size.var)
> >
> > If we are both inside another call and run into this we give two errors,
> > but I guess that's OK ...
> >
> > > +{
> > > +  /* ??? correct message?  */
> > > +  maybe_complain_about_tail_call (exp, "stack space needed");
> >
> > args_size.var != NULL_TREE means the argument size is not constant.
> > I'm quite sure this is an overly conservative check.
> >
> > > +  try_tail_call = 0;
> > > +}
> > > +  if (dbg_cnt (tail_call) == false)
> > >  try_tail_call = 0;
> > >
> > >/* Workaround buggy C/C++ wrappers around Fortran routines with
> > > @@ -3046,15 +3060,11 @@ expand_call (tree exp, rtx target, int ignore)
> > > if (MEM_P (*iter))
> > >   {
> > > try_tail_call = 0;
> > > +   maybe_complain_about_tail_call (exp, "hidden string 
> > > length argument");
> >
> > "hidden string length argument passed on stack"
> >
> > > from how I read the code.
> >
> > > break;
> > >   }
> > > }
> > >
> > > -  /* If the user has marked the function as requiring tail-call
> > > - optimization, attempt it.  */
> > > -  if (must_tail_call)
> > > -try_tail_call = 1;
> > > -
> > >/*  Rest of purposes for tail call optimizations to fail.  */
> > >if (try_tail_call)
> > >  try_tail_call = can_implement_as_sibling_call_p (exp,
> > > diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c 
> > > b/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> > > index 3a6d4cceaba7..44af361e2925 100644
> > > --- a/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> > > +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-1.c
> > > @@ -1,4 +1,5 @@
> > >  /* { dg-do compile { target tail_call } } */
> > > +/* { dg-options "-O2" } */
> >
> > So I think this is unfortunate - I think when there's a must-tail attribute
> > we should either run the tailcall pass to check the call even at -O0 or
> > trust the user with correctness  (hoping no optimization interfered with
> > the ability to tail-call).
> >
> > What were the ICEs you ran into?
> >
> > I would guess it's for example problematic to duplicate must-tail calls?
>
> I experimented more with this,

RE: [PATCH v2] i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW

2024-05-21 Thread Jiang, Haochen
> > diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c
> b/gcc/testsuite/gcc.target/i386/pr115069.c
> > new file mode 100644
> > index 000..c4b48b602ef
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr115069.c
> > @@ -0,0 +1,78 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx2" } */
> > +/* { dg-final { scan-assembler-not "vpermq" } } */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +typedef int8_t  stress_vint8_t  __attribute__ ((vector_size (16)));
> No need for such big testcase,
> 
> typedef char v16qi __attribute__((vector_size(16)));
> v16qi
> foo (v16qi a, v16qi b)
> {
> return a * b;
> }
> 
> should be enough, with -mavx2 -mno-avx512f

Yes. I will change to that.

Thx,
Haochen

> > +
> > +#define OPS(a, b, c, s, v23, v3) \
> > +do {   \
> > +   a += b; \
> > +   a |= b; \
> > +   a -= b; \
> > +   a &= ~b;\
> > +   a *= c; \
> > +   a = ~a; \
> > +   a *= s; \
> > +   a ^= c; \
> > +   a <<= 1;\
> > +   b >>= 1;\
> > +   b += c; \
> > +   a %= v23;   \
> > +   c /= v3;\
> > +   b = b ^ c;  \
> > +   c = b ^ c;  \
> > +   b = b ^ c;  \
> > +} while (0)
> > +
> > +volatile uint8_t csum8_put;
> > +
> > +void stress_vecmath(void)
> > +{
> > +   const stress_vint8_t v23_8 = {
> > +   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
> > +   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
> > +   };
> > +   const stress_vint8_t v3_8 = {
> > +   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
> > +   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03
> > +   };
> > +   stress_vint8_t a8 = {
> > +   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> > +   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
> > +   };
> > +   stress_vint8_t b8 = {
> > +   0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
> > +   0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78
> > +   };
> > +   stress_vint8_t c8 = {
> > +   0x01, 0x02, 0x03, 0x02, 0x01, 0x02, 0x03, 0x02,
> > +   0x03, 0x02, 0x01, 0x02, 0x03, 0x02, 0x01, 0x02
> > +   };
> > +   stress_vint8_t s8 = {
> > +   0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
> > +   0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02,
> > +   };
> > +   const uint8_t csum8_val =  (uint8_t)0x1b;
> > +   int i;
> > +   uint8_t csum8;
> > +
> > +   for (i = 1000; i; i--) {
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   OPS(a8, b8, c8, s8, v23_8, v3_8);
> > +   }
> > +
> > +   csum8 = a8[0]  ^ a8[1]  ^ a8[2]  ^ a8[3]  ^
> > +   a8[4]  ^ a8[5]  ^ a8[6]  ^ a8[7]  ^
> > +   a8[8]  ^ a8[9]  ^ a8[10] ^ a8[11] ^
> > +   a8[12] ^ a8[13] ^ a8[14] ^ a8[15];
> > +   csum8_put = csum8;
> > +}
> > --
> > 2.31.1
> >
> 
> 
> --
> BR,
> Hongtao


[PATCH] tree-optimization/115149 - VOP live and missing PHIs

2024-05-21 Thread Richard Biener
The following fixes a bug in vop-live get_live_in which was using
NULL to indicate the first processed edge but at the same time
using it for the case where the live-in virtual operand cannot be computed.
The fix tracks the first processed edge explicitly, which avoids sinking a
load to a place where we'd have to insert virtual PHIs to make the virtual
operand SSA web OK.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/115149
* tree-ssa-live.cc (virtual_operand_live::get_live_in):
Explicitly track the first processed edge.

* gcc.dg/pr115149.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr115149.c | 16 
 gcc/tree-ssa-live.cc|  8 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr115149.c

diff --git a/gcc/testsuite/gcc.dg/pr115149.c b/gcc/testsuite/gcc.dg/pr115149.c
new file mode 100644
index 000..9f6bc97dbe6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115149.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-inline -fno-tree-vrp -fno-ipa-sra -fno-tree-dce 
-fno-tree-ch" } */
+
+int a, c, e, f, g, h[1], i;
+static int j(int b) { return 0; }
+static void k(int d) {}
+int main()
+{
+  if (h[0])
+while (1) {
+   k(f && j(i && (h[g] = e)));
+   while (a)
+ c ^= 1;
+}
+  return 0;
+}
diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc
index e6ae551a457..60dfc05dcd9 100644
--- a/gcc/tree-ssa-live.cc
+++ b/gcc/tree-ssa-live.cc
@@ -1675,14 +1675,18 @@ virtual_operand_live::get_live_in (basic_block bb)
   edge_iterator ei;
   edge e;
   tree livein = NULL_TREE;
+  bool first = true;
   FOR_EACH_EDGE (e, ei, bb->preds)
 if (e->flags & EDGE_DFS_BACK)
   /* We can ignore backedges since if there's a def there it would
 have forced a PHI in the source because it also acts as use
 downstream.  */
   continue;
-else if (!livein)
-  livein = get_live_out (e->src);
+else if (first)
+  {
+   livein = get_live_out (e->src);
+   first = false;
+  }
 else if (get_live_out (e->src) != livein)
   /* When there's no virtual use downstream this indicates a point
 where we'd insert a PHI merging the different live virtual
-- 
2.35.3


[PATCH v3] i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW

2024-05-21 Thread Haochen Jiang
Hi all,

This is the v3 patch to fix PR115069. The new testcase has passed.

Changes in v3:
  - Simplify the testcase.

Changes in v2:
  - Add a testcase.
  - Change the comment for the early exit.

Thx,
Haochen

Since vpermq is really slow, we should avoid using it for permutation
when vpmovwb is not available (needs AVX512BW) for ix86_expand_vecop_qihi2
and fall back to ix86_expand_vecop_qihi.

gcc/ChangeLog:

PR target/115069
* config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
Do not enable the optimization when AVX512BW is not enabled.

gcc/testsuite/ChangeLog:

PR target/115069
* gcc.target/i386/pr115069.c: New.
---
 gcc/config/i386/i386-expand.cc   |  7 +++
 gcc/testsuite/gcc.target/i386/pr115069.c | 10 ++
 2 files changed, 17 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115069.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a6132911e6a..f7939761879 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24323,6 +24323,13 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, 
rtx op1, rtx op2)
   bool op2vec = GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT;
   bool uns_p = code != ASHIFTRT;
 
+  /* Without VPMOVWB (provided by AVX512BW ISA), the expansion uses the
+ generic permutation to merge the data back into the right place.  This
+ permutation results in VPERMQ, which is slow, so better fall back to
+ ix86_expand_vecop_qihi.  */
+  if (!TARGET_AVX512BW)
+return false;
+
   if ((qimode == V16QImode && !TARGET_AVX2)
   || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
   /* There are no V64HImode instructions.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c 
b/gcc/testsuite/gcc.target/i386/pr115069.c
new file mode 100644
index 000..7f1ff209f26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115069.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-not "vpermq" } } */
+
+typedef char v16qi __attribute__((vector_size(16)));
+
+v16qi foo (v16qi a, v16qi b) {
+return a * b;
+}
+
-- 
2.31.1



[PATCH] Cache the set of EH_RETURN_DATA_REGNOs

2024-05-21 Thread Richard Sandiford
While reviewing Andrew's fix for PR114843, it seemed like it would
be convenient to have a HARD_REG_SET of EH_RETURN_DATA_REGNOs.
This patch adds one and uses it to simplify a couple of use sites.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
* hard-reg-set.h (target_hard_regs::x_eh_return_data_regs): New field.
(eh_return_data_regs): New macro.
* reginfo.cc (init_reg_sets_1): Initialize x_eh_return_data_regs.
* df-scan.cc (df_get_exit_block_use_set): Use it.
* ira-lives.cc (process_out_of_region_eh_regs): Likewise.
---
 gcc/df-scan.cc |  8 +---
 gcc/hard-reg-set.h |  5 +
 gcc/ira-lives.cc   | 10 ++
 gcc/reginfo.cc | 10 ++
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/gcc/df-scan.cc b/gcc/df-scan.cc
index 1bade2cd71e..c8ab3c09cee 100644
--- a/gcc/df-scan.cc
+++ b/gcc/df-scan.cc
@@ -3702,13 +3702,7 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
 
   /* Mark the registers that will contain data for the handler.  */
   if (reload_completed && crtl->calls_eh_return)
-for (i = 0; ; ++i)
-  {
-   unsigned regno = EH_RETURN_DATA_REGNO (i);
-   if (regno == INVALID_REGNUM)
- break;
-   bitmap_set_bit (exit_block_uses, regno);
-  }
+IOR_REG_SET_HRS (exit_block_uses, eh_return_data_regs);
 
 #ifdef EH_RETURN_STACKADJ_RTX
   if ((!targetm.have_epilogue () || ! epilogue_completed)
diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h
index 8c1d1512ca2..340eb425c10 100644
--- a/gcc/hard-reg-set.h
+++ b/gcc/hard-reg-set.h
@@ -421,6 +421,9 @@ struct target_hard_regs {
  with the local stack frame are safe, but scant others.  */
   HARD_REG_SET x_regs_invalidated_by_call;
 
+  /* The set of registers that are used by EH_RETURN_DATA_REGNO.  */
+  HARD_REG_SET x_eh_return_data_regs;
+
   /* Table of register numbers in the order in which to try to use them.  */
   int x_reg_alloc_order[FIRST_PSEUDO_REGISTER];
 
@@ -485,6 +488,8 @@ extern struct target_hard_regs *this_target_hard_regs;
 #define call_used_or_fixed_regs \
   (regs_invalidated_by_call | fixed_reg_set)
 #endif
+#define eh_return_data_regs \
+  (this_target_hard_regs->x_eh_return_data_regs)
 #define reg_alloc_order \
   (this_target_hard_regs->x_reg_alloc_order)
 #define inv_reg_alloc_order \
diff --git a/gcc/ira-lives.cc b/gcc/ira-lives.cc
index e07d3dc3e89..958eabb9708 100644
--- a/gcc/ira-lives.cc
+++ b/gcc/ira-lives.cc
@@ -1260,14 +1260,8 @@ process_out_of_region_eh_regs (basic_block bb)
   for (int n = ALLOCNO_NUM_OBJECTS (a) - 1; n >= 0; n--)
{
  ira_object_t obj = ALLOCNO_OBJECT (a, n);
- for (int k = 0; ; k++)
-   {
- unsigned int regno = EH_RETURN_DATA_REGNO (k);
- if (regno == INVALID_REGNUM)
-   break;
- SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno);
- SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), regno);
-   }
+ OBJECT_CONFLICT_HARD_REGS (obj) |= eh_return_data_regs;
+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= eh_return_data_regs;
}
 }
 }
diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
index a0baeb90e12..73121365c47 100644
--- a/gcc/reginfo.cc
+++ b/gcc/reginfo.cc
@@ -420,6 +420,16 @@ init_reg_sets_1 (void)
}
 }
 
+  /* Recalculate eh_return_data_regs.  */
+  CLEAR_HARD_REG_SET (eh_return_data_regs);
+  for (i = 0; ; ++i)
+{
+  unsigned int regno = EH_RETURN_DATA_REGNO (i);
+  if (regno == INVALID_REGNUM)
+   break;
+  SET_HARD_REG_BIT (eh_return_data_regs, regno);
+}
+
   memset (have_regs_of_mode, 0, sizeof (have_regs_of_mode));
   memset (contains_reg_of_mode, 0, sizeof (contains_reg_of_mode));
   for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)
-- 
2.25.1



[Ada] Fix PR ada/115168

2024-05-21 Thread Eric Botcazou
Recent changes made to the runtime library broke again its build on Solaris 
because it uses Solaris threads instead of POSIX threads on this platform.

Tested by Rainer, applied on the mainline.


2024-05-21  Eric Botcazou  

PR ada/115168
* libgnarl/s-taprop__solaris.adb (Initialize): Fix pasto.
* libgnat/s-oslock__solaris.ads (Owner_Int): Delete.
(Owner_ID): Change the designated type to Integer.

-- 
Eric Botcazou
diff --git a/gcc/ada/libgnarl/s-taprop__solaris.adb b/gcc/ada/libgnarl/s-taprop__solaris.adb
index 09f90e6e204..6d05e8db004 100644
--- a/gcc/ada/libgnarl/s-taprop__solaris.adb
+++ b/gcc/ada/libgnarl/s-taprop__solaris.adb
@@ -424,7 +424,7 @@ package body System.Task_Primitives.Operations is
 
begin
   Environment_Task_Id := Environment_Task;
-  Self_ID.Common.LL.Thread := thr_self;
+  Environment_Task.Common.LL.Thread := thr_self;
 
   Interrupt_Management.Initialize;
 
diff --git a/gcc/ada/libgnat/s-oslock__solaris.ads b/gcc/ada/libgnat/s-oslock__solaris.ads
index cc5a83df02e..56a242c8070 100644
--- a/gcc/ada/libgnat/s-oslock__solaris.ads
+++ b/gcc/ada/libgnat/s-oslock__solaris.ads
@@ -42,10 +42,7 @@ package System.OS_Locks is
type Private_Task_Serial_Number is mod 2 ** Long_Long_Integer'Size;
--  Used to give each task a unique serial number
 
-   type Owner_Int is new Integer;
-   for Owner_Int'Alignment use Standard'Maximum_Alignment;
-
-   type Owner_ID is access all Owner_Int;
+   type Owner_ID is access all Integer;
 
function To_Owner_ID is
  new Ada.Unchecked_Conversion (System.Address, Owner_ID);


Re: [PATCH] Cache the set of EH_RETURN_DATA_REGNOs

2024-05-21 Thread Richard Biener
On Tue, May 21, 2024 at 11:03 AM Richard Sandiford
 wrote:
>
> While reviewing Andrew's fix for PR114843, it seemed like it would
> be convenient to have a HARD_REG_SET of EH_RETURN_DATA_REGNOs.
> This patch adds one and uses it to simplify a couple of use sites.
>
> Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

OK

> Richard
>
>
> gcc/
> * hard-reg-set.h (target_hard_regs::x_eh_return_data_regs): New field.
> (eh_return_data_regs): New macro.
> * reginfo.cc (init_reg_sets_1): Initialize x_eh_return_data_regs.
> * df-scan.cc (df_get_exit_block_use_set): Use it.
> * ira-lives.cc (process_out_of_region_eh_regs): Likewise.
> ---
>  gcc/df-scan.cc |  8 +---
>  gcc/hard-reg-set.h |  5 +
>  gcc/ira-lives.cc   | 10 ++
>  gcc/reginfo.cc | 10 ++
>  4 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/df-scan.cc b/gcc/df-scan.cc
> index 1bade2cd71e..c8ab3c09cee 100644
> --- a/gcc/df-scan.cc
> +++ b/gcc/df-scan.cc
> @@ -3702,13 +3702,7 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
>
>/* Mark the registers that will contain data for the handler.  */
>if (reload_completed && crtl->calls_eh_return)
> -for (i = 0; ; ++i)
> -  {
> -   unsigned regno = EH_RETURN_DATA_REGNO (i);
> -   if (regno == INVALID_REGNUM)
> - break;
> -   bitmap_set_bit (exit_block_uses, regno);
> -  }
> +IOR_REG_SET_HRS (exit_block_uses, eh_return_data_regs);
>
>  #ifdef EH_RETURN_STACKADJ_RTX
>if ((!targetm.have_epilogue () || ! epilogue_completed)
> diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h
> index 8c1d1512ca2..340eb425c10 100644
> --- a/gcc/hard-reg-set.h
> +++ b/gcc/hard-reg-set.h
> @@ -421,6 +421,9 @@ struct target_hard_regs {
>   with the local stack frame are safe, but scant others.  */
>HARD_REG_SET x_regs_invalidated_by_call;
>
> +  /* The set of registers that are used by EH_RETURN_DATA_REGNO.  */
> +  HARD_REG_SET x_eh_return_data_regs;
> +
>/* Table of register numbers in the order in which to try to use them.  */
>int x_reg_alloc_order[FIRST_PSEUDO_REGISTER];
>
> @@ -485,6 +488,8 @@ extern struct target_hard_regs *this_target_hard_regs;
>  #define call_used_or_fixed_regs \
>(regs_invalidated_by_call | fixed_reg_set)
>  #endif
> +#define eh_return_data_regs \
> +  (this_target_hard_regs->x_eh_return_data_regs)
>  #define reg_alloc_order \
>(this_target_hard_regs->x_reg_alloc_order)
>  #define inv_reg_alloc_order \
> diff --git a/gcc/ira-lives.cc b/gcc/ira-lives.cc
> index e07d3dc3e89..958eabb9708 100644
> --- a/gcc/ira-lives.cc
> +++ b/gcc/ira-lives.cc
> @@ -1260,14 +1260,8 @@ process_out_of_region_eh_regs (basic_block bb)
>for (int n = ALLOCNO_NUM_OBJECTS (a) - 1; n >= 0; n--)
> {
>   ira_object_t obj = ALLOCNO_OBJECT (a, n);
> - for (int k = 0; ; k++)
> -   {
> - unsigned int regno = EH_RETURN_DATA_REGNO (k);
> - if (regno == INVALID_REGNUM)
> -   break;
> - SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno);
> - SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), regno);
> -   }
> + OBJECT_CONFLICT_HARD_REGS (obj) |= eh_return_data_regs;
> + OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= eh_return_data_regs;
> }
>  }
>  }
> diff --git a/gcc/reginfo.cc b/gcc/reginfo.cc
> index a0baeb90e12..73121365c47 100644
> --- a/gcc/reginfo.cc
> +++ b/gcc/reginfo.cc
> @@ -420,6 +420,16 @@ init_reg_sets_1 (void)
> }
>  }
>
> +  /* Recalculate eh_return_data_regs.  */
> +  CLEAR_HARD_REG_SET (eh_return_data_regs);
> +  for (i = 0; ; ++i)
> +{
> +  unsigned int regno = EH_RETURN_DATA_REGNO (i);
> +  if (regno == INVALID_REGNUM)
> +   break;
> +  SET_HARD_REG_BIT (eh_return_data_regs, regno);
> +}
> +
>memset (have_regs_of_mode, 0, sizeof (have_regs_of_mode));
>memset (contains_reg_of_mode, 0, sizeof (contains_reg_of_mode));
>for (m = 0; m < (unsigned int) MAX_MACHINE_MODE; m++)
> --
> 2.25.1
>


[PATCH] tree-optimization/115137 - more ptr-vs-ptr compare fixes

2024-05-21 Thread Richard Biener
The following fixes the omission of const-pool included in NONLOCAL.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR tree-optimization/115137
* tree-ssa-structalias.cc (pt_solution_includes_const_pool): NONLOCAL
also includes constant pool entries.

* gcc.dg/torture/pr115137.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr115137.c | 34 +
 gcc/tree-ssa-structalias.cc |  1 +
 2 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr115137.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr115137.c 
b/gcc/testsuite/gcc.dg/torture/pr115137.c
new file mode 100644
index 000..9cd8ff93633
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr115137.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+struct a {
+  int b;
+} c;
+
+int d;
+long e;
+
+static void f(char *g, char *h, struct a *l) {
+  char a[1024];
+  int j = 0;
+
+  if (d)
+h = a;
+
+  for (; g < h; g++)
+if (__builtin_iscntrl(*g))
+  ++j;
+
+  while (l->b < j)
+;
+}
+
+int main() {
+  static const struct {
+char *input;
+  } k[] = {{"somepage.html"}, {""}};
+
+  for (unsigned int i = 0; i < 1; ++i) {
+e = __builtin_strlen(k[i].input);
+f(k[i].input, k[i].input + e, &c);
+  }
+}
diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc
index 61fb3610a17..0e9423a78ec 100644
--- a/gcc/tree-ssa-structalias.cc
+++ b/gcc/tree-ssa-structalias.cc
@@ -7087,6 +7087,7 @@ bool
 pt_solution_includes_const_pool (struct pt_solution *pt)
 {
   return (pt->const_pool
+ || pt->nonlocal
  || (pt->escaped && (!cfun || cfun->gimple_df->escaped.const_pool))
  || (pt->ipa_escaped && ipa_escaped_pt.const_pool));
 }
-- 
2.35.3


Re: [Patch, aarch64, middle-end] Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Ajit Agarwal
Hello Alex:

On 21/05/24 1:16 am, Alex Coplan wrote:
> On 20/05/2024 18:44, Alex Coplan wrote:
>> Hi Ajit,
>>
>> On 20/05/2024 21:50, Ajit Agarwal wrote:
>>> Hello Alex/Richard:
>>>
>>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
>>> to support multiple targets.
>>>
>>> Common infrastructure of load store pair fusion is divided into
>>> target independent and target dependent code.
>>>
>>> Target independent code is structured in the following files.
>>> gcc/pair-fusion.h
>>> gcc/pair-fusion.cc
>>>
>>> Target independent code is the Generic code with pure virtual
>>> function to interface between target independent and dependent
>>> code.
>>>
>>> Bootstrapped and regtested on aarch64-linux-gnu.
>>>
>>> Thanks & Regards
>>> Ajit
>>>
>>> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
>>>
>>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
>>> to support multiple targets.
>>>
>>> Common infrastructure of load store pair fusion is divided into
>>> target independent and target dependent code.
>>>
>>> Target independent code is structured in the following files.
>>> gcc/pair-fusion.h
>>> gcc/pair-fusion.cc
>>>
>>> Target independent code is the Generic code with pure virtual
>>> function to interface between target independent and dependent
>>> code.
>>>
>>> 2024-05-20  Ajit Kumar Agarwal  
>>>
>>> gcc/ChangeLog:
>>>
>>> * pair-fusion.h: Generic header code for load store fusion
>>
>> Insert "pair" before fusion?

Addressed in v1 of the patch.
>>
>>> that can be shared across different architectures.
>>> * pair-fusion.cc: Generic source code implementation for
>>> load store fusion that can be shared across different architectures.
>>
>> Likewise.
Addressed in v1 of the patch.
>>
>>> * Makefile.in: Add new executable pair-fusion.o
>>
>> It's not an executable but an object file.
>>
>>> * config/aarch64/aarch64-ldp-fusion.cc: Target specific
>>> code for load store fusion of aarch64.
>>
>> I guess this should say something like: "Delete generic code and move it
>> to pair-fusion.cc in the middle-end."
>>
>> I've left some comments below on the header file.  The rest of the patch
>> looks pretty good to me.  I tried diffing the original contents of
>> aarch64-ldp-fusion.cc with pair-fusion.cc, and that looks as expected.
>>
> 
> 
> 
>>> diff --git a/gcc/pair-fusion.h b/gcc/pair-fusion.h
>>> new file mode 100644
>>> index 000..00f6d3e149a
>>> --- /dev/null
>>> +++ b/gcc/pair-fusion.h
>>> @@ -0,0 +1,340 @@
>>> +// Pair Mem fusion generic header file.
>>> +// Copyright (C) 2024 Free Software Foundation, Inc.
>>> +//
>>> +// This file is part of GCC.
>>> +//
>>> +// GCC is free software; you can redistribute it and/or modify it
>>> +// under the terms of the GNU General Public License as published by
>>> +// the Free Software Foundation; either version 3, or (at your option)
>>> +// any later version.
>>> +//
>>> +// GCC is distributed in the hope that it will be useful, but
>>> +// WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +// General Public License for more details.
>>> +//
>>> +// You should have received a copy of the GNU General Public License
>>> +// along with GCC; see the file COPYING3.  If not see
>>> +// .
>>> +
>>> +#define INCLUDE_ALGORITHM
>>> +#define INCLUDE_FUNCTIONAL
>>> +#define INCLUDE_LIST
>>> +#define INCLUDE_TYPE_TRAITS
>>> +#include "config.h"
>>> +#include "system.h"
>>> +#include "coretypes.h"
>>> +#include "backend.h"
>>> +#include "rtl.h"
>>> +#include "df.h"
>>> +#include "rtl-iter.h"
>>> +#include "rtl-ssa.h"
>>
>> I'm not sure how desirable this is, but you might be able to
>> forward-declare RTL-SSA types like this:
>>
>> class def_info;
>> class insn_info;
>> class insn_range_info;
>>
>> thus removing the need to include the header here, since the interface
>> only refers to these types by pointer or reference.
>>
>> Richard: please say if you'd prefer keeping the include.
>>

Using forward declarations gives ambiguity errors from insn_info
conflicting with rtl_ssa::insn_info, as well as templated initialization
errors. Also, the overloaded operator on insn_info is not
defined with only a forward declaration.

Hence I kept the header.

Addressed in v1 of the patch.

>>> +#include "cfgcleanup.h"
>>> +#include "tree-pass.h"
>>> +#include "ordered-hash-map.h"
>>> +#include "tree-dfa.h"
>>> +#include "fold-const.h"
>>> +#include "tree-hash-traits.h"
>>> +#include "print-tree.h"
>>> +#include "insn-attr.h"
>>
>> I expect we don't need all of these includes here.  I think we should
>> have the minimum necessary set of includes here and most of the includes
>> should be in the *.cc files.
>>

Addressed in v1 of the patch.
>>> +
>>> +using namespace rtl_ssa;
>>> +
>>> +// We pack these fields (load_p, fpsimd_p, and size) into an integer
>>> +// (LFS) which we use as part of the key into the ma

RE: [PATCH v3] Match: Extract ternary_integer_types_match_p helper func [NFC]

2024-05-21 Thread Li, Pan2
> Thanks, looks good to me! You still need approval from a maintainer..

Thanks Tamar, let's wait for a while, 😊!

Pan

-Original Message-
From: Tamar Christina  
Sent: Tuesday, May 21, 2024 11:19 AM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; richard.guent...@gmail.com
Subject: RE: [PATCH v3] Match: Extract ternary_integer_types_match_p helper 
func [NFC]



> -Original Message-
> From: pan2...@intel.com 
> Sent: Tuesday, May 21, 2024 2:13 AM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; Tamar Christina
> ; richard.guent...@gmail.com; Pan Li
> 
> Subject: [PATCH v3] Match: Extract ternary_integer_types_match_p helper func
> [NFC]
> 
> From: Pan Li 
> 
> There are several match patterns for SAT-related cases, which leads to
> duplicated code checking that dest, op_0 and op_1 have the same tree
> type, aka a ternary tree type match.  Thus, extract one helper function
> to do this and avoid match code duplication.
> 
> The below test suites are passed for this patch:
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 regression test.
> 
> gcc/ChangeLog:
> 
>   * match.pd: Leverage helper func for SAT_ADD match.
>   * tree.cc (ternary_integer_types_match_p): New func impl to
>   check if ternary tree types are all integer.
>   * tree.h (ternary_integer_types_match_p): New func decl.
> 

Thanks, looks good to me! You still need approval from a maintainer..

Cheers,
Tamar

> Signed-off-by: Pan Li 
> ---
>  gcc/match.pd | 28 +++-
>  gcc/tree.cc  | 16 
>  gcc/tree.h   |  5 +
>  3 files changed, 28 insertions(+), 21 deletions(-)
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0f9c34fa897..cff67c84498 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -39,7 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> HONOR_NANS
> uniform_vector_p
> expand_vec_cmp_expr_p
> -   bitmask_inv_cst_vector_p)
> +   bitmask_inv_cst_vector_p
> +   ternary_integer_types_match_p)
> 
>  /* Operator lists.  */
>  (define_operator_list tcc_comparison
> @@ -3046,38 +3047,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* Unsigned Saturation Add */
>  (match (usadd_left_part_1 @0 @1)
>   (plus:c @0 @1)
> - (if (INTEGRAL_TYPE_P (type)
> -  && TYPE_UNSIGNED (TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@1)
> + (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED
> (type
> 
>  (match (usadd_left_part_2 @0 @1)
>   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> - (if (INTEGRAL_TYPE_P (type)
> -  && TYPE_UNSIGNED (TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@1)
> + (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED
> (type
> 
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (lt (plus:c @0 @1) @0)))
> - (if (INTEGRAL_TYPE_P (type)
> -  && TYPE_UNSIGNED (TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@1)
> + (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED
> (type
> 
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (gt @0 (plus:c @0 @1
> - (if (INTEGRAL_TYPE_P (type)
> -  && TYPE_UNSIGNED (TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@1)
> + (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED
> (type
> 
>  (match (usadd_right_part_2 @0 @1)
>   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1))
> integer_zerop)))
> - (if (INTEGRAL_TYPE_P (type)
> -  && TYPE_UNSIGNED (TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@0))
> -  && types_match (type, TREE_TYPE (@1)
> + (if (ternary_integer_types_match_p (type, @0, @1) && TYPE_UNSIGNED
> (type
> 
>  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> because the sub part of left_part_2 cannot work with right_part_1.
> diff --git a/gcc/tree.cc b/gcc/tree.cc
> index 6564b002dc1..b59d42c3e47 100644
> --- a/gcc/tree.cc
> +++ b/gcc/tree.cc
> @@ -10622,6 +10622,22 @@ uniform_integer_cst_p (tree t)
>return NULL_TREE;
>  }
> 
> +/* Check if the types T1,  T2 and T3 are effectively the same integer type.
> +   If T1,  T2 or T3 is not a type, the test applies to their TREE_TYPE.  */
> +
> +bool
> +ternary_integer_types_match_p (tree t1, tree t2, tree t3)
> +{
> +  t1 = TYPE_P (t1) ? t1 : TREE_TYPE (t1);
> +  t2 = TYPE_P (t2) ? t2 : TREE_TYPE (t2);
> +  t3 = TYPE_P (t3) ? t3 : TREE_TYPE (t3);
> +
> +  if (!INTEGRAL_TYPE_P (t1) || !INTEGRAL_TYPE_P (t2) || !INTEGRAL_TYPE_P
> (t3))
> +return false;
> +
> +  return types_compatible_p (t1, t2) && types_compatible_p (t2, t3);
> +}
> +
>  /* Checks to see if T is a constant or a constant vector and if each element 
> E
> adheres to ~E + 1 == 

[PATCH v1 2/2] RISC-V: Add test cases for __builtin_add_overflow branch form unsigned SAT_ADD

2024-05-21 Thread pan2 . li
From: Pan Li 

Now that the middle end supports the __builtin_add_overflow branch form
of unsigned SAT_ADD, add more test cases to cover the functionality.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for
branch __builtin_add_overflow form.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: New test.
* gcc.target/riscv/sat_u_add-13.c: New test.
* gcc.target/riscv/sat_u_add-14.c: New test.
* gcc.target/riscv/sat_u_add-15.c: New test.
* gcc.target/riscv/sat_u_add-16.c: New test.
* gcc.target/riscv/sat_u_add-run-13.c: New test.
* gcc.target/riscv/sat_u_add-run-14.c: New test.
* gcc.target/riscv/sat_u_add-run-15.c: New test.
* gcc.target/riscv/sat_u_add-run-16.c: New test.

Signed-off-by: Pan Li 
---
 .../rvv/autovec/binop/vec_sat_u_add-13.c  | 19 +
 .../rvv/autovec/binop/vec_sat_u_add-14.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-15.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-16.c  | 20 +
 .../rvv/autovec/binop/vec_sat_u_add-run-13.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-14.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-15.c  | 75 +++
 .../rvv/autovec/binop/vec_sat_u_add-run-16.c  | 75 +++
 gcc/testsuite/gcc.target/riscv/sat_arith.h| 25 +++
 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c | 19 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c | 18 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-16.c | 17 +
 .../gcc.target/riscv/sat_u_add-run-13.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-14.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-15.c   | 25 +++
 .../gcc.target/riscv/sat_u_add-run-16.c   | 25 +++
 17 files changed, 579 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-13.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-14.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-15.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_add-run-16.c

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
new file mode 100644
index 000..2628ac315b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_4:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_4(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c
new file mode 100644
index 000..4f6e113d9ad
--- /dev/null
+++ 

[PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread pan2 . li
From: Pan Li 

This patch would like to support the __builtin_add_overflow branch form for
unsigned SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Unlike the branchless version, we leverage a simplify pattern to
convert the branch version of SAT_ADD into the branchless one if and
only if the backend supports IFN_SAT_ADD.  Thus, the backend is able
to choose the branch or branchless implementation of .SAT_ADD.
For example, some targets can handle branchy code more optimally.

When the target implements IFN_SAT_ADD for unsigned types, before this
patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto ; [35.00%]
  else
goto ; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless,  if and only if backend implement the IFN.

Signed-off-by: Pan Li 
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..8b9ded98323 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+#if GIMPLE
+
+(simplify
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+ (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
-- 
2.34.1



[PING^2] [PATCH v2] testsuite: Verify r0-r3 are extended with CMSE

2024-05-21 Thread Torbjorn SVENSSON

Gentle ping!

Kind regards,
Torbjörn

On 2024-05-14 13:01, Torbjorn SVENSSON wrote:

Hi,

I'm not sure if the previous "ok" from Richard on the v1 is enough for 
this or if another approval is needed.


Adding extra maintainers since Richard Earnshaw appears to have been busy 
for the past few weeks.


Kind regards,
Torbjörn

On 2024-05-06 13:50, Torbjorn SVENSSON wrote:

Hi,

Forgot to mention when I sent the patch that I would like to commit it 
to the following branches:


- releases/gcc-11
- releases/gcc-12
- releases/gcc-13
- releases/gcc-14
- trunk

Kind regards,
Torbjörn

On 2024-05-02 12:50, Torbjörn SVENSSON wrote:

Add regression test to the existing zero/sign extend tests for CMSE to
verify that r0, r1, r2 and r3 are properly extended, not just r0.

boolCharShortEnumSecureFunc test is done using -O0 to ensure the
instructions are in a predictable order.

gcc/testsuite/ChangeLog:

* gcc.target/arm/cmse/extend-param.c: Add regression test. Add
  -fshort-enums.
* gcc.target/arm/cmse/extend-return.c: Add -fshort-enums option.

Signed-off-by: Torbjörn SVENSSON 
---
  .../gcc.target/arm/cmse/extend-param.c    | 21 +++
  .../gcc.target/arm/cmse/extend-return.c   |  4 ++--
  2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/cmse/extend-param.c 
b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c

index 01fac786238..d01ef87e0be 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/extend-param.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-mcmse" } */
+/* { dg-options "-mcmse -fshort-enums" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  #include 
@@ -78,7 +78,6 @@ __attribute__((cmse_nonsecure_entry)) char 
enumSecureFunc (enum offset index) {

    if (index >= ARRAY_SIZE)
  return 0;
    return array[index];
-
  }
  /*
@@ -88,9 +87,23 @@ __attribute__((cmse_nonsecure_entry)) char 
enumSecureFunc (enum offset index) {

  **    ...
  */
  __attribute__((cmse_nonsecure_entry)) char boolSecureFunc (bool 
index) {

-
    if (index >= ARRAY_SIZE)
  return 0;
    return array[index];
+}
-}
\ No newline at end of file
+/*
+**__acle_se_boolCharShortEnumSecureFunc:
+**    ...
+**    uxtb    r0, r0
+**    uxtb    r1, r1
+**    uxth    r2, r2
+**    uxtb    r3, r3
+**    ...
+*/
+__attribute__((cmse_nonsecure_entry,optimize(0))) char 
boolCharShortEnumSecureFunc (bool a, unsigned char b, unsigned short 
c, enum offset d) {

+  size_t index = a + b + c + d;
+  if (index >= ARRAY_SIZE)
+    return 0;
+  return array[index];
+}
diff --git a/gcc/testsuite/gcc.target/arm/cmse/extend-return.c 
b/gcc/testsuite/gcc.target/arm/cmse/extend-return.c

index cf731ed33df..081de0d699f 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/extend-return.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/extend-return.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-mcmse" } */
+/* { dg-options "-mcmse -fshort-enums" } */
  /* { dg-final { check-function-bodies "**" "" "" } } */
  #include 
@@ -89,4 +89,4 @@ unsigned char __attribute__((noipa)) enumNonsecure0 
(ns_enum_foo_t * ns_foo_p)

  unsigned char boolNonsecure0 (ns_bool_foo_t * ns_foo_p)
  {
    return ns_foo_p ();
-}
\ No newline at end of file
+}


[PATCH] testsuite: Allow for non-SECTION_MERGE systems in gcc.dg/pr115066.c [PR115066]

2024-05-21 Thread Rainer Orth
gcc.dg/pr115066.c currently FAILs on Solaris/SPARC with the native as:

FAIL: gcc.dg/pr115066.c scan-assembler .bytet0xbt# Define macro strx

Instead of the expected

.byte   0xb ! Define macro strx

the assembler output contains

.byte   0x1 ! Define macro

Apparently this happens because the Solaris as/ld combo doesn't support
SHF_MERGE.

While I initially meant to just skip the test on sparc*-*-solaris2* && !gas,
Tom suggested allowing both forms instead, which is what his patch
does.

Tested on sparc-sun-solaris2.11 and i386-pc-solaris2.11 (as and gas
each) and x86_64-pc-linux-gnu.

Ok for trunk?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2024-05-21  Tom de Vries  

gcc/testsuite:
PR debug/115066
* gcc.dg/pr115066.c (scan-assembler): Allow for alternative form
of Define macro.

# HG changeset patch
# Parent  630ea4594490d9bd4911b95d13eaca62a8cd4dba
testsuite: Allow for non-SECTION_MERGE systems in gcc.dg/pr115066.c [PR115066]

diff --git a/gcc/testsuite/gcc.dg/pr115066.c b/gcc/testsuite/gcc.dg/pr115066.c
--- a/gcc/testsuite/gcc.dg/pr115066.c
+++ b/gcc/testsuite/gcc.dg/pr115066.c
@@ -3,6 +3,6 @@
 /* { dg-options "-gsplit-dwarf -g3 -dA -gdwarf-4" } */
 /* { dg-final { scan-assembler-times {\.section\t"?\.debug_macro} 1 } } */
 /* { dg-final { scan-assembler-not {\.byte\t0x5\t[^\n\r]* Define macro strp} } } */
-/* { dg-final { scan-assembler {\.byte\t0xb\t[^\n\r]* Define macro strx} } } */
+/* { dg-final { scan-assembler {\.byte\t0xb\t[^\n\r]* Define macro strx|\.byte\t0x1\t[^\n\r]* Define macro} } } */
 
 #define foo 1


Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread Andrew Pinski
On Tue, May 21, 2024, 3:55 AM  wrote:

> From: Pan Li 
>
> This patch would like to support the __builtin_add_overflow branch form for
> unsigned SAT_ADD.  For example as below:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Different to the branchless version,  we leverage the simplify to
> convert the branch version of SAT_ADD into branchless if and only
> if the backend has supported the IFN_SAT_ADD.  Thus,  the backend has
> the ability to choose branch or branchless implementation of .SAT_ADD.
> For example,  some target can take care of branches code more optimally.
>
> When the target implement the IFN_SAT_ADD for unsigned and before this
> patch:
>
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
> goto ; [35.00%]
>   else
> goto ; [65.00%]
> ;;succ:   4
> ;;3
>
> ;;   basic block 3, loop depth 0
> ;;pred:   2
>   _1 = REALPART_EXPR <_6>;
> ;;succ:   4
>
> ;;   basic block 4, loop depth 0
> ;;pred:   3
> ;;2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;succ:   EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;succ:   EXIT
> }
>
> The below test suites are passed for this patch:
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add new simplify to convert branch SAT_ADD into
> branchless,  if and only if backend implement the IFN.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/match.pd | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0f9c34fa897..8b9ded98323 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +#if GIMPLE
> +
> +(simplify
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
> +  integer_minus_onep (realpart @2))
> + (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type,
> OPTIMIZE_FOR_BOTH))
> +  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
>

I think you need to make sure type and @0's type matches.

Also I don't think you need :c here since you don't match @0 nor @1 more
than once.

Thanks,
Andrew


+
> +#endif
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
> x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> --
> 2.34.1
>
>


[PATCH] driver: Use -as/ld as final fallback instead of as/ld for cross

2024-05-21 Thread YunQiang Su
If `find_a_program` cannot find `as/ld` and we are a cross toolchain,
the final fallback is the system's `as/ld`.  In fact, we can try
<triple>-as/ld before falling back to the native as/ld.

This patch is derived from Debian's patch:
  gcc-search-prefixed-as-ld.diff

gcc
* gcc.cc (execute): Look for <triple>-as/ld before falling back
to native as/ld.
---
 gcc/gcc.cc | 21 +
 1 file changed, 21 insertions(+)

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 830a4700a87..8a1bdb5e3e2 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -3293,6 +3293,27 @@ execute (void)
   string = find_a_program(commands[0].prog);
   if (string)
commands[0].argv[0] = string;
+  else if (*cross_compile != '0'
+   && (!strcmp (commands[0].argv[0], "as")
+   || !strcmp (commands[0].argv[0], "ld")))
+   {
+ string = XNEWVEC (char, strlen (commands[0].argv[0]) + 2
+ + strlen (DEFAULT_REAL_TARGET_MACHINE));
+ strcpy (string, DEFAULT_REAL_TARGET_MACHINE);
+ strcat (string, "-");
+ strcat (string, commands[0].argv[0]);
+ const char *string_args[] = {string, "--version", NULL};
+ int exit_status = 0;
+ int err = 0;
+ const char *errmsg = pex_one (PEX_SEARCH, string,
+ CONST_CAST (char **, string_args), string,
+ NULL, NULL, &exit_status, &err);
+ if (errmsg == NULL && exit_status == 0 && err == 0)
+   {
+ commands[0].argv[0] = string;
+ commands[0].prog = string;
+   }
+   }
 }
 
   for (n_commands = 1, i = 0; argbuf.iterate (i, &arg); i++)
-- 
2.39.2



[C PATCH]: allow aliasing of compatible types derived from enumeral types [PR115157]

2024-05-21 Thread Martin Uecker


For enum and integer we allow aliasing by specifically returning
via a langhook the aliasing set of the underlying type.
But this is not sufficient for derived types, i.e. pointers to
enums and pointers to compatible integers also need to have the
same aliasing set.

We also allow forward declarations of enums which is a GNU 
extension, but I think this has to work consistently too, so
we here have the same issue as in C23 with other tagged types.

The solution in this patch is similar to what we do in C23, i.e.
we start out with structural equality and then set TYPE_CANONICAL
to the underlying type. The only way to make the TYPE_CANONICAL
system work with the C rules for type compatibility seems to be to set
TYPE_CANONICAL to the same type for all types in a compatibility
equivalence class (as compatibility is not transitive, this puts
together similar types that are not compatible). This is the
underlying type in this case.  As all types in such an equivalence
class have the same representation, this should always work
in my opinion (but maybe there are some middle-end aspects I am
still missing).


When testing, I so far only found two minor issues, i.e. when
computing the 'aka' type in diagnostics and an issue with
godump.cc (not sure I fixed this correctly).


Beyond this patch, we also need some changes for function types
in general and there are probably also some other issues related
to incomplete arrays (I added some checking to 'comptypes'
to check that all types ruled compatible by the C FE also have
either structural equality, or have the same TYPE_CANONICAL, and
this brings up some more inconsistencies).

Thoughts?


Bootstrapped and regression tested on x86_64 (only C, C++ so far).




C: allow aliasing of compatible types derived from enumeral types [PR115157]

Aliasing of enumeral types with the underlying integer is now allowed
by setting the aliasing set to zero.  But this does not allow aliasing
of derived types which are compatible as required by ISO C.  Instead,
initially set structural equality.  Then set TYPE_CANONICAL and update
pointers and main variants when the type is completed (as done for
structures and unions in C23).

PR 115157

gcc/c/
* c-decl.cc (shadow_tag_warned, parser_xref_tag, start_enum,
finish_enum): Set SET_TYPE_STRUCTURAL_EQUALITY / TYPE_CANONICAL.
* c-objc-common.cc (get_alias_set): Remove special case.
(get_aka_type): Add special case.

gcc/
* godump.cc (go_output_typedef): Use TYPE_MAIN_VARIANT instead
of TYPE_CANONICAL.

gcc/testsuite/
* gcc.dg/enum-alias-1.c: New test.
* gcc.dg/enum-alias-2.c: New test.
* gcc.dg/enum-alias-3.c: New test.

diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
index b691b91b3db..6e6606c9570 100644
--- a/gcc/c/c-decl.cc
+++ b/gcc/c/c-decl.cc
@@ -5051,7 +5051,7 @@ shadow_tag_warned (const struct c_declspecs *declspecs, 
int warned)
  if (t == NULL_TREE)
{
  t = make_node (code);
- if (flag_isoc23 && code != ENUMERAL_TYPE)
+ if (flag_isoc23 || code == ENUMERAL_TYPE)
SET_TYPE_STRUCTURAL_EQUALITY (t);
  pushtag (input_location, name, t);
}
@@ -8828,7 +8828,7 @@ parser_xref_tag (location_t loc, enum tree_code code, 
tree name,
  the forward-reference will be altered into a real type.  */
 
   ref = make_node (code);
-  if (flag_isoc23 && code != ENUMERAL_TYPE)
+  if (flag_isoc23 || code == ENUMERAL_TYPE)
 SET_TYPE_STRUCTURAL_EQUALITY (ref);
   if (code == ENUMERAL_TYPE)
 {
@@ -9919,6 +9919,7 @@ start_enum (location_t loc, struct c_enum_contents 
*the_enum, tree name,
 {
   enumtype = make_node (ENUMERAL_TYPE);
   TYPE_SIZE (enumtype) = NULL_TREE;
+  SET_TYPE_STRUCTURAL_EQUALITY (enumtype);
   pushtag (loc, name, enumtype);
   if (fixed_underlying_type != NULL_TREE)
{
@@ -9935,6 +9936,8 @@ start_enum (location_t loc, struct c_enum_contents 
*the_enum, tree name,
  TYPE_SIZE (enumtype) = NULL_TREE;
  TYPE_PRECISION (enumtype) = TYPE_PRECISION (fixed_underlying_type);
  ENUM_UNDERLYING_TYPE (enumtype) = fixed_underlying_type;
+ TYPE_CANONICAL (enumtype) = TYPE_CANONICAL (fixed_underlying_type);
+ c_update_type_canonical (enumtype);
  layout_type (enumtype);
}
 }
@@ -10094,6 +10097,10 @@ finish_enum (tree enumtype, tree values, tree 
attributes)
   ENUM_UNDERLYING_TYPE (enumtype) =
c_common_type_for_size (TYPE_PRECISION (tem), TYPE_UNSIGNED (tem));
 
+  TYPE_CANONICAL (enumtype) =
+   TYPE_CANONICAL (ENUM_UNDERLYING_TYPE (enumtype));
+  c_update_type_canonical (enumtype);
+
   layout_type (enumtype);
 }
 
diff --git a/gcc/c/c-objc-common.cc b/gcc/c/c-objc-common.cc
index b7c72d2609c..551ec6f4b65 100644
--- a/g

Re: [PATCH] driver: Use -as/ld as final fallback instead of as/ld for cross

2024-05-21 Thread Andrew Pinski
On Tue, May 21, 2024 at 5:12 AM YunQiang Su  wrote:
>
> If `find_a_program` cannot find `as/ld` and we are a cross toolchain,
> the final fallback is `as/ld` of system.  In fact, we can have a try
> with -as/ld before fallback to native as/ld.
>
> This patch is derivatived from Debian's patch:
>   gcc-search-prefixed-as-ld.diff
>
> gcc
> * gcc.cc(execute): Looks for -as/ld before fallback
> to native as/ld.
> ---
>  gcc/gcc.cc | 21 +
>  1 file changed, 21 insertions(+)
>
> diff --git a/gcc/gcc.cc b/gcc/gcc.cc
> index 830a4700a87..8a1bdb5e3e2 100644
> --- a/gcc/gcc.cc
> +++ b/gcc/gcc.cc
> @@ -3293,6 +3293,27 @@ execute (void)
>string = find_a_program(commands[0].prog);
>if (string)
> commands[0].argv[0] = string;
> +  else if (*cross_compile != '0'
> +   && (!strcmp (commands[0].argv[0], "as")
> +   || !strcmp (commands[0].argv[0], "ld")))
> +   {
> + string = XNEWVEC (char, strlen (commands[0].argv[0]) + 2
> + + strlen (DEFAULT_REAL_TARGET_MACHINE));
> + strcpy (string, DEFAULT_REAL_TARGET_MACHINE);
> + strcat (string, "-");
> + strcat (string, commands[0].argv[0]);
> + const char *string_args[] = {string, "--version", NULL};
> + int exit_status = 0;
> + int err = 0;
> + const char *errmsg = pex_one (PEX_SEARCH, string,
> + CONST_CAST (char **, string_args), string,
> + NULL, NULL, &exit_status, &err);

I think this should be handled under find_a_program instead of
execute. That should simplify things slightly.
You should also most likely use concat here instead of
XNEWVEC/strcpy/strcat which will also simplify the code.
Like string = concat (DEFAULT_REAL_TARGET_MACHINE, "-", commands[0].prog);

I think this should be done for more than just as/ld but also objcopy
(which is used for gsplit-dwarf).
Is there a reason why you are needing to try to execute with
"--version" as an argument here?

Thanks,
Andrew Pinski

> + if (errmsg == NULL && exit_status == 0 && err == 0)
> +   {
> + commands[0].argv[0] = string;
> + commands[0].prog = string;
> +   }
> +   }
>  }
>
>for (n_commands = 1, i = 0; argbuf.iterate (i, &arg); i++)
> --
> 2.39.2
>


RE: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread Li, Pan2
Thanks Andrew for comments.

> I think you need to make sure type and @0's type matches.

Oh, yes, we need that, will update in v2.

> Also I don't think you need :c here since you don't match @0 nor @1 more than 
> once.

You mean the :c from (IFN_ADD_OVERFLOW:c@2 @0 @1)), right?
My initial idea is to catch both the (IFN_ADD_OVERFLOW @0 @1) and 
(IFN_ADD_OVERFLOW @1 @0).
It is unnecessary if IFN_ADD_OVERFLOW takes care of this already.

Pan


From: Andrew Pinski 
Sent: Tuesday, May 21, 2024 7:40 PM
To: Li, Pan2 
Cc: GCC Patches ; 钟居哲 ; Kito 
Cheng ; Tamar Christina ; 
Richard Guenther 
Subject: Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form 
for unsigned SAT_ADD


On Tue, May 21, 2024, 3:55 AM mailto:pan2...@intel.com>> 
wrote:
From: Pan Li mailto:pan2...@intel.com>>

This patch would like to support the __builtin_add_overflow branch form for
unsigned SAT_ADD.  For example as below:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Different to the branchless version,  we leverage the simplify to
convert the branch version of SAT_ADD into branchless if and only
if the backend has supported the IFN_SAT_ADD.  Thus,  the backend has
the ability to choose branch or branchless implementation of .SAT_ADD.
For example,  some target can take care of branches code more optimally.

When the target implement the IFN_SAT_ADD for unsigned and before this
patch:

uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
goto ; [35.00%]
  else
goto ; [65.00%]
;;succ:   4
;;3

;;   basic block 3, loop depth 0
;;pred:   2
  _1 = REALPART_EXPR <_6>;
;;succ:   4

;;   basic block 4, loop depth 0
;;pred:   3
;;2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;succ:   EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;succ:   EXIT
}

The below test suites are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

* match.pd: Add new simplify to convert branch SAT_ADD into
branchless,  if and only if backend implement the IFN.

Signed-off-by: Pan Li mailto:pan2...@intel.com>>
---
 gcc/match.pd | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0f9c34fa897..8b9ded98323 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))

+#if GIMPLE
+
+(simplify
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
+  integer_minus_onep (realpart @2))
+ (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
+  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))

I think you need to make sure type and @0's type matches.

Also I don't think you need :c here since you don't match @0 nor @1 more than 
once.

Thanks,
Andrew


+
+#endif
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
--
2.34.1


Re: [Patch, aarch64, middle-end] Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Alex Coplan
On 21/05/2024 16:02, Ajit Agarwal wrote:
> Hello Alex:
> 
> On 21/05/24 1:16 am, Alex Coplan wrote:
> > On 20/05/2024 18:44, Alex Coplan wrote:
> >> Hi Ajit,
> >>
> >> On 20/05/2024 21:50, Ajit Agarwal wrote:
> >>> Hello Alex/Richard:
> >>>
> >>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> >>> to support multiple targets.
> >>>
> >>> Common infrastructure of load store pair fusion is divided into
> >>> target independent and target dependent code.
> >>>
> >>> Target independent code is structured in the following files.
> >>> gcc/pair-fusion.h
> >>> gcc/pair-fusion.cc
> >>>
> >>> Target independent code is the Generic code with pure virtual
> >>> function to interface betwwen target independent and dependent
> >>> code.
> >>>
> >>> Bootstrapped and regtested on aarch64-linux-gnu.
> >>>
> >>> Thanks & Regards
> >>> Ajit
> >>>
> >>> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
> >>>
> >>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> >>> to support multiple targets.
> >>>
> >>> Common infrastructure of load store pair fusion is divided into
> >>> target independent and target dependent code.
> >>>
> >>> Target independent code is structured in the following files.
> >>> gcc/pair-fusion.h
> >>> gcc/pair-fusion.cc
> >>>
> >>> Target independent code is the Generic code with pure virtual
> >>> function to interface betwwen target independent and dependent
> >>> code.
> >>>
> >>> 2024-05-20  Ajit Kumar Agarwal  
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>>   * pair-fusion.h: Generic header code for load store fusion
> >>
> >> Insert "pair" before fusion?
> 
> Addressed in v1 of the patch.
> >>
> >>>   that can be shared across different architectures.
> >>>   * pair-fusion.cc: Generic source code implementation for
> >>>   load store fusion that can be shared across different architectures.
> >>
> >> Likewise.
> Addressed in v1 of the patch.
> >>
> >>>   * Makefile.in: Add new executable pair-fusion.o
> >>
> >> It's not an executable but an object file.
> >>
> >>>   * config/aarch64/aarch64-ldp-fusion.cc: Target specific
> >>>   code for load store fusion of aarch64.
> >>
> >> I guess this should say something like: "Delete generic code and move it
> >> to pair-fusion.cc in the middle-end."
> >>
> >> I've left some comments below on the header file.  The rest of the patch
> >> looks pretty good to me.  I tried diffing the original contents of
> >> aarch64-ldp-fusion.cc with pair-fusion.cc, and that looks as expected.
> >>
> > 
> > 
> > 
> >>> diff --git a/gcc/pair-fusion.h b/gcc/pair-fusion.h
> >>> new file mode 100644
> >>> index 000..00f6d3e149a
> >>> --- /dev/null
> >>> +++ b/gcc/pair-fusion.h
> >>> @@ -0,0 +1,340 @@
> >>> +// Pair Mem fusion generic header file.
> >>> +// Copyright (C) 2024 Free Software Foundation, Inc.
> >>> +//
> >>> +// This file is part of GCC.
> >>> +//
> >>> +// GCC is free software; you can redistribute it and/or modify it
> >>> +// under the terms of the GNU General Public License as published by
> >>> +// the Free Software Foundation; either version 3, or (at your option)
> >>> +// any later version.
> >>> +//
> >>> +// GCC is distributed in the hope that it will be useful, but
> >>> +// WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> +// General Public License for more details.
> >>> +//
> >>> +// You should have received a copy of the GNU General Public License
> >>> +// along with GCC; see the file COPYING3.  If not see
> >>> +// .
> >>> +
> >>> +#define INCLUDE_ALGORITHM
> >>> +#define INCLUDE_FUNCTIONAL
> >>> +#define INCLUDE_LIST
> >>> +#define INCLUDE_TYPE_TRAITS
> >>> +#include "config.h"
> >>> +#include "system.h"
> >>> +#include "coretypes.h"
> >>> +#include "backend.h"
> >>> +#include "rtl.h"
> >>> +#include "df.h"
> >>> +#include "rtl-iter.h"
> >>> +#include "rtl-ssa.h"
> >>
> >> I'm not sure how desirable this is, but you might be able to
> >> forward-declare RTL-SSA types like this:
> >>
> >> class def_info;
> >> class insn_info;
> >> class insn_range_info;
> >>
> >> thus removing the need to include the header here, since the interface
> >> only refers to these types by pointer or reference.
> >>
> >> Richard: please say if you'd prefer keeping the include.
> >>
> 
> Doing forward declaration gives ambigous errors with conflicting
> insn_info with rtl_ssa::insn_info and templated initialization
> errors. Also with overloaded operator with insn_info is not 
> defined with forward declaration.

So I tried this locally and it seems to work if you wrap the
forward-decls in:

namespace rtl_ssa {
  [...]
};

and indeed you'd need to move the definition of base_cand::viable () to
pair-fusion.cc so that we don't dereference those pointers in the
header (would be good to mark it inline if you do that).

Btw, I noticed that the GCC coding conventions
(https://gcc.gnu.org/codingconventions.h

Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread Andrew Pinski
On Tue, May 21, 2024 at 5:28 AM Li, Pan2  wrote:
>
> Thanks Andrew for comments.
>
>
>
> > I think you need to make sure type and @0's type matches.
>
>
>
> Oh, yes, we need that, will update in v2.
>
>
>
> > Also I don't think you need :c here since you don't match @0 nor @1 more 
> > than once.
>
>
>
> You mean the :c from (IFN_ADD_OVERFLOW:c@2 @0 @1)), right?
>
> My initial idea is to catch both the (IFN_ADD_OVERFLOW @0 @1) and 
> (IFN_ADD_OVERFLOW @1 @0).
>
> It is unnecessary if IFN_ADD_OVERFLOW takes care of this already.

It is unnecessary, since there is a canonical form/order in this case (at
least there should be).
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
> +  integer_minus_onep (realpart @2))

Since you are matching @2 for the realpart rather than `(IFN_ADD_OVERFLOW
@0 @1)` directly, the :c is not needed and genmatch will just generate
extra matching code that can never be reached.

Thanks,
Andrew.

>
>
>
> Pan
>
>
>
>
>
> From: Andrew Pinski 
> Sent: Tuesday, May 21, 2024 7:40 PM
> To: Li, Pan2 
> Cc: GCC Patches ; 钟居哲 ; Kito 
> Cheng ; Tamar Christina ; 
> Richard Guenther 
> Subject: Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form 
> for unsigned SAT_ADD
>
>
>
>
>
> On Tue, May 21, 2024, 3:55 AM  wrote:
>
> From: Pan Li 
>
> This patch would like to support the __builtin_add_overflow branch form for
> unsigned SAT_ADD.  For example as below:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Different to the branchless version,  we leverage the simplify to
> convert the branch version of SAT_ADD into branchless if and only
> if the backend has supported the IFN_SAT_ADD.  Thus,  the backend has
> the ability to choose branch or branchless implementation of .SAT_ADD.
> For example,  some target can take care of branches code more optimally.
>
> When the target implement the IFN_SAT_ADD for unsigned and before this
> patch:
>
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
> goto ; [35.00%]
>   else
> goto ; [65.00%]
> ;;succ:   4
> ;;3
>
> ;;   basic block 3, loop depth 0
> ;;pred:   2
>   _1 = REALPART_EXPR <_6>;
> ;;succ:   4
>
> ;;   basic block 4, loop depth 0
> ;;pred:   3
> ;;2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;succ:   EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;succ:   EXIT
> }
>
> The below test suites are passed for this patch:
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add new simplify to convert branch SAT_ADD into
> branchless,  if and only if backend implement the IFN.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/match.pd | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0f9c34fa897..8b9ded98323 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +#if GIMPLE
> +
> +(simplify
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
> +  integer_minus_onep (realpart @2))
> + (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
> +  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
>
>
>
> I think you need to make sure type and @0's type matches.
>
>
>
> Also I don't think you need :c here since you don't match @0 nor @1 more than 
> once.
>
>
>
> Thanks,
>
> Andrew
>
>
>
>
>
> +
> +#endif
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
> x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> --
> 2.34.1


Re: [PATCH] driver: Use -as/ld as final fallback instead of as/ld for cross

2024-05-21 Thread YunQiang Su
Andrew Pinski  于2024年5月21日周二 20:23写道:
>
> On Tue, May 21, 2024 at 5:12 AM YunQiang Su  wrote:
> >
> > If `find_a_program` cannot find `as/ld` and we are a cross toolchain,
> > the final fallback is `as/ld` of system.  In fact, we can have a try
> > with -as/ld before fallback to native as/ld.
> >
> > This patch is derivatived from Debian's patch:
> >   gcc-search-prefixed-as-ld.diff
> >
> > gcc
> > * gcc.cc(execute): Looks for -as/ld before fallback
> > to native as/ld.
> > ---
> >  gcc/gcc.cc | 21 +
> >  1 file changed, 21 insertions(+)
> >
> > diff --git a/gcc/gcc.cc b/gcc/gcc.cc
> > index 830a4700a87..8a1bdb5e3e2 100644
> > --- a/gcc/gcc.cc
> > +++ b/gcc/gcc.cc
> > @@ -3293,6 +3293,27 @@ execute (void)
> >string = find_a_program(commands[0].prog);
> >if (string)
> > commands[0].argv[0] = string;
> > +  else if (*cross_compile != '0'
> > +   && (!strcmp (commands[0].argv[0], "as")
> > +   || !strcmp (commands[0].argv[0], "ld")))
> > +   {
> > + string = XNEWVEC (char, strlen (commands[0].argv[0]) + 2
> > + + strlen (DEFAULT_REAL_TARGET_MACHINE));
> > + strcpy (string, DEFAULT_REAL_TARGET_MACHINE);
> > + strcat (string, "-");
> > + strcat (string, commands[0].argv[0]);
> > + const char *string_args[] = {string, "--version", NULL};
> > + int exit_status = 0;
> > + int err = 0;
> > + const char *errmsg = pex_one (PEX_SEARCH, string,
> > + CONST_CAST (char **, string_args), string,
> > + NULL, NULL, &exit_status, &err);
>
> I think this should be handled under find_a_program instead of
> execute. That should simplify things slightly.

Maybe. But it seems that they are two different problems.
`find_a_program` won't try to find any as/ld from user path dirs,
such as /usr/bin

My patch tries to resolve this problem: if `find_a_program` fails to find
any usable ld/as, then let's fall back to /usr/bin/<triple>-as
instead of /usr/bin/as.

Yes, we should also make `find_a_program` look for <triple>-as
in its search path, but I guess that should be done in a separate patch.

> You should also most likely use concat here instead of
> XNEWVEC/strcpy/strcat which will also simplify the code.
> Like string = concat (DEFAULT_REAL_TARGET_MACHINE, "-", commands[0].prog);
>
> I think this should be done for more than just as/ld but also objcopy
> (which is used for gsplit-dwarf).
> Is there a reason why you are needing to try to execute with
> "--version" as an argument here?
>

I am trying to make it possible to fall back to the system's ld/as if
<triple>-as/ld doesn't exist.
Running it with `--version` is how I test whether <triple>-as/ld is usable.

> Thanks,
> Andrew Pinski
>
> > + if (errmsg == NULL && exit_status == 0 && err == 0)
> > +   {
> > + commands[0].argv[0] = string;
> > + commands[0].prog = string;
> > +   }
> > +   }
> >  }
> >
> >for (n_commands = 1, i = 0; argbuf.iterate (i, &arg); i++)
> > --
> > 2.39.2
> >


[PATCH 1/4] Avoid requiring VEC_PERM representatives

2024-05-21 Thread Richard Biener
The following plugs one hole where we require a VEC_PERM node
representative unnecessarily.  This is for vect_check_store_rhs
which looks at the RHS and checks whether a constant can be
native encoded.  The fix is to guard that with vect_constant_def
additionally and making vect_is_simple_use forgiving for a missing
SLP_TREE_REPRESENTATIVE when the child is a VEC_PERM node,
initializing the scalar def to error_mark_node.

* tree-vect-stmts.cc (vect_check_store_rhs): Look at *rhs
only when it's a vec_constant_def.
(vect_is_simple_use): When we have no representative for
an internal node, fill in *op with error_mark_node.
---
 gcc/tree-vect-stmts.cc | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 672959501bb..4219ad832db 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2553,7 +2553,8 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info 
stmt_info,
 
   /* In the case this is a store from a constant make sure
  native_encode_expr can handle it.  */
-  if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
+  if (rhs_dt == vect_constant_def
+  && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
 {
   if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -14002,8 +14003,26 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info 
stmt, slp_tree slp_node,
   *vectype = SLP_TREE_VECTYPE (child);
   if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
{
- *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
- return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
+ /* ???  VEC_PERM nodes might be intermediate and their lane value
+have no representative (nor do we build a VEC_PERM stmt for
+the actual operation).  Note for two-operator nodes we set
+a representative but leave scalar stmts empty as we'd only
+have one for a subset of lanes.  Ideally no caller would
+require *op for internal defs.  */
+ if (SLP_TREE_REPRESENTATIVE (child))
+   {
+ *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
+ return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
+   }
+ else
+   {
+ gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
+ *op = error_mark_node;
+ *dt = vect_internal_def;
+ if (def_stmt_info_out)
+   *def_stmt_info_out = NULL;
+ return true;
+   }
}
   else
{
-- 
2.35.3



[PATCH 2/4] Avoid SLP_REPRESENTATIVE access for VEC_PERM in SLP scheduling

2024-05-21 Thread Richard Biener
SLP permute nodes can end up without a SLP_REPRESENTATIVE now,
the following avoids touching it in this case in vect_schedule_slp_node.

* tree-vect-slp.cc (vect_schedule_slp_node): Avoid looking
at SLP_REPRESENTATIVE for VEC_PERM nodes.
---
 gcc/tree-vect-slp.cc | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f34ed54a70b..43f2c153bf0 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -9301,13 +9301,8 @@ vect_schedule_slp_node (vec_info *vinfo,
   gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
   SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
 
-  if (dump_enabled_p ())
-dump_printf_loc (MSG_NOTE, vect_location,
-"-->vectorizing SLP node starting from: %G",
-stmt_info->stmt);
-
-  if (STMT_VINFO_DATA_REF (stmt_info)
-  && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+  && STMT_VINFO_DATA_REF (stmt_info))
 {
   /* Vectorized loads go before the first scalar load to make it
 ready early, vectorized stores go before the last scalar
@@ -9319,10 +9314,10 @@ vect_schedule_slp_node (vec_info *vinfo,
last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
   si = gsi_for_stmt (last_stmt_info->stmt);
 }
-  else if ((STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type
-   || STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type
-   || STMT_VINFO_TYPE (stmt_info) == phi_info_type)
-  && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+  else if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
+  && (STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type
+  || STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type
+  || STMT_VINFO_TYPE (stmt_info) == phi_info_type))
 {
   /* For PHI node vectorization we do not use the insertion iterator.  */
   si = gsi_none ();
@@ -9456,6 +9451,9 @@ vect_schedule_slp_node (vec_info *vinfo,
   /* Handle purely internal nodes.  */
   if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
 {
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"-->vectorizing SLP permutation node\n");
   /* ???  the transform kind is stored to STMT_VINFO_TYPE which might
 be shared with different SLP nodes (but usually it's the same
 operation apart from the case the stmt is only there for denoting
@@ -9474,7 +9472,13 @@ vect_schedule_slp_node (vec_info *vinfo,
  }
 }
   else
-vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"-->vectorizing SLP node starting from: %G",
+stmt_info->stmt);
+  vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
+}
 }
 
 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
-- 
2.35.3



[PATCH 3/4] Avoid splitting store dataref groups during SLP discovery

2024-05-21 Thread Richard Biener
The following avoids splitting store dataref groups during SLP
discovery but instead forces (eventually single-lane) consecutive
lane SLP discovery for all lanes of the group, creating VEC_PERM
SLP nodes merging them so the store will always cover the whole group.

With this for example

int x[1024], y[1024], z[1024], w[1024];
void foo (void)
{
  for (int i = 0; i < 256; i++)
{
  x[4*i+0] = y[2*i+0];
  x[4*i+1] = y[2*i+1];
  x[4*i+2] = z[i];
  x[4*i+3] = w[i];
}
}

which was previously using hybrid SLP can now be fully SLPed and
SSE code generated looks better (but of course you never know,
I didn't actually benchmark).  We of course need a VF of four here.

.L2:
movdqa  z(%rax), %xmm0
movdqa  w(%rax), %xmm4
movdqa  y(%rax,%rax), %xmm2
movdqa  y+16(%rax,%rax), %xmm1
movdqa  %xmm0, %xmm3
punpckhdq   %xmm4, %xmm0
punpckldq   %xmm4, %xmm3
movdqa  %xmm2, %xmm4
shufps  $238, %xmm3, %xmm2
movaps  %xmm2, x+16(,%rax,4)
movdqa  %xmm1, %xmm2
shufps  $68, %xmm3, %xmm4
shufps  $68, %xmm0, %xmm2
movaps  %xmm4, x(,%rax,4)
shufps  $238, %xmm0, %xmm1
movaps  %xmm2, x+32(,%rax,4)
movaps  %xmm1, x+48(,%rax,4)
addq$16, %rax
cmpq$1024, %rax
jne .L2

The extra permute nodes merging distinct branches of the SLP
tree might be unexpected for some code, esp. since
SLP_TREE_REPRESENTATIVE cannot be meaningfully set and we
cannot populate SLP_TREE_SCALAR_STMTS or SLP_TREE_SCALAR_OPS
consistently as we can have a mix of both.

The patch keeps the sub-trees form consecutive lanes but that's
in principle not necessary if we for example have an even/odd
split which now would result in N single-lane sub-trees.  That's
left for future improvements.

The interesting part is how VLA vector ISAs handle merging of
two vectors that's not trivial even/odd merging.  The strategy
of how to build the permute tree might need adjustments for that
(in the end splitting each branch to single lanes and then doing
even/odd merging would be the brute-force fallback).  Not sure
how much we can or should rely on the SLP optimize pass to handle
this.

* tree-vect-slp.cc (vect_build_slp_instance): Do not split
store dataref groups on loop SLP discovery failure but create
a single SLP instance for the stores but branch to SLP sub-trees
and merge with a series of VEC_PERM nodes.
---
 gcc/tree-vect-slp.cc | 240 ++-
 1 file changed, 214 insertions(+), 26 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 43f2c153bf0..873748b0a72 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3468,12 +3468,7 @@ vect_build_slp_instance (vec_info *vinfo,
  return true;
}
 }
-  else
-{
-  /* Failed to SLP.  */
-  /* Free the allocated memory.  */
-  scalar_stmts.release ();
-}
+  /* Failed to SLP.  */
 
   stmt_vec_info stmt_info = stmt_info_;
   /* Try to break the group up into pieces.  */
@@ -3491,6 +3486,9 @@ vect_build_slp_instance (vec_info *vinfo,
   if (is_a  (vinfo)
  && (i > 1 && i < group_size))
{
+ /* Free the allocated memory.  */
+ scalar_stmts.release ();
+
  tree scalar_type
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
@@ -3535,38 +3533,228 @@ vect_build_slp_instance (vec_info *vinfo,
}
}
 
-  /* For loop vectorization split into arbitrary pieces of size > 1.  */
-  if (is_a  (vinfo)
- && (i > 1 && i < group_size)
- && !vect_slp_prefer_store_lanes_p (vinfo, stmt_info, group_size, i))
+  /* For loop vectorization split the RHS into arbitrary pieces of
+size >= 1.  */
+  else if (is_a  (vinfo)
+  && (i > 0 && i < group_size)
+  && !vect_slp_prefer_store_lanes_p (vinfo,
+ stmt_info, group_size, i))
{
- unsigned group1_size = i;
-
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
 "Splitting SLP group at stmt %u\n", i);
 
- stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
-  group1_size);
- /* Loop vectorization cannot handle gaps in stores, make sure
-the split group appears as strided.  */
- STMT_VINFO_STRIDED_P (rest) = 1;
- DR_GROUP_GAP (rest) = 0;
- STMT_VINFO_STRIDED_P (stmt_info) = 1;
- DR_GROUP_GAP (stmt_info) = 0;
+ /* Analyze the stored values and pinch them together with
+a permute node so we can preserve the whole store group.  */
+ auto_vec rhs_nodes;
+
+ /* Calculate the unrolling factor based on

[PATCH 4/4] Testsuite updates

2024-05-21 Thread Richard Biener
The gcc.dg/vect/slp-12a.c case is interesting as we currently split
the 8 store group into lanes 0-5 which we SLP with an unroll factor
of two (on x86-64 with SSE) and the remaining two lanes are using
interleaving vectorization with a final unroll factor of four.  Thus
we're using hybrid SLP within a single store group.  After the change
we discover the same 0-5 lane SLP part as well as two single-lane
parts feeding the full store group.  But that results in a load
permutation that isn't supported (I have WIP patches to rectify that).
So we end up cancelling SLP and vectorizing the whole loop with
interleaving which is IMO good and results in better code.

This is similar for gcc.target/i386/pr52252-atom.c where interleaving
generates much better code than hybrid SLP.  I'm unsure how to update
the testcase though.

gcc.dg/vect/slp-21.c runs into similar situations.  Note that when,
while analyzing SLP operations, we discard an instance we currently
force the full loop to have no SLP because hybrid detection is
broken.  It's probably not worth fixing this at this moment.

For gcc.dg/vect/pr97428.c we are not splitting the 16 store group
into two but merge the two 8 lane loads into one before doing the
store and thus have only a single SLP instance.  A similar situation
happens in gcc.dg/vect/slp-11c.c but the branches feeding the
single SLP store only have a single lane.  Likewise for
gcc.dg/vect/vect-complex-5.c and gcc.dg/vect/vect-gather-2.c.

gcc.dg/vect/slp-cond-1.c has an additional SLP vectorization
with a SLP store group of size two but two single-lane branches.

gcc.target/i386/pr98928.c ICEs in SLP permute optimization
because we don't expect a constant and internal branch to be
merged with a permute node in
vect_optimize_slp_pass::change_vec_perm_layout:4859 (the only
permutes merging two SLP nodes are two-operator nodes right now).
This still requires fixing.

The whole series has been bootstrapped and tested on 
x86_64-unknown-linux-gnu with the gcc.target/i386/pr98928.c FAIL
unfixed.

Comments welcome (and hello ARM CI), RISC-V and other arch
testing appreciated.  Unless there are comments to the contrary
I plan to push patch 1 and 2 tomorrow.

Thanks,
Richard.

* gcc.dg/vect/pr97428.c: Expect a single store SLP group.
* gcc.dg/vect/slp-11c.c: Likewise.
* gcc.dg/vect/vect-complex-5.c: Likewise.
* gcc.dg/vect/slp-12a.c: Do not expect SLP.
* gcc.dg/vect/slp-21.c: Likewise.
* gcc.dg/vect/slp-cond-1.c: Expect one more SLP.
* gcc.dg/vect/vect-gather-2.c: Expect SLP to be used.
* gcc.target/i386/pr52252-atom.c: XFAIL test for palignr.
---
 gcc/testsuite/gcc.dg/vect/pr97428.c  |  2 +-
 gcc/testsuite/gcc.dg/vect/slp-11c.c  |  5 +++--
 gcc/testsuite/gcc.dg/vect/slp-12a.c  |  6 +-
 gcc/testsuite/gcc.dg/vect/slp-21.c   | 19 +--
 gcc/testsuite/gcc.dg/vect/slp-cond-1.c   |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-complex-5.c   |  2 +-
 gcc/testsuite/gcc.dg/vect/vect-gather-2.c|  1 -
 gcc/testsuite/gcc.target/i386/pr52252-atom.c |  3 ++-
 8 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr97428.c 
b/gcc/testsuite/gcc.dg/vect/pr97428.c
index 60dd984cfd3..3cc9976c00c 100644
--- a/gcc/testsuite/gcc.dg/vect/pr97428.c
+++ b/gcc/testsuite/gcc.dg/vect/pr97428.c
@@ -44,5 +44,5 @@ void foo_i2(dcmlx4_t dst[], const dcmlx_t src[], int n)
 /* { dg-final { scan-tree-dump "Detected interleaving store of size 16" "vect" 
} } */
 /* We're not able to peel & apply re-aligning to make accesses well-aligned 
for !vect_hw_misalign,
but we could by peeling the stores for alignment and applying re-aligning 
loads.  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { 
xfail { ! vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
xfail { ! vect_hw_misalign } } } } */
 /* { dg-final { scan-tree-dump-not "gap of 6 elements" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-11c.c 
b/gcc/testsuite/gcc.dg/vect/slp-11c.c
index 0f680cd4e60..169b0d10eec 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-11c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-11c.c
@@ -13,7 +13,8 @@ main1 ()
   unsigned int in[N*8] = 
{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
   float out[N*8];
 
-  /* Different operations - not SLPable.  */
+  /* Different operations - we SLP the store and split the group to two
+ single-lane branches.  */
   for (i = 0; i < N*4; i++)
 {
   out[i*2] = ((float) in[i*2] * 2 + 6) ;
@@ -44,4 +45,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
{ vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { 
!

Re: [PATCH v3] i386: Disable ix86_expand_vecop_qihi2 when !TARGET_AVX512BW

2024-05-21 Thread Uros Bizjak
On Tue, May 21, 2024 at 11:01 AM Haochen Jiang  wrote:
>
> Hi all,
>
> This is the v3 patch to fix PR115069. The new testcase has passed.
>
> Changes in v3:
>   - Simplify the testcase.
>
> Changes in v2:
>   - Add a testcase.
>   - Change the comment for the early exit.
>
> Thx,
> Haochen
>
> Since vpermq is really slow, we should avoid using it for permutation
> when vpmovwb is not available (needs AVX512BW) for ix86_expand_vecop_qihi2
> and fall back to ix86_expand_vecop_qihi.
>
> gcc/ChangeLog:
>
> PR target/115069
> * config/i386/i386-expand.cc (ix86_expand_vecop_qihi2):
> Do not enable the optimization when AVX512BW is not enabled.
>
> gcc/testsuite/ChangeLog:
>
> PR target/115069
> * gcc.target/i386/pr115069.c: New.

LGTM, with a nit below.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc   |  7 +++
>  gcc/testsuite/gcc.target/i386/pr115069.c | 10 ++
>  2 files changed, 17 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115069.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index a6132911e6a..f7939761879 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -24323,6 +24323,13 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx 
> dest, rtx op1, rtx op2)
>bool op2vec = GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT;
>bool uns_p = code != ASHIFTRT;
>
> +  /* Without VPMOVWB (provided by AVX512BW ISA), the expansion uses the
> + generic permutation to merge the data back into the right place.  This
> + permutation results in VPERMQ, which is slow, so better fall back to
> + ix86_expand_vecop_qihi.  */
> +  if (!TARGET_AVX512BW)
> +return false;
> +
>if ((qimode == V16QImode && !TARGET_AVX2)
>|| (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
>/* There are no V64HImode instructions.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr115069.c 
> b/gcc/testsuite/gcc.target/i386/pr115069.c
> new file mode 100644
> index 000..7f1ff209f26
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115069.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +/* { dg-final { scan-assembler-not "vpermq" } } */
> +
> +typedef char v16qi __attribute__((vector_size(16)));
> +
> +v16qi foo (v16qi a, v16qi b) {
> +return a * b;
> +}
> +

Please remove the trailing line.

> --
> 2.31.1
>


Re: [Patch, aarch64, middle-end] Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Alex Coplan
On 20/05/2024 21:50, Ajit Agarwal wrote:
> Hello Alex/Richard:
> 
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
> 
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
> 
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
> 
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
> 
> Bootstrapped and regtested on aarch64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
> 
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
> 
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
> 
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
> 
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
> 
> 2024-05-20  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * pair-fusion.h: Generic header code for load store fusion
>   that can be shared across different architectures.
>   * pair-fusion.cc: Generic source code implementation for
>   load store fusion that can be shared across different architectures.
>   * Makefile.in: Add new executable pair-fusion.o
>   * config/aarch64/aarch64-ldp-fusion.cc: Target specific
>   code for load store fusion of aarch64.

Apologies for missing this in the last review but you'll also need to
update gcc/config/aarch64/t-aarch64 to add a dependency on pair-fusion.h
for aarch64-ldp-fusion.o.

Thanks,
Alex

> ---
>  gcc/Makefile.in  |1 +
>  gcc/config/aarch64/aarch64-ldp-fusion.cc | 3303 +-
>  gcc/pair-fusion.cc   | 2852 +++
>  gcc/pair-fusion.h|  340 +++
>  4 files changed, 3268 insertions(+), 3228 deletions(-)
>  create mode 100644 gcc/pair-fusion.cc
>  create mode 100644 gcc/pair-fusion.h



[PATCH] rs6000: Don't pass -many to the assembler [PR112868]

2024-05-21 Thread jeevitha


Hi All,

The following patch has been bootstrapped and regtested with default 
configuration
[--enable-checking=yes] and with --enable-checking=release on powerpc64le-linux.

This patch removes passing the -many assembler option for release builds. Now,
GCC no longer passes -many under any conditions to the assembler.

2024-05-15  Jeevitha Palanisamy  

gcc/
PR target/112868
* config/rs6000/rs6000.h (ASM_OPT_ANY): Removed Define.
(ASM_CPU_SPEC): Remove ASM_OPT_ANY usage.

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 2cde2e329b0..1ccaee9d74c 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -94,12 +94,6 @@
   "%{mdejagnu-*: %

Re: [PATCH v5 1/5] Improve must tail in RTL backend

2024-05-21 Thread Andi Kleen
> I can't see how this triggers on the IL above, the loop should have
> ignored both the return and the clobber and when recursing to
> the predecessor stop before the above check when running into the
> call?

Yes, I tracked that down later. The problem was that there
were multiple successors to the BB due to exception handling,
which makes the find_tail_calls walker give up.

Putting the new pass after ehcleanup fixed that, but there
are still cases when ehcleanup cannot get rid of them and
then it gives up. musttail checking at expand time still
works, but can only give a vague error message.

> 
> > In an optimized build this passes, but with -O0 it always fails
> > when the pass is placed before pass_optimizations_g. I assume
> > it's some problem with mem ssa form.
> >
> > Any ideas how to fix that? Otherwise I can restrict musttail to non
> > structs.
> 
> I wonder how this works when optimizing?

It just doesn't. You need optimization to do tail calls with
structs. The only alternative would be to detect the situation
and pull in some extra passes.

Also even with optimization it only works for structs that
fit into registers. This could be maybe fixed, but is out of scope
for this patch kit.

-Andi


RE: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form for unsigned SAT_ADD

2024-05-21 Thread Li, Pan2
> Since you are matching @2 for the realpart rather than `(IFN_ADD_OVERFLOW
> @0 @1)` directly the :c is not needed and genmatch will just generate
> extra matching code that cannot be reached

Got it, thanks for explanation. I may need to check the generated matching code 
for a better understanding for this.

Pan

-Original Message-
From: Andrew Pinski  
Sent: Tuesday, May 21, 2024 8:34 PM
To: Li, Pan2 
Cc: GCC Patches ; 钟居哲 ; Kito 
Cheng ; Tamar Christina ; 
Richard Guenther 
Subject: Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form 
for unsigned SAT_ADD

On Tue, May 21, 2024 at 5:28 AM Li, Pan2  wrote:
>
> Thanks Andrew for comments.
>
>
>
> > I think you need to make sure type and @0's type matches.
>
>
>
> Oh, yes, we need that, will update in v2.
>
>
>
> > Also I don't think you need :c here since you don't match @0 nor @1 more 
> > than once.
>
>
>
> You mean the :c from (IFN_ADD_OVERFLOW:c@2 @0 @1)), right?
>
> My initial idea is to catch both the (IFN_ADD_OVERFLOW @0 @1) and 
> (IFN_ADD_OVERFLOW @1 @0).
>
> It is unnecessary if IFN_ADD_OVERFLOW takes care of this already.

Since in this case there is Canonical form/order here (at least there
should be).
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
> +  integer_minus_onep (realpart @2))

Since you are matching @2 for the realpart rather than `(IFN_ADD_OVERFLOW
@0 @1)` directly the :c is not needed and genmatch will just generate
extra matching code that cannot be reached

Thanks,
Andrew.

>
>
>
> Pan
>
>
>
>
>
> From: Andrew Pinski 
> Sent: Tuesday, May 21, 2024 7:40 PM
> To: Li, Pan2 
> Cc: GCC Patches ; 钟居哲 ; Kito 
> Cheng ; Tamar Christina ; 
> Richard Guenther 
> Subject: Re: [PATCH v1 1/2] Match: Support __builtin_add_overflow branch form 
> for unsigned SAT_ADD
>
>
>
>
>
> On Tue, May 21, 2024, 3:55 AM  wrote:
>
> From: Pan Li 
>
> This patch would like to support the __builtin_add_overflow branch form for
> unsigned SAT_ADD.  For example as below:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Different to the branchless version,  we leverage the simplify to
> convert the branch version of SAT_ADD into branchless if and only
> if the backend has supported the IFN_SAT_ADD.  Thus,  the backend has
> the ability to choose branch or branchless implementation of .SAT_ADD.
> For example,  some target can take care of branches code more optimally.
>
> When the target implements the IFN_SAT_ADD for unsigned and before this
> patch:
>
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
> goto ; [35.00%]
>   else
> goto ; [65.00%]
> ;;succ:   4
> ;;3
>
> ;;   basic block 3, loop depth 0
> ;;pred:   2
>   _1 = REALPART_EXPR <_6>;
> ;;succ:   4
>
> ;;   basic block 4, loop depth 0
> ;;pred:   3
> ;;2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;succ:   EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;succ:   EXIT
> }
>
> The below test suites are passed for this patch:
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
> * match.pd: Add new simplify to convert branch SAT_ADD into
> branchless,  if and only if backend implement the IFN.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/match.pd | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0f9c34fa897..8b9ded98323 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3094,6 +3094,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +#if GIMPLE
> +
> +(simplify
> + (cond (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
> +  integer_minus_onep (realpart @2))
> + (if (direct_internal_fn_supported_p (IFN_SAT_ADD, type, OPTIMIZE_FOR_BOTH))
> +  (bit_ior (plus@3 @0 @1) (negate (convert (lt @3 @0))
>
>
>
> I think you need to make sure type and @0's type matches.
>
>
>
> Also I don't think you need :c here since you don't match @0 nor @1 more than 
> once.
>
>
>
> Thanks,
>
> Andrew
>
>
>
>
>
> +
> +#endif
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
> x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> --
> 2.34.1


Re: [committed] PATCH for Re: Stepping down as maintainer for ARC and Epiphany

2024-05-21 Thread Jeff Law




On 5/21/24 12:05 AM, Richard Biener via Gcc wrote:

On Mon, May 20, 2024 at 4:45 PM Gerald Pfeifer  wrote:


On Wed, 5 Jul 2023, Joern Rennecke wrote:

I haven't worked with these targets in years and can't really do
sensible maintenance or reviews of patches for them. I am currently
working on optimizations for other ports like RISC-V.


I noticed MAINTAINERS was not updated, so pushed the patch below.


That leaves the epiphany port unmaintained.  Should we automatically add such
ports to the list of obsoleted ports?
Given that epiphany has randomly failed tests for the last 3+ years due 
to bugs in its patterns, yes, it really needs to be deprecated.


I tried to fix the worst of the offenders in epiphany.md a few years 
back and gave up.  Essentially seemingly innocent changes in the RTL 
will cause reload to occasionally not see a path to get constraints 
satisfied.  So a test which passes today, will flip to failing tomorrow 
while some other test of tests will go the other way.




jeff



Re: [committed] PATCH for Re: Stepping down as maintainer for ARC and Epiphany

2024-05-21 Thread Paul Koning



> On May 21, 2024, at 9:57 AM, Jeff Law  wrote:
> 
> 
> 
> On 5/21/24 12:05 AM, Richard Biener via Gcc wrote:
>> On Mon, May 20, 2024 at 4:45 PM Gerald Pfeifer  wrote:
>>> 
>>> On Wed, 5 Jul 2023, Joern Rennecke wrote:
 I haven't worked with these targets in years and can't really do
 sensible maintenance or reviews of patches for them. I am currently
 working on optimizations for other ports like RISC-V.
>>> 
>>> I noticed MAINTAINERS was not updated, so pushed the patch below.
>> That leaves the epiphany port unmaintained.  Should we automatically add such
>> ports to the list of obsoleted ports?
> Given that epiphany has randomly failed tests for the last 3+ years due to 
> bugs in its patterns, yes, it really needs to be deprecated.
> 
> I tried to fix the worst of the offenders in epiphany.md a few years back and 
> gave up.  Essentially seemingly innocent changes in the RTL will cause reload 
> to occasionally not see a path to get constraints satisfied.  So a test which 
> passes today, will flip to failing tomorrow while some other set of tests 
> will go the other way.

Does LRA make that issue go away, or does it not help?

paul



Musttail patchkit v6

2024-05-21 Thread Andi Kleen
I reworked the musttail patchkit to make it work without optimization
and also give better error messages and fixed some bugs and increased
the test coverage.

The no optimization support is not perfect: it cannot handle returning
small structures which would need to be allocated to registers first.

There are also still cases when musttail cannot give an accurate
error message (mainly because of complex control flow making
tree-tailcall giving up). But overall it works significantly better.

Jason's feedback on the C++ parser is also addressed.



[PATCH v6 3/8] C++: Support clang compatible [[musttail]] (PR83324)

2024-05-21 Thread Andi Kleen
This patch implements a clang compatible [[musttail]] attribute for
returns.

musttail is useful as an alternative to computed goto for interpreters.
With computed goto the interpreter function usually ends up very big
which causes problems with register allocation and other per function
optimizations not scaling. With musttail the interpreter can be instead
written as a sequence of smaller functions that call each other. To
avoid unbounded stack growth this requires forcing a sibling call, which
this attribute does. It guarantees an error if the call cannot be tail
called which allows the programmer to fix it instead of risking a stack
overflow. Unlike computed goto it is also type-safe.

It turns out that David Malcolm had already implemented middle/backend
support for a musttail attribute back in 2016, but it wasn't exposed
to any frontend other than a special plugin.

This patch adds a [[gnu::musttail]] attribute for C++ that can be added
to return statements. The return statement must be a direct call
(it does not follow dependencies), which is similar to what clang
implements. It then uses the existing must tail infrastructure.

For compatibility it also detects clang::musttail

One problem is that tree-tailcall usually fails when optimization
is disabled, which implies the attribute only really works with
optimization on. But that seems to be a reasonable limitation.

Passes bootstrap and full test

PR83324

gcc/cp/ChangeLog:

* parser.cc (cp_parser_statement): Handle musttail.
(cp_parser_jump_statement): Ditto.
(cp_parser_std_attribute): Ditto.
---
 gcc/cp/parser.cc | 42 +-
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 66ce161252c7..d25c1a1ac6a5 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -2467,7 +2467,7 @@ static tree cp_parser_perform_range_for_lookup
 static tree cp_parser_range_for_member_function
   (tree, tree);
 static tree cp_parser_jump_statement
-  (cp_parser *);
+  (cp_parser *, tree &);
 static void cp_parser_declaration_statement
   (cp_parser *);
 
@@ -12734,13 +12734,17 @@ cp_parser_statement (cp_parser* parser, tree 
in_statement_expr,
 NULL_TREE, false);
  break;
 
+   case RID_RETURN:
+ std_attrs = process_stmt_hotness_attribute (std_attrs, attrs_loc);
+ statement = cp_parser_jump_statement (parser, std_attrs);
+ break;
+
case RID_BREAK:
case RID_CONTINUE:
-   case RID_RETURN:
case RID_CO_RETURN:
case RID_GOTO:
  std_attrs = process_stmt_hotness_attribute (std_attrs, attrs_loc);
- statement = cp_parser_jump_statement (parser);
+ statement = cp_parser_jump_statement (parser, std_attrs);
  break;
 
  /* Objective-C++ exception-handling constructs.  */
@@ -14797,10 +14801,11 @@ cp_parser_init_statement (cp_parser *parser, tree 
*decl)
jump-statement:
  goto * expression ;
 
+   STD_ATTRS are the statement attributes. They can be modified.
Returns the new BREAK_STMT, CONTINUE_STMT, RETURN_EXPR, or GOTO_EXPR.  */
 
 static tree
-cp_parser_jump_statement (cp_parser* parser)
+cp_parser_jump_statement (cp_parser* parser, tree &std_attrs)
 {
   tree statement = error_mark_node;
   cp_token *token;
@@ -14877,6 +14882,33 @@ cp_parser_jump_statement (cp_parser* parser)
  /* If the next token is a `;', then there is no
 expression.  */
  expr = NULL_TREE;
+
+   if (keyword == RID_RETURN && expr)
+ {
+   bool musttail_p = false;
+   if (lookup_attribute ("gnu", "musttail", std_attrs))
+ {
+   musttail_p = true;
+   std_attrs = remove_attribute ("gnu", "musttail", std_attrs);
+ }
+   // support this for compatibility
+   if (lookup_attribute ("clang", "musttail", std_attrs))
+ {
+   musttail_p = true;
+   std_attrs = remove_attribute ("clang", "musttail", std_attrs);
+ }
+   if (musttail_p)
+ {
+   tree t = expr;
+   if (t && TREE_CODE (t) == TARGET_EXPR)
+ t = TARGET_EXPR_INITIAL (t);
+   if (t && TREE_CODE (t) != CALL_EXPR)
+ error_at (token->location, "cannot tail-call: return value 
must be a call");
+   else
+ CALL_EXPR_MUST_TAIL_CALL (t) = 1;
+ }
+ }
+
/* Build the return-statement, check co-return first, since type
   deduction is not valid there.  */
if (keyword == RID_CO_RETURN)
@@ -30189,7 +30221,7 @@ cp_parser_std_attribute (cp_parser *parser, tree 
attr_ns)
 /* Maybe we don't expect to see any arguments for this attribute.  */
 const attribute_spec *as
   = lookup_attribute_spec (TREE_PURPOSE (attribute));
-if (as && as->max_length ==

[PATCH v6 4/8] C: Implement musttail attribute for returns

2024-05-21 Thread Andi Kleen
Implement a clang-compatible C23 musttail attribute in the C parser,
similar to the earlier C++ implementation.

PR83324

gcc/c/ChangeLog:

* c-parser.cc (struct attr_state): Define with musttail_p.
(c_parser_statement_after_labels): Handle [[musttail]].
(c_parser_std_attribute): Ditto.
(c_parser_handle_musttail): Ditto.
(c_parser_compound_statement_nostart): Ditto.
(c_parser_all_labels): Ditto.
(c_parser_statement): Ditto.
* c-tree.h (c_finish_return): Add musttail_p flag.
* c-typeck.cc (c_finish_return): Handle musttail_p flag.
---
 gcc/c/c-parser.cc | 61 +--
 gcc/c/c-tree.h|  2 +-
 gcc/c/c-typeck.cc | 15 ++--
 3 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 00f8bf4376e5..9edadb0fee96 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1616,6 +1616,11 @@ struct omp_for_parse_data {
   bool fail : 1;
 };
 
+struct attr_state
+{
+  bool musttail_p; // parsed a musttail for return
+};
+
 static bool c_parser_nth_token_starts_std_attributes (c_parser *,
  unsigned int);
 static tree c_parser_std_attribute_specifier_sequence (c_parser *);
@@ -1660,7 +1665,7 @@ static location_t c_parser_compound_statement_nostart 
(c_parser *);
 static void c_parser_label (c_parser *, tree);
 static void c_parser_statement (c_parser *, bool *, location_t * = NULL);
 static void c_parser_statement_after_labels (c_parser *, bool *,
-vec * = NULL);
+vec * = NULL, attr_state = 
{});
 static tree c_parser_c99_block_statement (c_parser *, bool *,
  location_t * = NULL);
 static void c_parser_if_statement (c_parser *, bool *, vec *);
@@ -5756,6 +5761,8 @@ c_parser_std_attribute (c_parser *parser, bool for_tm)
}
   goto out;
 }
+  else if (is_attribute_p ("musttail", name))
+error ("% attribute has arguments");
   {
 location_t open_loc = c_parser_peek_token (parser)->location;
 matching_parens parens;
@@ -6941,6 +6948,28 @@ c_parser_handle_directive_omp_attributes (tree &attrs,
 }
 }
 
+/* Check if STD_ATTR contains a musttail attribute and handle it
+   PARSER is the parser and A is the output attr_state.  */
+
+static tree
+c_parser_handle_musttail (c_parser *parser, tree std_attrs, attr_state &a)
+{
+  if (c_parser_next_token_is_keyword (parser, RID_RETURN))
+{
+  if (lookup_attribute ("gnu", "musttail", std_attrs))
+   {
+ std_attrs = remove_attribute ("gnu", "musttail", std_attrs);
+ a.musttail_p = true;
+   }
+  if (lookup_attribute ("clang", "musttail", std_attrs))
+   {
+ std_attrs = remove_attribute ("clang", "musttail", std_attrs);
+ a.musttail_p = true;
+   }
+}
+  return std_attrs;
+}
+
 /* Parse a compound statement except for the opening brace.  This is
used for parsing both compound statements and statement expressions
(which follow different paths to handling the opening).  */
@@ -6957,6 +6986,7 @@ c_parser_compound_statement_nostart (c_parser *parser)
   bool in_omp_loop_block
 = omp_for_parse_state ? omp_for_parse_state->want_nested_loop : false;
   tree sl = NULL_TREE;
+  attr_state a = {};
 
   if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE))
 {
@@ -7095,7 +7125,10 @@ c_parser_compound_statement_nostart (c_parser *parser)
= c_parser_nth_token_starts_std_attributes (parser, 1);
   tree std_attrs = NULL_TREE;
   if (have_std_attrs)
-   std_attrs = c_parser_std_attribute_specifier_sequence (parser);
+   {
+ std_attrs = c_parser_std_attribute_specifier_sequence (parser);
+ std_attrs = c_parser_handle_musttail (parser, std_attrs, a);
+   }
   if (c_parser_next_token_is_keyword (parser, RID_CASE)
  || c_parser_next_token_is_keyword (parser, RID_DEFAULT)
  || (c_parser_next_token_is (parser, CPP_NAME)
@@ -7243,7 +7276,7 @@ c_parser_compound_statement_nostart (c_parser *parser)
  last_stmt = true;
  mark_valid_location_for_stdc_pragma (false);
  if (!omp_for_parse_state)
-   c_parser_statement_after_labels (parser, NULL);
+   c_parser_statement_after_labels (parser, NULL, NULL, a);
  else
{
  /* In canonical loop nest form, nested loops can only appear
@@ -7285,15 +7318,18 @@ c_parser_compound_statement_nostart (c_parser *parser)
 /* Parse all consecutive labels, possibly preceded by standard
attributes.  In this context, a statement is required, not a
declaration, so attributes must be followed by a statement that is
-   not just a semicolon.  */
+   not just a semicolon.  Returns an attr_state.  */
 
-static void
+static attr_state
 c_parser_all_labels (c_parser *parser)
 {
+  attr_state a = {

[PATCH v6 1/8] Improve must tail in RTL backend

2024-05-21 Thread Andi Kleen
- Give error messages for all causes of non sibling call generation
- When giving error messages clear the musttail flag to avoid ICEs
- Error out when tree-tailcall failed to mark a must-tail call as a
sibcall. In this case expand_call doesn't know the true reason and can
only give a vague message.

PR83324

gcc/ChangeLog:

* calls.cc (expand_call): Fix musttail implementation.
(maybe_complain_about_tail_call): Clear must tail flag on error.
---
 gcc/calls.cc | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 21d78f9779fe..161e36839654 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -1249,6 +1249,7 @@ maybe_complain_about_tail_call (tree call_expr, const 
char *reason)
 return;
 
   error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);
+  CALL_EXPR_MUST_TAIL_CALL (call_expr) = 0;
 }
 
 /* Fill in ARGS_SIZE and ARGS array based on the parameters found in
@@ -2650,7 +2651,11 @@ expand_call (tree exp, rtx target, int ignore)
   /* The type of the function being called.  */
   tree fntype;
   bool try_tail_call = CALL_EXPR_TAILCALL (exp);
-  bool must_tail_call = CALL_EXPR_MUST_TAIL_CALL (exp);
+  /* tree-tailcall decided not to do tail calls. Error for the musttail case,
+ unfortunately we don't know the reason so it's fairly vague.
+ When tree-tailcall reported an error it already cleared the flag.  */
+  if (!try_tail_call)
+  maybe_complain_about_tail_call (exp, "other reasons");
   int pass;
 
   /* Register in which non-BLKmode value will be returned,
@@ -3022,10 +3027,21 @@ expand_call (tree exp, rtx target, int ignore)
  pushed these optimizations into -O2.  Don't try if we're already
  expanding a call, as that means we're an argument.  Don't try if
  there's cleanups, as we know there's code to follow the call.  */
-  if (currently_expanding_call++ != 0
-  || (!flag_optimize_sibling_calls && !CALL_FROM_THUNK_P (exp))
-  || args_size.var
-  || dbg_cnt (tail_call) == false)
+  if (currently_expanding_call++ != 0)
+{
+  maybe_complain_about_tail_call (exp, "inside another call");
+  try_tail_call = 0;
+}
+  if (!flag_optimize_sibling_calls
+   && !CALL_FROM_THUNK_P (exp)
+   && !CALL_EXPR_MUST_TAIL_CALL (exp))
+try_tail_call = 0;
+  if (args_size.var)
+{
+  maybe_complain_about_tail_call (exp, "variable size arguments");
+  try_tail_call = 0;
+}
+  if (dbg_cnt (tail_call) == false)
 try_tail_call = 0;
 
   /* Workaround buggy C/C++ wrappers around Fortran routines with
@@ -3046,13 +3062,15 @@ expand_call (tree exp, rtx target, int ignore)
if (MEM_P (*iter))
  {
try_tail_call = 0;
+   maybe_complain_about_tail_call (exp,
+   "hidden string length argument passed on 
stack");
break;
  }
}
 
   /* If the user has marked the function as requiring tail-call
  optimization, attempt it.  */
-  if (must_tail_call)
+  if (CALL_EXPR_MUST_TAIL_CALL (exp))
 try_tail_call = 1;
 
   /*  Rest of purposes for tail call optimizations to fail.  */
-- 
2.44.0



[PATCH v6 2/8] Add a musttail generic attribute to the c-attribs table

2024-05-21 Thread Andi Kleen
It does nothing currently since statement attributes are handled
directly in the parser.

gcc/c-family/ChangeLog:

* c-attribs.cc (handle_musttail_attribute): Add.
* c-common.h (handle_musttail_attribute): Add.
---
 gcc/c-family/c-attribs.cc | 15 +++
 gcc/c-family/c-common.h   |  1 +
 2 files changed, 16 insertions(+)

diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc
index 04e39b41bdf3..7110d56c8ca0 100644
--- a/gcc/c-family/c-attribs.cc
+++ b/gcc/c-family/c-attribs.cc
@@ -338,6 +338,8 @@ const struct attribute_spec c_common_gnu_attributes[] =
   { "common", 0, 0, true,  false, false, false,
  handle_common_attribute,
  attr_common_exclusions },
+  { "musttail",  0, 0, false, false, false,
+ false, handle_musttail_attribute, NULL },
   /* FIXME: logically, noreturn attributes should be listed as
  "false, true, true" and apply to function types.  But implementing this
  would require all the places in the compiler that use TREE_THIS_VOLATILE
@@ -1216,6 +1218,19 @@ handle_common_attribute (tree *node, tree name, tree 
ARG_UNUSED (args),
   return NULL_TREE;
 }
 
+/* Handle a "musttail" attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+tree
+handle_musttail_attribute (tree ARG_UNUSED (*node), tree name, tree ARG_UNUSED 
(args),
+  int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+  /* Currently only a statement attribute, handled directly in parser.  */
+  warning (OPT_Wattributes, "%qE attribute ignored", name);
+  *no_add_attrs = true;
+  return NULL_TREE;
+}
+
 /* Handle a "noreturn" attribute; arguments as in
struct attribute_spec.handler.  */
 
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 2d5f53998855..2707405e8def 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1637,6 +1637,7 @@ extern tree find_tm_attribute (tree);
 extern const struct attribute_spec::exclusions attr_cold_hot_exclusions[];
 extern const struct attribute_spec::exclusions attr_noreturn_exclusions[];
 extern tree handle_noreturn_attribute (tree *, tree, tree, int, bool *);
+extern tree handle_musttail_attribute (tree *, tree, tree, int, bool *);
 extern bool has_attribute (location_t, tree, tree, tree (*)(tree));
 extern tree build_attr_access_from_parms (tree, bool);
 
-- 
2.44.0



[PATCH v6 5/8] Add tests for C/C++ musttail attributes

2024-05-21 Thread Andi Kleen
Mostly adapted from the existing C musttail plugin tests.

gcc/testsuite/ChangeLog:

* c-c++-common/musttail1.c: New test.
* c-c++-common/musttail2.c: New test.
* c-c++-common/musttail3.c: New test.
* c-c++-common/musttail4.c: New test.
* c-c++-common/musttail5.c: New test.
* c-c++-common/musttail7.c: New test.
* c-c++-common/musttail8.c: New test.
* g++.dg/musttail6.C: New test.
* g++.dg/musttail9.C: New test.
---
 gcc/testsuite/c-c++-common/musttail1.c | 14 +++
 gcc/testsuite/c-c++-common/musttail2.c | 33 +++
 gcc/testsuite/c-c++-common/musttail3.c | 29 +
 gcc/testsuite/c-c++-common/musttail4.c | 17 
 gcc/testsuite/c-c++-common/musttail5.c | 28 +
 gcc/testsuite/c-c++-common/musttail7.c | 14 +++
 gcc/testsuite/c-c++-common/musttail8.c | 17 
 gcc/testsuite/g++.dg/musttail6.C   | 58 ++
 gcc/testsuite/g++.dg/musttail9.C   | 10 +
 9 files changed, 220 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/musttail1.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail2.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail3.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail4.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail5.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail7.c
 create mode 100644 gcc/testsuite/c-c++-common/musttail8.c
 create mode 100644 gcc/testsuite/g++.dg/musttail6.C
 create mode 100644 gcc/testsuite/g++.dg/musttail9.C

diff --git a/gcc/testsuite/c-c++-common/musttail1.c 
b/gcc/testsuite/c-c++-common/musttail1.c
new file mode 100644
index ..74efcc2a0bc6
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { tail_call && { c || c++11 } } } } */
+/* { dg-additional-options "-fdelayed-branch" { target sparc*-*-* } } */
+
+int __attribute__((noinline,noclone,noipa))
+callee (int i)
+{
+  return i * i;
+}
+
+int __attribute__((noinline,noclone,noipa))
+caller (int i)
+{
+  [[gnu::musttail]] return callee (i + 1);
+}
diff --git a/gcc/testsuite/c-c++-common/musttail2.c 
b/gcc/testsuite/c-c++-common/musttail2.c
new file mode 100644
index ..86f2c3d77404
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile { target { tail_call && { c || c++11 } } } } */
+
+struct box { char field[256]; int i; };
+
+int __attribute__((noinline,noclone,noipa))
+test_2_callee (int i, struct box b)
+{
+  if (b.field[0])
+return 5;
+  return i * i;
+}
+
+int __attribute__((noinline,noclone,noipa))
+test_2_caller (int i)
+{
+  struct box b;
+  [[gnu::musttail]] return test_2_callee (i + 1, b); /* { dg-error "cannot 
tail-call: " } */
+}
+
+extern void setjmp (void);
+void
+test_3 (void)
+{
+  [[gnu::musttail]] return setjmp (); /* { dg-error "cannot tail-call: " } */
+}
+
+extern float f7(void);
+
+int
+test_6 (void)
+{
+  [[gnu::musttail]] return f7(); /* { dg-error "cannot tail-call: " } */
+}
diff --git a/gcc/testsuite/c-c++-common/musttail3.c 
b/gcc/testsuite/c-c++-common/musttail3.c
new file mode 100644
index ..ea9589c59ef2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail3.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target { tail_call && { c || c++11 } } } } */
+
+extern int foo2 (int x, ...);
+
+struct str
+{
+  int a, b;
+};
+
+struct str
+cstruct (int x)
+{
+  if (x < 10)
+[[clang::musttail]] return cstruct (x + 1);
+  return ((struct str){ x, 0 });
+}
+
+int
+foo (int x)
+{
+  if (x < 10)
+[[clang::musttail]] return foo2 (x, 29);
+  if (x < 100)
+{
+  int k = foo (x + 1);
+  [[clang::musttail]] return k;/* { dg-error "cannot tail-call: " } */
+}
+  return x;
+}
diff --git a/gcc/testsuite/c-c++-common/musttail4.c 
b/gcc/testsuite/c-c++-common/musttail4.c
new file mode 100644
index ..23f4b5e1cd68
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { tail_call && { c || c++11 } } } } */
+
+struct box { char field[64]; int i; };
+
+struct box __attribute__((noinline,noclone,noipa))
+returns_struct (int i)
+{
+  struct box b;
+  b.i = i * i;
+  return b;
+}
+
+int __attribute__((noinline,noclone))
+test_1 (int i)
+{
+  [[gnu::musttail]] return returns_struct (i * 5).i; /* { dg-error "cannot 
tail-call: " } */
+}
diff --git a/gcc/testsuite/c-c++-common/musttail5.c 
b/gcc/testsuite/c-c++-common/musttail5.c
new file mode 100644
index ..234da0d3f2a9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/musttail5.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c23" { target c } } */
+/* { dg-options "-std=gnu++11" { target c++ } } */
+
+[[musttail]] int j; /* { dg-warning "attribute" } */
+__attribute__((musttail)) int k; /* { dg-warning "attribute" } */
+
+void foo(void)
+{
+  [[gnu::musttail]] j++; /* { dg-warning "attribute" } */
+  [[gnu::musttail]] if (k > 0) /* { dg-warning "attribute" } */
+[[gnu::musttail

[PATCH v6 7/8] Give better error messages for musttail

2024-05-21 Thread Andi Kleen
When musttail is set, make tree-tailcall give error messages
when it cannot handle a call. This avoids vague "other reasons"
error messages later at expand time.

This doesn't always work: for example, when the find_tail_calls
walk gives up because the control flow is too complicated,
it won't find the tail call and so can't give a suitable
error message.

gcc/ChangeLog:

* tree-tailcall.cc (maybe_error_musttail): Add.
(find_tail_calls): Add error messages.
---
 gcc/tree-tailcall.cc | 37 ++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index 094856de22ef..1217435c3f90 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "common/common-target.h"
 #include "ipa-utils.h"
 #include "tree-ssa-live.h"
+#include "diagnostic-core.h"
 
 /* The file implements the tail recursion elimination.  It is also used to
analyze the tail calls in general, passing the results to the rtl level
@@ -402,6 +403,20 @@ propagate_through_phis (tree var, edge e)
   return var;
 }
 
+/* Report an error for failing to tail convert must call CALL
+   with error message ERR.  */
+
+static void
+maybe_error_musttail (gcall *call, const char *err)
+{
+  if (gimple_call_must_tail_p (call))
+{
+  error_at (call->location, "cannot tail-call: %s", err);
+  gimple_call_set_must_tail (call, false); /* Avoid another error.  */
+  gimple_call_set_tail (call, false);
+}
+}
+
 /* Argument for compute_live_vars/live_vars_at_stmt and what compute_live_vars
returns.  Computed lazily, but just once for the function.  */
 static live_vars_map *live_vars;
@@ -489,8 +504,14 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
   if (ass_var
   && !is_gimple_reg (ass_var)
   && !auto_var_in_fn_p (ass_var, cfun->decl))
-return;
+{
+  maybe_error_musttail (call, "complex return value");
+  return;
+}
 
+  /* This doesn't really work because the BB would have multiple succ edges
+ and is rejected above. Instead just gives a "other reasons" error
+ at expand time.  */
   /* If the call might throw an exception that wouldn't propagate out of
  cfun, we can't transform to a tail or sibling call (82081).  */
   if (stmt_could_throw_p (cfun, stmt)
@@ -524,7 +545,10 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
   if (result_decl
   && may_be_aliased (result_decl)
   && ref_maybe_used_by_stmt_p (call, result_decl, false))
-return;
+{
+  maybe_error_musttail (call, "tail call must be same type");
+  return;
+}
 
   /* We found the call, check whether it is suitable.  */
   tail_recursion = false;
@@ -605,6 +629,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
{
  if (local_live_vars)
BITMAP_FREE (local_live_vars);
+ maybe_error_musttail (call, "call invocation refers to locals");
  return;
}
  else
@@ -613,6 +638,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
  if (bitmap_bit_p (local_live_vars, *v))
{
  BITMAP_FREE (local_live_vars);
+ maybe_error_musttail (call, "call invocation refers to 
locals");
  return;
}
}
@@ -631,6 +657,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
   auto_bitmap to_move_defs;
   auto_vec to_move_stmts;
 
+  /* Does not give musttail errors because the frontend won't generate
+ such musttails. */
   abb = bb;
   agsi = gsi;
   while (1)
@@ -716,7 +744,10 @@ find_tail_calls (basic_block bb, struct tailcall **ret, 
bool only_musttail)
   if (ret_var
   && (ret_var != ass_var
  && !(is_empty_type (TREE_TYPE (ret_var)) && !ass_var)))
-return;
+{
+  maybe_error_musttail (call, "call must be the same type");
+  return;
+}
 
   /* If this is not a tail recursive call, we cannot handle addends or
  multiplicands.  */
-- 
2.44.0



[PATCH v6 8/8] Add documentation for musttail attribute

2024-05-21 Thread Andi Kleen
gcc/ChangeLog:

* doc/extend.texi: Document [[musttail]].
---
 gcc/doc/extend.texi | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e290265d68d3..f3df7688edf1 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9839,7 +9839,7 @@ same manner as the @code{deprecated} attribute.
 @section Statement Attributes
 @cindex Statement Attributes
 
-GCC allows attributes to be set on null statements.  @xref{Attribute Syntax},
+GCC allows attributes to be set on statements.  @xref{Attribute Syntax},
 for details of the exact syntax for using attributes.  Other attributes are
 available for functions (@pxref{Function Attributes}), variables
 (@pxref{Variable Attributes}), labels (@pxref{Label Attributes}), enumerators
@@ -9896,6 +9896,23 @@ foo (int x, int y)
 @code{y} is not actually incremented and the compiler can but does not
 have to optimize it to just @code{return 42 + 42;}.
 
+@cindex @code{musttail} statement attribute
+@item musttail
+
+The @code{gnu::musttail} or @code{clang::musttail} attribute
+can be applied to a @code{return} statement with a return-value expression
+that is a function call.  It asserts that the call must be a tail call that
+does not allocate extra stack space.
+
+@smallexample
+[[gnu::musttail]] return foo();
+@end smallexample
+
+If the compiler cannot generate a tail call it generates
+an error. On some targets they may not be supported.
+Tail calls cannot reference locals in memory, which may affect
+builds without optimization when passing structures that
+would fit into registers.
 @end table
 
 @node Attribute Syntax
@@ -10019,7 +10036,9 @@ the constant expression, if present.
 
 @subsubheading Statement Attributes
 In GNU C, an attribute specifier list may appear as part of a null
-statement.  The attribute goes before the semicolon.
+statement. The attribute goes before the semicolon.
+Some attributes in new style syntax are also supported
+on non-null statements.
 
 @subsubheading Type Attributes
 
-- 
2.44.0



[PATCH v6 6/8] Enable musttail tail conversion even when not optimizing

2024-05-21 Thread Andi Kleen
Enable the tailcall optimization for non-optimizing builds,
but in this case only check calls that have the musttail attribute set.
This makes musttail work without optimization.

This is done with a new late musttail pass that is only active when
not optimizing. The pass must be after ehcleanup.

gcc/ChangeLog:

* passes.def (pass_musttail): Add.
* tree-pass.h (make_pass_musttail): Add.
* tree-tailcall.cc (find_tail_calls): Handle only_musttail
  argument.
(tree_optimize_tail_calls_1): Pass on only_musttail.
(execute_tail_calls): Pass only_musttail as false.
(class pass_musttail): Add.
(make_pass_musttail): Add.
---
 gcc/passes.def   |  1 +
 gcc/tree-pass.h  |  1 +
 gcc/tree-tailcall.cc | 64 +---
 3 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 1cbbd4130970..3e83cc327fd2 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -443,6 +443,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_tsan_O0);
   NEXT_PASS (pass_sanopt);
   NEXT_PASS (pass_cleanup_eh);
+  NEXT_PASS (pass_musttail);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
   NEXT_PASS (pass_gimple_isel);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 29267589eeb3..0668cea0a48e 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -368,6 +368,7 @@ extern gimple_opt_pass *make_pass_sra (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_sra_early (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_musttail (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index e9f7f8a12b3a..094856de22ef 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -408,10 +408,10 @@ static live_vars_map *live_vars;
 static vec live_vars_vec;
 
 /* Finds tailcalls falling into basic block BB. The list of found tailcalls is
-   added to the start of RET.  */
+   added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail.  
*/
 
 static void
-find_tail_calls (basic_block bb, struct tailcall **ret)
+find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
 {
   tree ass_var = NULL_TREE, ret_var, func, param;
   gimple *stmt;
@@ -445,6 +445,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
   if (is_gimple_call (stmt))
{
  call = as_a  (stmt);
+ /* Handle only musttail calls when not optimizing.  */
+ if (only_musttail && !gimple_call_must_tail_p (call))
+   return;
  ass_var = gimple_call_lhs (call);
  break;
}
@@ -467,7 +470,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
   edge_iterator ei;
   /* Recurse to the predecessors.  */
   FOR_EACH_EDGE (e, ei, bb->preds)
-   find_tail_calls (e->src, ret);
+   find_tail_calls (e->src, ret, only_musttail);
 
   return;
 }
@@ -528,7 +531,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret)
   func = gimple_call_fndecl (call);
   if (func
   && !fndecl_built_in_p (func)
-  && recursive_call_p (current_function_decl, func))
+  && recursive_call_p (current_function_decl, func)
+  && !only_musttail)
 {
   tree arg;
 
@@ -1094,10 +1098,11 @@ create_tailcall_accumulator (const char *label, 
basic_block bb, tree init)
 }
 
 /* Optimizes tail calls in the function, turning the tail recursion
-   into iteration.  */
+   into iteration. When ONLY_MUSTCALL is true only optimize mustcall
+   marked calls.  */
 
 static unsigned int
-tree_optimize_tail_calls_1 (bool opt_tailcalls)
+tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall)
 {
   edge e;
   bool phis_constructed = false;
@@ -1117,7 +1122,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls)
   /* Only traverse the normal exits, i.e. those that end with return
 statement.  */
   if (safe_is_a  (*gsi_last_bb (e->src)))
-   find_tail_calls (e->src, &tailcalls);
+   find_tail_calls (e->src, &tailcalls, only_mustcall);
 }
 
   if (live_vars)
@@ -1228,7 +1233,7 @@ gate_tail_calls (void)
 static unsigned int
 execute_tail_calls (void)
 {
-  return tree_optimize_tail_calls_1 (true);
+  return tree_optimize_tail_calls_1 (true, false);
 }
 
 namespace {
@@ -1261,7 +1266,7 @@ public:
   bool gate (function *) final override { return gate_tail_calls (); }
   unsigned int execute (function *) final override
 {
-  return tree_optimize_tail_calls_1 (false);
+  return tree_optimize_tail_calls_1 (false, false);
 }
 
 }; // class pass_tail_recursion
@@ -1312,3 +1317,44 @@ make_p

Re: [PATCH] RISC-V: Enable vectorization for vect-early-break_124-pr114403.c

2024-05-21 Thread Robin Dapp
The patch is OK from the riscv side.  generic-ooo includes fast unaligned
access.

Regards
 Robin


Re: [PATCH v3] aarch64: Fix normal returns inside functions which use eh_returns [PR114843]

2024-05-21 Thread Richard Sandiford
Wilco Dijkstra  writes:
> Hi Andrew,
>
> A few comments on the implementation, I think it can be simplified a lot:

FWIW, I agree with Wilco's comments, except:

>> +++ b/gcc/config/aarch64/aarch64.h
>> @@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = 
>> AARCH64_FL_SM_OFF;
>> #define DWARF2_UNWIND_INFO 1
>>  
>>  /* Use R0 through R3 to pass exception handling information.  */
>> +#define EH_RETURN_DATA_REGISTERS_N 4
>>  #define EH_RETURN_DATA_REGNO(N) \
>> -  ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
>> +  ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : 
>> INVALID_REGNUM)
>  
> It would be useful to add a macro IS_EH_RETURN_REGNUM(regnum) that just checks
> the range R0_REGNUM to R0_REGNUM + EH_RETURN_DATA_REGISTERS_N.

I've just pushed a patch that adds a global eh_return_data_regs set,
so I think we can test that instead.

>> @@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame
>>  outgoing arguments) of each register save slot, or -2 if no save is
>>  needed.  */
>>   poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
>> +  bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N];
>
> This doesn't make much sense - besides X0-X3, we also need X5 and X6 for 
> eh_return.
> If these or any of the other temporaries used by epilog are callee-saved 
> somehow,
> things are going horribly wrong already... So what do we gain by doing this?
>
>
>> +++ b/gcc/config/aarch64/aarch64.cc
>> @@ -7792,6 +7792,7 @@ aarch64_layout_frame (void)
>> 
>>  #define SLOT_NOT_REQUIRED (-2)
>>  #define SLOT_REQUIRED (-1)
>> +#define SLOT_EH_RETURN_REQUIRED (-3)
>  
> I don't see a need for this.
>
>
>> @@ -7949,6 +7950,18 @@ aarch64_layout_frame (void)
>> stopping it from being individually shrink-wrapped.  */
>>  allocate_gpr_slot (R30_REGNUM);
>>  
>> +  /* Allocate the eh_return first. */
>> +  if (crtl->calls_eh_return)
>> +for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
>> +  {
>> +int realregno = EH_RETURN_DATA_REGNO (regno);
>> +if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED))
>> +  {
>> +frame.eh_return_allocated[regno] = true;
>> +allocate_gpr_slot (realregno);
>> +  }
>> +  }
>
> This change is unnecessary if we just mark the slots with SLOT_REQUIRED.

Also, is it necessary to allocate EH data registers first?

>> @@ -8035,6 +8048,23 @@ aarch64_layout_frame (void)
>>   frame.wb_pop_candidate1 = frame.wb_push_candidate1;
>>   frame.wb_pop_candidate2 = frame.wb_push_candidate2;
>>  
>> +  /* EH data registers are not pop candidates. */
>> +  if (crtl->calls_eh_return)
>> +for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; 
>> regno++)
>> +  {
>> +if (frame.eh_return_allocated[regno]
>> +&& frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno))
>> +{
>> +  frame.wb_pop_candidate1 = frame.wb_pop_candidate2;
>> +  frame.wb_pop_candidate2 = INVALID_REGNUM;
>> +}
>> +if (frame.eh_return_allocated[regno]
>> +&& frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno))
>> +{
>> +  frame.wb_pop_candidate2 = INVALID_REGNUM;
>> +}
>> +  }
>
> This is unnecessary since we can just avoid making them push candidates
> if there is no frame chain, eg:
>
> if ((!crtl->calls_eh_return || frame.emit_frame_chain) && !push_regs.empty ()
>   && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))

I agree we should do the check here (and similarly for the second register),
rather than fixing it up later.  But IMO we should test the register directly:

  if (!push_regs.empty ()
  && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp)
  && (!crtl->calls_eh_return
  || !TEST_HARD_REG_BIT (eh_return_data_regs, push_regs[0])))

In some ways it seems unfortunate that we're generating two different
copies of the epilogue in order to skip two LDPs that (with a bit of
work) could easily be done before entering a combined epilogue.
But we already have a branch on EH_RETURN_TAKEN_RTX as well,
so maybe this is the tipping point at which duplication is worthwhile.

Thanks,
Richard

> @@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 
> bytes_below_sp,
>if (frame.is_scs_enabled && regno == LR_REGNUM)
>   return true;
>  
> +  /* Skip the eh return data registers if we are
> +  returning normally rather than via eh_return. */
> +  if (!was_eh_return && crtl->calls_eh_return)
> + {
> +   for (unsigned ehregno = 0;
> +EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM;
> +ehregno++)
> + {
> +   if (EH_RETURN_DATA_REGNO (ehregno) == regno
> +   && frame.eh_return_allocated[ehregno])
> + return true;
> + }
> + }
> +
>
> So this could be something like:
>
>   if (!was_eh_return && crtl->calls_eh_return && IS_EH_RETURN_REGNUM 
> (regno))
>   

Re: [Patch, aarch64, middle-end] Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Ajit Agarwal
Hello Alex:

On 21/05/24 6:02 pm, Alex Coplan wrote:
> On 21/05/2024 16:02, Ajit Agarwal wrote:
>> Hello Alex:
>>
>> On 21/05/24 1:16 am, Alex Coplan wrote:
>>> On 20/05/2024 18:44, Alex Coplan wrote:
 Hi Ajit,

 On 20/05/2024 21:50, Ajit Agarwal wrote:
> Hello Alex/Richard:
>
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
>
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
>
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
>
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
>
> Bootstrapped and regtested on aarch64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
>
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
>
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
>
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
>
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
>
> 2024-05-20  Ajit Kumar Agarwal  
>
> gcc/ChangeLog:
>
>   * pair-fusion.h: Generic header code for load store fusion

 Insert "pair" before fusion?
>>
>> Addressed in v1 of the patch.

>   that can be shared across different architectures.
>   * pair-fusion.cc: Generic source code implementation for
>   load store fusion that can be shared across different architectures.

 Likewise.
>> Addressed in v1 of the patch.

>   * Makefile.in: Add new executable pair-fusion.o

 It's not an executable but an object file.

>   * config/aarch64/aarch64-ldp-fusion.cc: Target specific
>   code for load store fusion of aarch64.

 I guess this should say something like: "Delete generic code and move it
 to pair-fusion.cc in the middle-end."

 I've left some comments below on the header file.  The rest of the patch
 looks pretty good to me.  I tried diffing the original contents of
 aarch64-ldp-fusion.cc with pair-fusion.cc, and that looks as expected.

>>>
>>> 
>>>
> diff --git a/gcc/pair-fusion.h b/gcc/pair-fusion.h
> new file mode 100644
> index 000..00f6d3e149a
> --- /dev/null
> +++ b/gcc/pair-fusion.h
> @@ -0,0 +1,340 @@
> +// Pair Mem fusion generic header file.
> +// Copyright (C) 2024 Free Software Foundation, Inc.
> +//
> +// This file is part of GCC.
> +//
> +// GCC is free software; you can redistribute it and/or modify it
> +// under the terms of the GNU General Public License as published by
> +// the Free Software Foundation; either version 3, or (at your option)
> +// any later version.
> +//
> +// GCC is distributed in the hope that it will be useful, but
> +// WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +// General Public License for more details.
> +//
> +// You should have received a copy of the GNU General Public License
> +// along with GCC; see the file COPYING3.  If not see
> +// <http://www.gnu.org/licenses/>.
> +
> +#define INCLUDE_ALGORITHM
> +#define INCLUDE_FUNCTIONAL
> +#define INCLUDE_LIST
> +#define INCLUDE_TYPE_TRAITS
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "rtl.h"
> +#include "df.h"
> +#include "rtl-iter.h"
> +#include "rtl-ssa.h"

 I'm not sure how desirable this is, but you might be able to
 forward-declare RTL-SSA types like this:

 class def_info;
 class insn_info;
 class insn_range_info;

 thus removing the need to include the header here, since the interface
 only refers to these types by pointer or reference.

 Richard: please say if you'd prefer keeping the include.

>>
>> Doing forward declaration gives ambiguous errors with conflicting
>> insn_info with rtl_ssa::insn_info and templated initialization
>> errors. Also with overloaded operator with insn_info is not 
>> defined with forward declaration.
> 
> So I tried this locally and it seems to work if you wrap the
> forward-decls in:
> 
> namespace rtl_ssa {
>   [...]
> };
> 
> and indeed you'd need to move the definition of base_cand::viable () to
> pair-fusion.cc so that we don't dereference those pointers in the
> header (would be good to mark it inline if you do that).
> 
> 

Re: [Patch, aarch64, middle-end] Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Ajit Agarwal
Hello Alex:

On 21/05/24 6:50 pm, Alex Coplan wrote:
> On 20/05/2024 21:50, Ajit Agarwal wrote:
>> Hello Alex/Richard:
>>
>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
>> to support multiple targets.
>>
>> Common infrastructure of load store pair fusion is divided into
>> target independent and target dependent code.
>>
>> Target independent code is structured in the following files.
>> gcc/pair-fusion.h
>> gcc/pair-fusion.cc
>>
>> Target independent code is the Generic code with pure virtual
>> function to interface between target independent and dependent
>> code.
>>
>> Bootstrapped and regtested on aarch64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
>>
>> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
>> to support multiple targets.
>>
>> Common infrastructure of load store pair fusion is divided into
>> target independent and target dependent code.
>>
>> Target independent code is structured in the following files.
>> gcc/pair-fusion.h
>> gcc/pair-fusion.cc
>>
>> Target independent code is the Generic code with pure virtual
>> function to interface between target independent and dependent
>> code.
>>
>> 2024-05-20  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>>  * pair-fusion.h: Generic header code for load store fusion
>>  that can be shared across different architectures.
>>  * pair-fusion.cc: Generic source code implementation for
>>  load store fusion that can be shared across different architectures.
>>  * Makefile.in: Add new executable pair-fusion.o
>>  * config/aarch64/aarch64-ldp-fusion.cc: Target specific
>>  code for load store fusion of aarch64.
> 
> Apologies for missing this in the last review but you'll also need to
> update gcc/config/aarch64/t-aarch64 to add a dependency on pair-fusion.h
> for aarch64-ldp-fusion.o.
> 

Addressed in v2 of the patch.
> Thanks,
> Alex

Thanks & Regards
Ajit
> 
>> ---
>>  gcc/Makefile.in  |1 +
>>  gcc/config/aarch64/aarch64-ldp-fusion.cc | 3303 +-
>>  gcc/pair-fusion.cc   | 2852 +++
>>  gcc/pair-fusion.h|  340 +++
>>  4 files changed, 3268 insertions(+), 3228 deletions(-)
>>  create mode 100644 gcc/pair-fusion.cc
>>  create mode 100644 gcc/pair-fusion.h
> 


Re: [RFC][PATCH] PR tree-optimization/109071 - -Warray-bounds false positive warnings due to code duplication from jump threading

2024-05-21 Thread Qing Zhao
Thanks for the comments and suggestions.

> On May 15, 2024, at 10:00, David Malcolm  wrote:
> 
> On Tue, 2024-05-14 at 15:08 +0200, Richard Biener wrote:
>> On Mon, 13 May 2024, Qing Zhao wrote:
>> 
>>> -Warray-bounds is an important option to enable Linux kernel to
>>> keep
>>> the array out-of-bound errors out of the source tree.
>>> 
>>> However, due to the false positive warnings reported in PR109071
>>> (-Warray-bounds false positive warnings due to code duplication
>>> from
>>> jump threading), -Warray-bounds=1 cannot be added on by default.
>>> 
>>> Although it's impossible to eliminate all the false positive
>>> warnings
>>> from -Warray-bounds=1 (See PR104355 Misleading -Warray-bounds
>>> documentation says "always out of bounds"), we should minimize the
>>> false positive warnings in -Warray-bounds=1.
>>> 
>>> The root reason for the false positive warnings reported in
>>> PR109071 is:
>>> 
>>> When the thread jump optimization tries to reduce the # of branches
>>> inside the routine, sometimes it needs to duplicate the code and
>>> split into two conditional paths. For example:
>>> 
>>> The original code:
>>> 
>>> void sparx5_set (int * ptr, struct nums * sg, int index)
>>> {
>>>   if (index >= 4)
>>> warn ();
>>>   *ptr = 0;
>>>   *val = sg->vals[index];
>>>   if (index >= 4)
>>> warn ();
>>>   *ptr = *val;
>>> 
>>>   return;
>>> }
>>> 
>>> With the thread jump, the above becomes:
>>> 
>>> void sparx5_set (int * ptr, struct nums * sg, int index)
>>> {
>>>   if (index >= 4)
>>> {
>>>   warn ();
>>>   *ptr = 0; // Code duplications since "warn" does
>>> return;
>>>   *val = sg->vals[index];   // same this line.
>>> // In this path, since it's under
>>> the condition
>>> // "index >= 4", the compiler knows
>>> the value
>>> // of "index" is larger than 4,
>>> therefore the
>>> // out-of-bound warning.
>>>   warn ();
>>> }
>>>   else
>>> {
>>>   *ptr = 0;
>>>   *val = sg->vals[index];
>>> }
>>>   *ptr = *val;
>>>   return;
>>> }
>>> 
>>> We can see, after the thread jump optimization, the # of branches
>>> inside
>>> the routine "sparx5_set" is reduced from 2 to 1, however,  due to
>>> the
>>> code duplication (which is needed for the correctness of the code),
>>> we
>>> got a false positive out-of-bound warning.
>>> 
>>> In order to eliminate such false positive out-of-bound warning,
>>> 
>>> A. Add one more flag for GIMPLE: is_splitted.
>>> B. During the thread jump optimization, when the basic blocks are
>>>duplicated, mark all the STMTs inside the original and
>>> duplicated
>>>basic blocks as "is_splitted";
>>> C. Inside the array bound checker, add the following new heuristic:
>>> 
>>> If
>>>1. the stmt is duplicated and splitted into two conditional
>>> paths;
>>> +  2. the warning level < 2;
>>> +  3. the current block is not dominating the exit block
>>> Then not report the warning.
>>> 
>>> The false positive warnings are moved from -Warray-bounds=1 to
>>>  -Warray-bounds=2 now.
>>> 
>>> Bootstrapped and regression tested on both x86 and aarch64.
>>> adjusted
>>>  -Warray-bounds-61.c due to the false positive warnings.
>>> 
>>> Let me know if you have any comments and suggestions.
>> 
>> At the last Cauldron I talked with David Malcolm about these kind of
>> issues and thought of instead of suppressing diagnostics to record
>> how a block was duplicated.  For jump threading my idea was to record
>> the condition that was proved true when entering the path and do this
>> by recording the corresponding locations

Is only recording the location for the TRUE path  enough?
We might need to record the corresponding locations for both TRUE and FALSE 
paths since the VRP might be more accurate on both paths. 
Is only recording the location enough? 
Do we need to record the pointer to the original condition stmt?


>> so that in the end we can
>> use the diagnostic-path infrastructure to say
>> 
>> warning: array index always above array bounds
>> events 1:
>> 
>>> 3 |  if (index >= 4)
>>  |
>> (1) when index >= 4
>> 
>> it would be possible to record the info as part of the ad-hoc
>> location data on each duplicated stmt or, possibly simpler,
>> as part of a debug stmt of new kind.
>> 
>> I'm not sure pruning the warnings is a good thing to do.  One
>> would argue we should instead isolate such path as unreachable
>> since it invokes undefined behavior.  In particular your
>> example is clearly a bug and should be diagnosed.
>> 
>> Note very similar issues happen when unrolling a loop.
>> 
>> Note all late diagnostics are prone to these kind of issues.
> 
> To recap our chat at Cauldron: any GCC diagnostic can potentially have
> a diagnostic_path associated with it (not just the analyzer).  The
> current mechanism is:
> (a) use a rich_location for the diagnostic, and 
> (b) create an

Re: [PATCH v3 2/2] RISC-V: avoid LUI based const mat in alloca epilogue expansion

2024-05-21 Thread Vineet Gupta
On 5/20/24 20:54, Jeff Law wrote:
> On 5/20/24 5:32 PM, Vineet Gupta wrote:
>> This is testsuite clean however there's a dwarf quirk which I want to
>> run by the experts. The test that was tripping CI has following
>> fragment:
>>
>>  Before patch|   After Patch
>> --
>> li   t0,-4096|  addi sp,s0,-2048
>> addi t0,t0,560   |  .cfi_def_cfa 2, 2048  <- #1
>> add  sp,s0,t0|  addi sp,sp,-1488
>> .cfi_def_cfa 2, 3536 |  .cfi_def_cfa_offset 3536  <- #2
>> addi sp,sp,1504  |  addi sp,sp,1504
>> .cfi_def_cfa_offset 2032 |  .cfi_def_cfa_offset 2032  <- #3
>>
>> The dwarf insn #1 and #3 seem ok, however #2 seems dubious to me.
> What about it seems dubious?  

My discomfort at claiming I understand dwarf, despite debugging/fixing
the ARC Linux port's in kernel dwarf unwinder :-)

> We need a CFA adjustment on each insn that 
> modifies the stack pointer so that we can unwind at any arbitrary point.

Of course.

> The first adjustment says the prior frame is at sp + 2048.  Then it's at 
> sp + 3536.  Then after the final insn the prior frame is at sp+2032.

Yeah I got confused with second one since once it gets anchored to SP
from S0, but you are right it is farther from base CFA now.

-Vineet


Re: [committed] PATCH for Re: Stepping down as maintainer for ARC and Epiphany

2024-05-21 Thread Jeff Law




On 5/21/24 8:02 AM, Paul Koning wrote:




On May 21, 2024, at 9:57 AM, Jeff Law  wrote:



On 5/21/24 12:05 AM, Richard Biener via Gcc wrote:

On Mon, May 20, 2024 at 4:45 PM Gerald Pfeifer  wrote:


On Wed, 5 Jul 2023, Joern Rennecke wrote:

I haven't worked with these targets in years and can't really do
sensible maintenance or reviews of patches for them. I am currently
working on optimizations for other ports like RISC-V.


I noticed MAINTAINERS was not updated, so pushed the patch below.

That leaves the epiphany port unmaintained.  Should we automatically add such
ports to the list of obsoleted ports?

Given that epiphany has randomly failed tests for the last 3+ years due to bugs 
in its patterns, yes, it really needs to be deprecated.

I tried to fix the worst of the offenders in epiphany.md a few years back and 
gave up.  Essentially seemingly innocent changes in the RTL will cause reload 
to occasionally not see a path to get constraints satisfied.  So a test which 
passes today, will flip to failing tomorrow while some other test of tests will 
go the other way.


Does LRA make that issue go away, or does it not help?
LRA didn't trivially work on epiphany.  I didn't care enough about the 
port to try and make it LRA compatible.


jeff



Re: [PATCH v5 1/5] Improve must tail in RTL backend

2024-05-21 Thread Richard Biener
On Tue, May 21, 2024 at 3:35 PM Andi Kleen  wrote:
>
> > I can't see how this triggers on the IL above, the loop should have
> > ignored both the return and the clobber and when recursing to
> > the predecessor stop before the above check when running into the
> > call?
>
> Yes, I tracked that down later. The problem was that there
> were multiple successors to the BB due to exception handling,
> which makes the find_tail_calls walker give up.
>
> Putting the new pass after ehcleanup fixed that, but there
> are still cases when ehcleanup cannot get rid of them and
> then it gives up. musttail checking at expand time still
> works, but can only give a vague error message.
>
> >
> > > In a optimized build this passes, but with -O0 it always fails
> > > when the pass is placed before pass_optimizations_g. I assume
> > > it's some problem with mem ssa form.
> > >
> > > Any ideas how to fix that? Otherwise I can restrict musttail to non
> > > structs.
> >
> > I wonder how this works when optimizing?
>
> It just doesn't. You need optimization to do tail calls with
> structs. The only alternative would be to detect the situation
> and pull in some extra passes.
>
> Also even with optimization it only works for structs that
> fit into registers. This could be maybe fixed, but is out of scope
> for this patch kit.

I see.  I do wonder how we should deal with the inherent
dependence on optimization for [[musttail]] to work then?  "Solve"
the problem with good documentation?  Offer a -fignore-musttail
option to allow a -O0 build to at least succeed?  But then [[musttail]]
would rather be [[shouldtail]] and can no longer be for correctness?

How does clang solve this?

Richard.

>
> -Andi


Re: [committed] PATCH for Re: Stepping down as maintainer for ARC and Epiphany

2024-05-21 Thread Richard Biener
On Tue, May 21, 2024 at 6:21 PM Jeff Law  wrote:
>
>
>
> On 5/21/24 8:02 AM, Paul Koning wrote:
> >
> >
> >> On May 21, 2024, at 9:57 AM, Jeff Law  wrote:
> >>
> >>
> >>
> >> On 5/21/24 12:05 AM, Richard Biener via Gcc wrote:
> >>> On Mon, May 20, 2024 at 4:45 PM Gerald Pfeifer  wrote:
> 
>  On Wed, 5 Jul 2023, Joern Rennecke wrote:
> > I haven't worked with these targets in years and can't really do
> > sensible maintenance or reviews of patches for them. I am currently
> > working on optimizations for other ports like RISC-V.
> 
>  I noticed MAINTAINERS was not updated, so pushed the patch below.
> >>> That leaves the epiphany port unmaintained.  Should we automatically add 
> >>> such
> >>> ports to the list of obsoleted ports?
> >> Given that epiphany has randomly failed tests for the last 3+ years due to 
> >> bugs in its patterns, yes, it really needs to be deprecated.
> >>
> >> I tried to fix the worst of the offenders in epiphany.md a few years back 
> >> and gave up.  Essentially seemingly innocent changes in the RTL will cause 
> >> reload to occasionally not see a path to get constraints satisfied.  So a 
> >> test which passes today, will flip to failing tomorrow while some other 
> >> test of tests will go the other way.
> >
> > Does LRA make that issue go away, or does it not help?
> LRA didn't trivially work on epiphany.  I didn't care enough about the
> port to try and make it LRA compatible.

In that case LRA will make the issue go away (the port, that is ...).

Richard.

>
> jeff
>


Re: [Patch, aarch64, middle-end] v2: Move pair_fusion pass from aarch64 to middle-end

2024-05-21 Thread Alex Coplan
Hi Ajit,

I've left some more comments below.  It's getting there now, thanks for
your patience.

On 21/05/2024 20:32, Ajit Agarwal wrote:
> Hello Alex/Richard:
> 
> All comments are addressed.
> 
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
> 
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
> 
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
> 
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
> 
> Bootstrapped and regtested on aarch64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
> 
> aarch64, middle-end: Move pair_fusion pass from aarch64 to middle-end
> 
> Move pair fusion pass from aarch64-ldp-fusion.cc to middle-end
> to support multiple targets.
> 
> Common infrastructure of load store pair fusion is divided into
> target independent and target dependent code.
> 
> Target independent code is structured in the following files.
> gcc/pair-fusion.h
> gcc/pair-fusion.cc
> 
> Target independent code is the Generic code with pure virtual
> function to interface between target independent and dependent
> code.
> 
> 2024-05-21  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * pair-fusion.h: Generic header code for load store pair fusion
>   that can be shared across different architectures.
>   * pair-fusion.cc: Generic source code implementation for
>   load store pair fusion that can be shared across different 
> architectures.
>   * Makefile.in: Add new object file pair-fusion.o.
>   * config/aarch64/aarch64-ldp-fusion.cc: Delete generic code and move it
>   to pair-fusion.cc in the middle-end.
>   * config/aarch64/t-aarch64: Add header file dependency pair-fusion.h.

insert "on" after dependency.

> ---
>  gcc/Makefile.in  |1 +
>  gcc/config/aarch64/aarch64-ldp-fusion.cc | 3282 +-
>  gcc/config/aarch64/t-aarch64 |2 +-
>  gcc/pair-fusion.cc   | 3013 
>  gcc/pair-fusion.h|  189 ++
>  5 files changed, 3280 insertions(+), 3207 deletions(-)
>  create mode 100644 gcc/pair-fusion.cc
>  create mode 100644 gcc/pair-fusion.h
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index a7f15694c34..643342f623d 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1563,6 +1563,7 @@ OBJS = \
>   ipa-strub.o \
>   ipa.o \
>   ira.o \
> + pair-fusion.o \
>   ira-build.o \
>   ira-costs.o \
>   ira-conflicts.o \
> diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
> b/gcc/config/aarch64/aarch64-ldp-fusion.cc
> index 085366cdf68..612f62060bc 100644
> --- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
> +++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
> @@ -40,262 +40,13 @@
>  
>  using namespace rtl_ssa;

I think we should drop this, since the public interface and remaining
backend code in this file is independent of RTL-SSA.  I think you should
also drop the include of "rtl-ssa.h" from this file.   These two
changes will force you to get the header file (pair-fusion.h) right.

With these changes we can also significantly thin out the include list
in this file.  The current set of includes is:

#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_LIST
#define INCLUDE_TYPE_TRAITS
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "df.h"
#include "rtl-iter.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "tree-pass.h"
#include "ordered-hash-map.h"
#include "tree-dfa.h"
#include "fold-const.h"
#include "tree-hash-traits.h"
#include "print-tree.h"
#include "insn-attr.h"

I think instead the following should be enough for this file:

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "tm_p.h"
#include "rtl-iter.h"
#include "tree-pass.h"
#include "insn-attr.h"
#include "pair-fusion.h"

>  
> +#include "pair-fusion.h"
> +
>  static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
>  static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1));
>  static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1;
>  static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1;
>  

> diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
> index 78713558e7d..bdada08be70 100644
> --- a/gcc/config/aarch64/t-aarch64
> +++ b/gcc/config/aarch64/t-aarch64
> @@ -203,7 +203,7 @@ aarch64-early-ra.o: 
> $(srcdir)/config/aarch64/aarch64-early-ra.cc \
>  aarch64-ldp-fusion.o: $(srcdir)/config/aarch64/aarch64-ldp-fusion.cc \
>  $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
>  $(RTL_SSA_H) cfgcleanup.h tree-pass.h ordered-hash-map.h tree-dfa.h \
> -fold-c

[PATCH] Fix mixed input kind permute optimization

2024-05-21 Thread Richard Biener
When change_vec_perm_layout runs into a permute combining two
nodes where one is invariant and one internal the partition of
one input can be -1 but the other might not be.  The following
supports this case by simply ignoring inputs with input partition -1.

I'm not sure this is correct but it avoids ICEing when accessing
that partitions layout for gcc.target/i386/pr98928.c with the
change to avoid splitting store dataref groups during SLP discovery.

Bootstrap and regtest running on x86_64-unknown-linux-gnu (ontop of
the SLP series).  The change can't break anything that's already
broken but I'm not sure this does the right thing - the testcase
has an uniform constant.  I'll try to come up with a better runtime
testcase tomorrow.  Hints as to where to correctly fix such case
appreciated.

* tree-vect-slp.cc (change_vec_perm_layout): Ignore an
input partition of -1.
---
 gcc/tree-vect-slp.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 873748b0a72..f6ec1a81c96 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4828,6 +4828,8 @@ change_vec_perm_layout (slp_tree node, lane_permutation_t 
&perm,
{
  slp_tree in_node = SLP_TREE_CHILDREN (node)[entry.first];
  unsigned int in_partition_i = m_vertices[in_node->vertex].partition;
+ if (in_partition_i == -1u)
+   continue;
  this_in_layout_i = m_partitions[in_partition_i].layout;
}
   if (this_in_layout_i > 0)
-- 
2.35.3


Re: [PATCH v1 2/2] RISC-V: Add test cases for __builtin_add_overflow branch form unsigned SAT_ADD

2024-05-21 Thread Jeff Law




On 5/21/24 4:53 AM, pan2...@intel.com wrote:

From: Pan Li 

After we support __builtin_add_overflow branch form unsigned SAT_ADD
from the middle end, add more test cases to cover the functionalities.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add test macro for
branch __builtin_add_overflow form.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: New test.
* gcc.target/riscv/sat_u_add-13.c: New test.
* gcc.target/riscv/sat_u_add-14.c: New test.
* gcc.target/riscv/sat_u_add-15.c: New test.
* gcc.target/riscv/sat_u_add-16.c: New test.
* gcc.target/riscv/sat_u_add-run-13.c: New test.
* gcc.target/riscv/sat_u_add-run-14.c: New test.
* gcc.target/riscv/sat_u_add-run-15.c: New test.
* gcc.target/riscv/sat_u_add-run-16.c: New test.

OK
jeff



Re: [PATCH v3 1/2] RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

2024-05-21 Thread Jeff Law




On 5/20/24 5:32 PM, Vineet Gupta wrote:

Changes since v2:
   - Broke out the hunk corresponding to alloca in epilogue expansion in
 a separate patch.
---

If the constant used for stack offset can be expressed as sum of two S12
values, the constant need not be materialized (in a reg) and instead the
two S12 bits can be added to instructions involved with frame pointer.
This avoids burning a register and more importantly can often get down
to be 2 insn vs. 3.

The prev patches to generally avoid LUI based const materialization didn't
fix this PR and need this directed fix in function prologue/epilogue
expansion.

This fix doesn't move the needle for SPEC, at all, but it is still a
win considering gcc generates one insn fewer than llvm for the test ;-)

gcc-13.1 release   |  gcc 230823 |   |
   |g6619b3d4c15c|   This patch  |  clang/llvm
-
li  t0,-4096 | lit0,-4096  | addi  sp,sp,-2048 | addi 
sp,sp,-2048
addit0,t0,2016   | addi  t0,t0,2032| add   sp,sp,-16   | addi sp,sp,-32
li  a4,4096  | add   sp,sp,t0  | add   a5,sp,a0| add  a1,sp,16
add sp,sp,t0 | addi  a5,sp,-2032   | sbzero,0(a5)  | add  a0,a0,a1
li  a5,-4096 | add   a0,a5,a0  | addi  sp,sp,2032  | sb   zero,0(a0)
addia4,a4,-2032  | lit0, 4096  | addi  sp,sp,32| addi sp,sp,2032
add a4,a4,a5 | sbzero,2032(a0) | ret   | addi sp,sp,48
addia5,sp,16 | addi  t0,t0,-2032   |   | ret
add a5,a4,a5 | add   sp,sp,t0  |
add a0,a5,a0 | ret |
li  t0,4096  |
sd  a5,8(sp) |
sb  zero,2032(a0)|
addit0,t0,-2016  |
add sp,sp,t0 |
ret  |

gcc/ChangeLog:
PR target/105733
* config/riscv/riscv.h: New macros for with aligned offsets.
* config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
function to split a sum of two s12 values into constituents.
(riscv_expand_prologue): Handle offset being sum of two S12.
(riscv_expand_epilogue): Ditto.
* config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105733.c: New Test.
* gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
expect LUI 4096.
* gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.

OK
Jeff



[COMMITTED] RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

2024-05-21 Thread Vineet Gupta
If the constant used for stack offset can be expressed as sum of two S12
values, the constant need not be materialized (in a reg) and instead the
two S12 bits can be added to instructions involved with frame pointer.
This avoids burning a register and more importantly can often get down
to be 2 insn vs. 3.

The prev patches to generally avoid LUI based const materialization didn't
fix this PR and need this directed fix in function prologue/epilogue
expansion.

This fix doesn't move the needle for SPEC, at all, but it is still a
win considering gcc generates one insn fewer than llvm for the test ;-)

   gcc-13.1 release   |      gcc 230823     |                   |
                      |    g6619b3d4c15c    |   This patch      |  clang/llvm
---------------------------------------------------------------------------------
li      t0,-4096      | li    t0,-4096      | addi  sp,sp,-2048 | addi sp,sp,-2048
addi    t0,t0,2016    | addi  t0,t0,2032    | add   sp,sp,-16   | addi sp,sp,-32
li      a4,4096       | add   sp,sp,t0      | add   a5,sp,a0    | add  a1,sp,16
add     sp,sp,t0      | addi  a5,sp,-2032   | sb    zero,0(a5)  | add  a0,a0,a1
li      a5,-4096      | add   a0,a5,a0      | addi  sp,sp,2032  | sb   zero,0(a0)
addi    a4,a4,-2032   | li    t0, 4096      | addi  sp,sp,32    | addi sp,sp,2032
add     a4,a4,a5      | sb    zero,2032(a0) | ret               | addi sp,sp,48
addi    a5,sp,16      | addi  t0,t0,-2032   |                   | ret
add     a5,a4,a5      | add   sp,sp,t0      |
add     a0,a5,a0      | ret                 |
li      t0,4096       |
sd      a5,8(sp)      |
sb      zero,2032(a0) |
addi    t0,t0,-2016   |
add     sp,sp,t0      |
ret                   |

gcc/ChangeLog:
PR target/105733
* config/riscv/riscv.h: New macros for with aligned offsets.
* config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
function to split a sum of two s12 values into constituents.
(riscv_expand_prologue): Handle offset being sum of two S12.
(riscv_expand_epilogue): Ditto.
* config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105733.c: New Test.
* gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
expect LUI 4096.
* gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.

Tested-by: Edwin Lu  # pre-commit-CI #1568
Signed-off-by: Vineet Gupta 
---
 gcc/config/riscv/riscv-protos.h   |  2 +
 gcc/config/riscv/riscv.cc | 54 +--
 gcc/config/riscv/riscv.h  |  7 +++
 gcc/testsuite/gcc.target/riscv/pr105733.c | 15 ++
 .../riscv/rvv/autovec/vls/spill-1.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-2.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-3.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-4.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-5.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-6.c   |  4 +-
 .../riscv/rvv/autovec/vls/spill-7.c   |  4 +-
 11 files changed, 89 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr105733.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c64aae18deb9..0704968561bb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -167,6 +167,8 @@ extern void riscv_subword_address (rtx, rtx *, rtx *, rtx 
*, rtx *);
 extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
 extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
 extern bool riscv_reg_frame_related (rtx);
+extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
+   HOST_WIDE_INT *);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d0c22058b8c3..2ecbcf1d0af8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4075,6 +4075,32 @@ riscv_split_doubleword_move (rtx dest, rtx src)
riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
  }
 }
+
+/* Constant VAL is known to be sum of two S12 constants.  Break it into
+   comprising BASE and OFF.
+   Numerically S12 is -2048 to 2047, however it uses the more conservative
+   range -2048 to 2032 as offsets pertain to stack related registers.  */
+
+void
+riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
+   HOST_WIDE_INT *off)
+{
+  if (SUM_OF_TWO_S12_N (val))
+{
+  *base = -2048;
+  *off = val - (-2048);
+}
+  else if (SUM_OF_TW

Re: [PATCH v3 2/2] RISC-V: avoid LUI based const mat in alloca epilogue expansion

2024-05-21 Thread Jeff Law




On 5/20/24 5:32 PM, Vineet Gupta wrote:

This is testsuite clean however there's a dwarf quirk which I want to
run by the experts. The test that was tripping CI has following
fragment:

Before patch              |   After Patch
-----------------------------------------------------------
li      t0,-4096          |  addi sp,s0,-2048
addi    t0,t0,560         |  .cfi_def_cfa 2, 2048       <- #1
add     sp,s0,t0          |  addi sp,sp,-1488
.cfi_def_cfa 2, 3536      |  .cfi_def_cfa_offset 3536   <- #2
addi    sp,sp,1504        |  addi sp,sp,1504
.cfi_def_cfa_offset 2032  |  .cfi_def_cfa_offset 2032   <- #3

The dwarf insn #1 and #3 seem ok, however #2 seems dubious to me.

---

This is continuing on the prev patch in function epilogue expansion.

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_expand_epilogue): Handle offset
being sum of two S12.

OK.
jeff



Re: [PATCH v1 2/2] RISC-V: Add test cases for branch form unsigned SAT_ADD

2024-05-21 Thread Jeff Law




On 5/20/24 5:01 AM, pan2...@intel.com wrote:

From: Pan Li 

After we support branch form unsigned SAT_ADD from the
middle end.  Add more test cases to cover the functionalities.

The below test suites are passed.
* The rv64gcv fully regression test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_arith.h: Add branch form test macro.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-9.c: New test.
* gcc.target/riscv/sat_u_add-10.c: New test.
* gcc.target/riscv/sat_u_add-11.c: New test.
* gcc.target/riscv/sat_u_add-12.c: New test.
* gcc.target/riscv/sat_u_add-9.c: New test.
* gcc.target/riscv/sat_u_add-run-10.c: New test.
* gcc.target/riscv/sat_u_add-run-11.c: New test.
* gcc.target/riscv/sat_u_add-run-12.c: New test.
* gcc.target/riscv/sat_u_add-run-9.c: New test.


OK

jeff



[COMMITTED] RISC-V: avoid LUI based const mat in alloca epilogue expansion

2024-05-21 Thread Vineet Gupta
This is continuing on the prev patch in function epilogue expansion.
Broken out for ease of review.

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_expand_epilogue): Handle offset
being sum of two S12.

Tested-by: Patrick O'Neill  # pre-commit-CI #1569
Signed-off-by: Vineet Gupta 
---
 gcc/config/riscv/riscv.cc | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2ecbcf1d0af8..85df5b7ab498 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8111,7 +8111,10 @@ riscv_expand_epilogue (int style)
   need_barrier_p = false;
 
   poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
+  rtx dwarf_adj = gen_int_mode (adjust_offset, Pmode);
   rtx adjust = NULL_RTX;
+  bool sum_of_two_s12 = false;
+  HOST_WIDE_INT one, two;
 
   if (!adjust_offset.is_constant ())
{
@@ -8123,14 +8126,23 @@ riscv_expand_epilogue (int style)
}
   else
{
- if (!SMALL_OPERAND (adjust_offset.to_constant ()))
+ HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
+ if (SMALL_OPERAND (adj_off_value))
+   {
+ adjust = GEN_INT (adj_off_value);
+   }
+ else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
+   {
+ riscv_split_sum_of_two_s12 (adj_off_value, &one, &two);
+ dwarf_adj = adjust = GEN_INT (one);
+ sum_of_two_s12 = true;
+   }
+ else
{
  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
-  GEN_INT (adjust_offset.to_constant ()));
+  GEN_INT (adj_off_value));
  adjust = RISCV_PROLOGUE_TEMP (Pmode);
}
- else
-   adjust = GEN_INT (adjust_offset.to_constant ());
}
 
   insn = emit_insn (
@@ -8138,14 +8150,21 @@ riscv_expand_epilogue (int style)
  adjust));
 
   rtx dwarf = NULL_RTX;
-  rtx cfa_adjust_value = gen_rtx_PLUS (
-  Pmode, hard_frame_pointer_rtx,
-  gen_int_mode (-frame->hard_frame_pointer_offset, 
Pmode));
+  rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
+  dwarf_adj);
   rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
+
   RTX_FRAME_RELATED_P (insn) = 1;
 
   REG_NOTES (insn) = dwarf;
+
+  if (sum_of_two_s12)
+   {
+ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+   GEN_INT (two)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+   }
 }
 
   if (use_restore_libcall || use_multi_pop)
-- 
2.34.1



Re: [PATCH v5 2/5] C++: Support clang compatible [[musttail]] (PR83324)

2024-05-21 Thread Jason Merrill

On 5/14/24 19:23, Andi Kleen wrote:

You need a template testcase; I expect it doesn't work in templates with the
current patch.  It's probably enough to copy it in tsubst_expr where we
currently propagate CALL_EXPR_OPERATOR_SYNTAX.


I tried it with the appended test case, everything seems to work without
changes.

Does it cover the cases you were concerned about?


Not fully; this testcase doesn't seem to check for errors if tail-call
fails, only whether the syntax is accepted.  So it would pass if the
attribute were simply ignored.


Okay I'm not clear how I would do that. Pattern match the assembler
in a target specific test case? From looking at the assembler output
everything got tail converted.


Write a testcase where the tail-call optimization can't happen, perhaps 
because the caller and callee disagree on return type:


int f();

double h() { [[gnu::musttail]] return f(); } // error

template 
T g() { [[gnu::musttail]] return f(); }

int main()
{
  g();
  g(); // should error, but doesn't with v6 patch set
}

Jason



Re: [PATCH] testsuite: Allow for non-SECTION_MERGE systems in gcc.dg/pr115066.c [PR115066]

2024-05-21 Thread Jason Merrill

On 5/21/24 07:36, Rainer Orth wrote:

gcc.dg/pr115066.c currently FAILs on Solaris/SPARC with the native as:

FAIL: gcc.dg/pr115066.c scan-assembler .bytet0xbt# Define macro strx

Instead of the expected

.byte   0xb ! Define macro strx

the assembler output contains

.byte   0x1 ! Define macro

Apparently this happens because the Solaris as/ld combo doesn't support
SHF_MERGE.

While I initially meant to just skip the test on sparc*-*-solaris2* && !gas,
Tom suggested to allow for both forms instead, which is what his patch
does.

Tested on sparc-sun-solaris2.11 and i386-pc-solaris2.11 (as and gas
each) and x86_64-pc-linux-gnu.

Ok for trunk?


OK.



Re: [PATCH] c++: folding non-dep enumerator from current inst [PR115139]

2024-05-21 Thread Jason Merrill

On 5/17/24 12:05, Patrick Palka wrote:

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk/14?


OK for both.


-- >8 --

After the tsubst_copy removal r14-4796-g3e3d73ed5e85e7 GCC 14 ICEs during
fold_non_dependent_expr for 'e1 | e2' ultimately because we no longer exit
early when substituting the CONST_DECLs for e1 and e2 with args=NULL_TREE,
during which we try substituting the class context A (also with
args=NULL_TREE) which ends up ICEing from tsubst_pack_expansion (due to
processing_template_decl being cleared).

Incidentally, the ICE went away on trunk ever since the tsubst_aggr_type
removal r15-123-gf04dc89a991ddc since it made the CONST_DECL case of
tsubst_expr use tsubst to substitute the context, which does short circuit
for empty args and so avoids the ICE.

This patch fixes this ICE for GCC 14 by narrowly restoring the early exit
for empty args that was present in tsubst_copy when substituting an
enumerator CONST_DECL.  We might as well apply this to trunk too, as a
very minor optimization.

PR c++/115139

gcc/cp/ChangeLog:

* pt.cc (tsubst_expr) <case CONST_DECL>: Exit early if args
is empty.

gcc/testsuite/ChangeLog:

* g++.dg/template/non-dependent33.C: New test.
---
  gcc/cp/pt.cc|  2 +-
  gcc/testsuite/g++.dg/template/non-dependent33.C | 11 +++
  2 files changed, 12 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/template/non-dependent33.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 32640f8e946..e185e3d8941 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -21519,7 +21519,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  
  	if (DECL_TEMPLATE_PARM_P (t))

  RETURN (RECUR (DECL_INITIAL (t)));
-   if (!uses_template_parms (DECL_CONTEXT (t)))
+   if (!args || !uses_template_parms (DECL_CONTEXT (t)))
  RETURN (t);
  
  	/* Unfortunately, we cannot just call lookup_name here.

diff --git a/gcc/testsuite/g++.dg/template/non-dependent33.C 
b/gcc/testsuite/g++.dg/template/non-dependent33.C
new file mode 100644
index 000..2f1dd8a214c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/non-dependent33.C
@@ -0,0 +1,11 @@
+// PR c++/115139
+// { dg-do compile { target c++11 } }
+
+template
+class A {
+  enum E {
+e1 = 1,
+e2 = 2,
+e3 = e1 | e2,
+  };
+};




[PING] [PATCH] Fortran: fix bounds check for assignment, class component [PR86100]

2024-05-21 Thread Harald Anlauf

Am 13.05.24 um 22:27 schrieb Harald Anlauf:

Dear all,

the attached patch does two things:

- it fixes a bogus array bounds check when deep-copying a class component
   of a derived type and the class component has rank > 1, the reason being
   that the previous code compared the full size of one side with the size
   of the first dimension of the other

- the bounds-check error message that was generated e.g. by an allocate
   statement with conflicting sizes in the allocation and the source-expr
   will now use an improved abbreviated name pointing to the component
   involved, which was introduced in 14-development.

What I could not resolve: a deep copy may still create no useful array
name in the error message (which I am now unable to trigger).  If someone
sees how to extract it reliably from the tree, please let me know.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

I would like to backport this to 14-branch after a decent delay.

Thanks,
Harald





Re: [PATCH] Fix mixed input kind permute optimization

2024-05-21 Thread Richard Sandiford
Richard Biener  writes:
> When change_vec_perm_layout runs into a permute combining two
> nodes where one is invariant and one internal the partition of
> one input can be -1 but the other might not be.  The following
> supports this case by simply ignoring inputs with input partition -1.
>
> I'm not sure this is correct but it avoids ICEing when accessing
> that partitions layout for gcc.target/i386/pr98928.c with the
> change to avoid splitting store dataref groups during SLP discovery.
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu (ontop of
> the SLP series).  The change can't break anything that's already
> broken but I'm not sure this does the right thing - the testcase
> has an uniform constant.  I'll try to come up with a better runtime
> testcase tomorrow.  Hints as to where to correctly fix such case
> appreciated.

Famous last words, but yeah, it looks correct to me.  I think the
routine in principle should have a free choice of which layout to
choose for invariants (as long as it's consistent for all queries
about the same node).  So it should just be a question of whether
keeping the original layout is more likely to give a valid
permutation, or whether going with out_layout_i would be better.
I don't have a strong intuition either way.

Thanks,
Richard

>
>   * tree-vect-slp.cc (change_vec_perm_layout): Ignore an
>   input partition of -1.
> ---
>  gcc/tree-vect-slp.cc | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 873748b0a72..f6ec1a81c96 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -4828,6 +4828,8 @@ change_vec_perm_layout (slp_tree node, 
> lane_permutation_t &perm,
>   {
> slp_tree in_node = SLP_TREE_CHILDREN (node)[entry.first];
> unsigned int in_partition_i = m_vertices[in_node->vertex].partition;
> +   if (in_partition_i == -1u)
> + continue;
> this_in_layout_i = m_partitions[in_partition_i].layout;
>   }
>if (this_in_layout_i > 0)


  1   2   >