[PATCH][pushed] contrib: Support itemx in check-params-in-docs.py.

2021-11-22 Thread Martin Liška

Pushed to master.

Martin

contrib/ChangeLog:

* check-params-in-docs.py: Support @itemx in param documentation
and support multi-line documentation for parameters.
---
 contrib/check-params-in-docs.py | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/contrib/check-params-in-docs.py b/contrib/check-params-in-docs.py
index 440549f5fd8..d57055088b7 100755
--- a/contrib/check-params-in-docs.py
+++ b/contrib/check-params-in-docs.py
@@ -47,7 +47,7 @@ ignored = {'logical-op-non-short-circuit'}
 params = {}
 
 for line in open(args.params_output).readlines():

-if line.startswith('  '):
+if line.startswith(' ' * 2) and not line.startswith(' ' * 8):
 r = get_param_tuple(line)
 params[r[0]] = r[1]
 
@@ -57,15 +57,20 @@ texi = dropwhile(lambda x: 'item --param' not in x, texi)

 texi = takewhile(lambda x: '@node Instrumentation Options' not in x, texi)
 texi = list(texi)[1:]
 
-token = '@item '

-texi = [x[len(token):] for x in texi if x.startswith(token)]
+texi_params = []
+for line in texi:
+for token in ('@item ', '@itemx '):
+if line.startswith(token):
+texi_params.append(line[len(token):])
+break
+
 # skip digits
-texi = [x for x in texi if not x[0].isdigit()]
+texi_params = [x for x in texi_params if not x[0].isdigit()]
 # skip aarch64 params
-texi = [x for x in texi if not x.startswith('aarch64')]
-sorted_texi = sorted(texi)
+texi_params = [x for x in texi_params if not x.startswith('aarch64')]
+sorted_params = sorted(texi_params)
 
-texi_set = set(texi) - ignored

+texi_set = set(texi_params) - ignored
 params_set = set(params.keys()) - ignored
 
 success = True

@@ -84,7 +89,4 @@ if len(missing):
 print()
 success = False
 
-if texi != sorted_texi:

-print('WARNING: not sorted alphabetically!')
-
 sys.exit(0 if success else 1)
--
2.33.1



Improve byte-wise DSE (modref-dse-[45].c failures)

2021-11-22 Thread Jan Hubicka via Gcc-patches
Hi,
testcase modref-dse-4.c and modref-dse-5.c fails on some targets because they
depend on store merging.  What really happens is that without store merging
we produce for kill_me combined write that is ao_ref with offset=0, size=32
and max_size=96.  We have size != max_size becaue we do ont track the info that
all 3 writes must happen in a group and conider case only some of them are done.

This disables byte-wise DSE which checks that size == max_size.  This is
completely unnecesary for store being proved to be dead or load being checked
to not read live bytes.  It is only necessary for kill store that is used to
prove that given store is dead.

While looking into this I also noticed that we check that everything is byte
aligned.  This is also unnecessary and with access merging in modref may more
commonly fire on accesses that we could otherwise handle.

This patch fixes both also also changes interface to normalize_ref that I found
confusing since it modifies the ref. Instead of that we have get_byte_range
that is computing range in bytes (since that is what we need to maintain the
bitmap) and has additional parameter specifying if the store in question should
be turned into sub-range or super-range depending whether we compute range
for kill or load.

Bootstrapped/regtested x86_64-linux OK?
gcc/ChangeLog:

2021-11-23  Jan Hubicka  

* tree-ssa-dse.c (valid_ao_ref_for_dse): Rename to ...
(valid_ao_ref_kill_for_dse): ... this; do not check that boundaries
are divisible by BITS_PER_UNIT.
(get_byte_aligned_range_containing_ref): New function.
(get_byte_aligned_range_contained_in_ref): New function.
(normalize_ref): Rename to ...
(get_byte_range): ... this one; handle accesses not aligned to byte
boundary; return range in bytes rater than updating ao_ref.
(clear_live_bytes_for_ref): Take write ref by reference; simplify using
get_byte_access.
(setup_live_bytes_from_ref): Likewise.
(clear_bytes_written_by): Update.
(live_bytes_read): Update.
(dse_classify_store): Simplify tech before live_bytes_read checks.

gcc/testsuite/ChangeLog:

2021-11-23  Jan Hubicka  

* gcc.dg/tree-ssa/modref-dse-4.c: Update template.
* gcc.dg/tree-ssa/modref-dse-5.c: Update template.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
index 81aa7dc587c..19e91b00f15 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dse2-details"  } */
+/* { dg-options "-O2 -fdump-tree-dse1-details"  } */
 struct a {int a,b,c;};
 __attribute__ ((noinline))
 void
@@ -23,4 +23,4 @@ set (struct a *a)
   my_pleasure (a);
   a->b=1;
 }
-/* { dg-final { scan-tree-dump "Deleted dead store: kill_me" "dse2" } } */
+/* { dg-final { scan-tree-dump "Deleted dead store: kill_me" "dse1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-5.c
index ad35b70136f..dc2c2892615 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dse2-details"  } */
+/* { dg-options "-O2 -fdump-tree-dse1-details"  } */
 struct a {int a,b,c;};
 __attribute__ ((noinline))
 void
@@ -36,8 +36,7 @@ set (struct a *a)
 {
   wrap (0, a);
   int ret = wrap2 (0, a);
-  //int ret = my_pleasure (a);
   a->b=1;
   return ret;
 }
-/* { dg-final { scan-tree-dump "Deleted dead store: wrap" "dse2" } } */
+/* { dg-final { scan-tree-dump "Deleted dead store: wrap" "dse1" } } */
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 9531d892f76..8717d654e5a 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -156,57 +156,137 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
 }
 
 /* Given REF from the alias oracle, return TRUE if it is a valid
-   memory reference for dead store elimination, false otherwise.
+   kill memory reference for dead store elimination, false otherwise.
 
In particular, the reference must have a known base, known maximum
size, start at a byte offset and have a size that is one or more
bytes.  */
 
 static bool
-valid_ao_ref_for_dse (ao_ref *ref)
+valid_ao_ref_kill_for_dse (ao_ref *ref)
 {
   return (ao_ref_base (ref)
  && known_size_p (ref->max_size)
  && maybe_ne (ref->size, 0)
  && known_eq (ref->max_size, ref->size)
- && known_ge (ref->offset, 0)
- && multiple_p (ref->offset, BITS_PER_UNIT)
- && multiple_p (ref->size, BITS_PER_UNIT));
+ && known_ge (ref->offset, 0));
+}
+
+/* Given REF from the alias oracle, return TRUE if it is a valid
+   load or store memory reference for dead store elimination, false otherwise.
+
+   Unlike for valid_ao_ref_kill_for_dse we can accept writes where max_size

Re: [PATCH] Fix incorrect loop exit edge probability [PR103270]

2021-11-22 Thread Xionghu Luo via Gcc-patches



On 2021/11/23 13:51, Xionghu Luo wrote:
> r12-4526 cancelled jump thread path rotates loop. It exposes a issue in
> profile-estimate when predict_extra_loop_exits, outer loop's exit edge
> is marked as inner loop's extra loop exit and set with incorrect
> prediction, then a hot inner loop will become cold loop finally through
> optimizations, this patch ignores the EDGE_DFS_BACK edge when searching
> extra exit edges to avoid unexpected predict_edge.
> 
> gcc/ChangeLog:
> 
>   PR middle-end/103270
>   * predict.c (predict_extra_loop_exits): Ignore EDGE_DFS_BACK edge.
> 
> gcc/ChangeLog:
> 
>   PR middle-end/103270
>   * predict.c (predict_extra_loop_exits): New.
> ---
>  gcc/predict.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/gcc/predict.c b/gcc/predict.c
> index 68b11135680..1ae8ccff72c 100644
> --- a/gcc/predict.c
> +++ b/gcc/predict.c
> @@ -1910,6 +1910,10 @@ predict_extra_loop_exits (edge exit_edge)
>   continue;
>if ((check_value_one ^ integer_onep (val)) == 1)
>   continue;
> +  if (e->flags & EDGE_DFS_BACK)
> + continue;

Sorry, made a mistake before send the patch, #if 0 #endif should be
removed...


>if (EDGE_COUNT (e->src->succs) != 1)
>   {
> predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
> 

-- 
Thanks,
Xionghu


[PATCH] Fix incorrect loop exit edge probability [PR103270]

2021-11-22 Thread Xionghu Luo via Gcc-patches
r12-4526 cancelled jump thread path rotates loop. It exposes a issue in
profile-estimate when predict_extra_loop_exits, outer loop's exit edge
is marked as inner loop's extra loop exit and set with incorrect
prediction, then a hot inner loop will become cold loop finally through
optimizations, this patch ignores the EDGE_DFS_BACK edge when searching
extra exit edges to avoid unexpected predict_edge.

gcc/ChangeLog:

PR middle-end/103270
* predict.c (predict_extra_loop_exits): Ignore EDGE_DFS_BACK edge.

gcc/ChangeLog:

PR middle-end/103270
* predict.c (predict_extra_loop_exits): New.
---
 gcc/predict.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/predict.c b/gcc/predict.c
index 68b11135680..1ae8ccff72c 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -1910,6 +1910,10 @@ predict_extra_loop_exits (edge exit_edge)
continue;
   if ((check_value_one ^ integer_onep (val)) == 1)
continue;
+#if 0
+  if (e->flags & EDGE_DFS_BACK)
+   continue;
+#endif
   if (EDGE_COUNT (e->src->succs) != 1)
{
  predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN);
-- 
2.25.1



[PATCH v2] Canonicalize [ssa_n, CST] to ssa_n p+ CST in fold_stmt_1

2021-11-22 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This is a new version of the patch to fix PR 102216.
Instead of doing the canonicalization inside forwprop, Richi
mentioned we should do it inside fold_stmt_1 and that is what
this patch does.

PR tree-optimization/102216

gcc/ChangeLog:

* gimple-fold.c (fold_stmt_1): Add canonicalization
of "[ssa_n, CST]" to "ssa_n p+ CST", note this
can only be done if !in_place.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/pr102216-1.C: New test.
* g++.dg/tree-ssa/pr102216-2.C: New test.
---
 gcc/gimple-fold.c  | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C | 45 ++
 3 files changed, 87 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index ad9703ee471..aab6818c93f 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -6061,6 +6061,27 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, 
tree (*valueize) (tree))
  if (REFERENCE_CLASS_P (*lhs)
  && maybe_canonicalize_mem_ref_addr (lhs))
changed = true;
+ /* Canonicalize [ssa_n, CST] to ssa_n p+ CST.
+This cannot be done in maybe_canonicalize_mem_ref_addr
+as the gimple now has two operands rather than one.
+The same reason why this can't be done in
+maybe_canonicalize_mem_ref_addr is the same reason why
+this can't be done inplace.  */
+ if (!inplace && TREE_CODE (*rhs) == ADDR_EXPR)
+   {
+ tree inner = TREE_OPERAND (*rhs, 0);
+ if (TREE_CODE (inner) == MEM_REF
+ && TREE_CODE (TREE_OPERAND (inner, 0)) == SSA_NAME
+ && TREE_CODE (TREE_OPERAND (inner, 1)) == INTEGER_CST)
+   {
+ tree ptr = TREE_OPERAND (inner, 0);
+ tree addon = TREE_OPERAND (inner, 1);
+ addon = fold_convert (sizetype, addon);
+ gimple_assign_set_rhs_with_ops (gsi, POINTER_PLUS_EXPR,
+ ptr, addon);
+ changed = true;
+   }
+   }
}
   else
{
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
new file mode 100644
index 000..21f7f6797ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+void link_error ();
+void g ()
+{
+  const char **language_names;
+
+  language_names = new const char *[6];
+
+  const char **language_names_p = language_names;
+
+  language_names_p++;
+  language_names_p++;
+  language_names_p++;
+
+  if ( (language_names_p) - (language_names+3) != 0)
+link_error();
+  delete[] language_names;
+}
+/* We should have removed the link_error on the gimple level as GCC should
+   be able to tell that language_names_p is the same as language_names+3.  */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C
new file mode 100644
index 000..8d351a9bad0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C
@@ -0,0 +1,45 @@
+/* { dg-options "-O2 -Wall" } */
+#include 
+
+static inline bool
+compare_cstrings (const char *str1, const char *str2)
+{
+  return str1 < str2;
+}
+
+void
+add_set_language_command ()
+{
+  static const char **language_names;
+
+  language_names = new const char *[6];
+
+  language_names[0] = "auto";
+  language_names[1] = "local";
+  language_names[2] = "unknown";
+
+  const char **language_names_p = language_names;
+  /* language_names_p == _names[0].  */
+  language_names_p++;
+  /* language_names_p == _names[1].  */
+  language_names_p++;
+  /* language_names_p == _names[2].  */
+  language_names_p++;
+  /* language_names_p == _names[3].  */
+
+  const char **sort_begin;
+
+  if (0)
+sort_begin = _names[3];
+  else
+sort_begin = language_names_p;
+
+  language_names[3] = "";
+  language_names[4] = "";
+  language_names[5] = NULL;
+
+  /* There should be no warning associated with this std::sort as
+ sort_begin != _names[5] and GCC should be able to figure
+ that out.  */
+  std::sort (sort_begin, _names[5], compare_cstrings);
+}
-- 
2.17.1



Re: [PATCH 0/3] Add zero cycle move support

2021-11-22 Thread HAO CHEN GUI via Gcc-patches
Bill and David,

    Currently, the absolute jump table is not by default enabled. It can be 
enabled by undocumented option "-mno-relative-jumptables". If the target 
supports named sections (have_named_sections), the feature can be enabled. We 
plan to enable the future by default in GCC12 and there is a ticket for it.  
Latest status is that I am waiting for comments on my patch. 
(https://github.ibm.com/wschmidt/power-gcc/issues/998#issuecomment-34643825). 
Thanks.

||

On 23/11/2021 上午 12:09, David Edelsohn wrote:
> On Mon, Nov 22, 2021 at 10:58 AM Bill Schmidt  wrote:
>> Hi!
>>
>> On 11/19/21 8:49 AM, Michael Meissner wrote:
>>> The next set of 3 patches add zero cycle move support to the Power10.  Zero
>>> cycle moves are where the move to LR/CTR/TAR register that is adjacent to 
>>> the
>>> jump to LR/CTR/TAR register can be fused together.
>>>
>>> At the moment, these set of three patches add support for zero cycle moves 
>>> for
>>> indirect jumps and switch tables using the CTR register.  Potential zero 
>>> cycle
>>> moves for doing returns are not currently handled.
>>>
>>> In looking at the code, I discovered that just using zero cycle moves isn't 
>>> as
>>> helpful unless we can eliminate the add instruction before doing the jump.  
>>> I
>>> also noticed that the various power10 fusion options are only done if
>>> -mcpu=power10.  I added a patch to do the fusion for -mtune=power10 as well.
>>>
>>> I have done bootstraps and make check with these patches installed on both
>>> little endian power9 and little endian power10 systems.  Can I install these
>>> patches?
>>>
>>> The following patches will be posted:
>>>
>>> 1) Patch to add zero cycle move for indirect jumps and switches.
>>>
>>> 2) Patch to enable p10 fusion for -mtune=power10 in addition to 
>>> -mcpu=power10.
>>>
>>> 3) Patch to use absolute addresses for switch tables instead of relative
>>>addresses if zero cycle fusion is enabled.
>>>
>> For this last point, I had thought that the plan was to always switch over to
>> absolute addresses for switch tables, following the work that Hao Chen did in
>> this area.  Am I misremembering?  Hao Chen, can you please remind me where we
>> ended up here?
> And do the absolute addressing for switch tables changes work on AIX?
> I thought that Hao Chen only had done the work for PPC64 Linux ELF
> syntax with promises of future changes to accommodate AIX as well.
>
> Thanks, David


Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/102232 Adding a missing pattern to match.pd

2021-11-22 Thread Navid Rahimi via Gcc-patches
Thanks Jeff for this too.

Best wishes,
Navid.


From: Jeff Law 
Sent: Monday, November 22, 2021 19:09
To: Richard Biener; Navid Rahimi
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/102232 Adding a 
missing pattern to match.pd

[You don't often get email from jeffreya...@gmail.com. Learn why this is 
important at http://aka.ms/LearnAboutSenderIdentification.]

On 11/10/2021 1:35 AM, Richard Biener via Gcc-patches wrote:
> On Tue, Nov 9, 2021 at 5:25 PM Navid Rahimi  wrote:
>> Hi Richard,
>>
>> Thank you so much for your detailed feedback. I am attaching another version 
>> of the patch which does include all the changes you mentioned.
>>
>> Bellow you can see my response to your feedbacks:
>>
>>> the canonical order of the plus is (plus:s (trunc_div ...) integer_onep) as
>>> constants come last - you can then remove the 'c'
>> Fixed. I was not aware of the canonical order.
>>
>>> you can use INTEGRAL_TYPE_P (type).
>> Fixed. Didn't know about "type" either.
>>
>>> this test is not necessary
>> Fixed.
>>
>>> But should we also optimize x * (2 + y/x) - y -> 2*x - y % x?  So
>>> it looks like a conflict with the x * (1 + b) <-> x + x * b transform
>>> (fold_plusminus_mult)?  That said, the special case of one
>>> definitely makes the result cheaper (one less operation).
>> For this special case, it does remove operation indeed. But I was not able 
>> to reproduce it for any other constant [1]. If it was possible to do it with 
>> other constants I would've changed the pattern to have be more general like 
>> "x * (C + y/x) - y -> C*x - y % x". Basically anything other than 1 wasn't a 
>> win. Regarding the "x * (1 + b) <-> x + x * b" transformation, it appears to 
>> me when there is a "- y" at the end "x * (1 + b)", there is opportunity to 
>> optimize. Without that "- y" I was not able to make any operation more 
>> performant. Either direction, looks like same amount of computation.
>>
>> 1) 
>> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Fcompiler-explorer.com%2Fz%2FdWsq7Tzf4data=04%7C01%7Cnavidrahimi%40microsoft.com%7C1ddd355d86ee4a5d645808d9ae2e9e73%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C637732337521412439%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=K4ecZcigYkh8%2BSiqDegRxm72gkL%2FnsbuDDp5nsM3ZqA%3Dreserved=0
>>
>>> Please move the pattern next to the most relatest which I think is
>> Fixed.
>>
>>> the return value of 1 is an unreliable way to fail, please instead call
>>> __builtin_abort ();
>> Fixed.
>>
>>> do we really need -O3 for this?  I think that -O should be enough here?
>> We don't specifically need that. But I realized that the optimization can 
>> happen in two different level at the compiler. It seems if you spread the 
>> computation over multiple statement like:
>>int c = a/b;
>>int y = b * (1 + c);
>>return y - a;
>>
>> instead of :
>>return b * (1 + a / b) - a;
>>
>> Then you have to have at least -O1 to have it optimized. Granted, I am not 
>> doing that in the testcase. In the new patch I am changing it to -O. Let me 
>> know if you have any suggestions.
> -O is fine, generally at -O0 we shouldn't expect such transforms to
> happen (but they still do, of course).
>
> The patch looks OK now.
I've pushed this to the trunk for Navid.

jeff



Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/96779 Adding a missing pattern to match.pd

2021-11-22 Thread Navid Rahimi via Gcc-patches
Thanks Jeff.

Best wishes,
Navid.


From: Jeff Law 
Sent: Monday, November 22, 2021 16:48
To: Richard Biener; Navid Rahimi
Cc: Navid Rahimi via Gcc-patches
Subject: Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/96779 Adding a missing 
pattern to match.pd

[You don't often get email from jeffreya...@gmail.com. Learn why this is 
important at http://aka.ms/LearnAboutSenderIdentification.]

On 11/22/2021 1:45 AM, Richard Biener via Gcc-patches wrote:
> On Fri, Nov 19, 2021 at 11:33 PM Navid Rahimi  
> wrote:
>> Hi Richard,
>>
>> Thanks for the detailed comment. I am attaching a newer version of the patch 
>> which does have required fixes included. Bellow you can see my response to 
>> your feedbacks:
>>
>>> you need to check TYPE_OVERFLOW_WRAPS on TREE_TYPE (@0),
>>> otherwise you check on boolean.
>> Fixed it.
>>
>>> no need for :c on the result pattern.  Otherwise it looks OK, but how
>>> did you check the patch?
>> Fixed it. For checking the patch, I have script which builds and runs make 
>> check for 1) trunk and 2) trunk+patch in a separate directory and diffs the 
>> test results from each directory. My test script did had a subtle problem. 
>> The bug was, because of a typo in the path I introduced few days ago, it was 
>> diffing same trunk+patch test results against trunk+patch test results.
> OK, please indicate that in the future, like with "Bootstrapped and
> tested on x86_64-linux" or so.
>
>> That was a good reminder to setup an account for myself here asap [1].
>>
>> 1) 
>> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgcc.gnu.org%2Fwiki%2FCompileFarmdata=04%7C01%7Cnavidrahimi%40microsoft.com%7C02e3c84f12dc48c34c7e08d9ae1b030e%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C637732253312568135%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=8virYthoelGhRKtjTo1g%2F9GV67I4rze5iv07XnpCpNk%3Dreserved=0
> The updated patch is OK.
I don't think Navid has commit privs, so I fixed up the commit message
and committed the patch for Navid.

jeff



Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/102232 Adding a missing pattern to match.pd

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/10/2021 1:35 AM, Richard Biener via Gcc-patches wrote:

On Tue, Nov 9, 2021 at 5:25 PM Navid Rahimi  wrote:

Hi Richard,

Thank you so much for your detailed feedback. I am attaching another version of 
the patch which does include all the changes you mentioned.

Bellow you can see my response to your feedbacks:


the canonical order of the plus is (plus:s (trunc_div ...) integer_onep) as
constants come last - you can then remove the 'c'

Fixed. I was not aware of the canonical order.


you can use INTEGRAL_TYPE_P (type).

Fixed. Didn't know about "type" either.


this test is not necessary

Fixed.


But should we also optimize x * (2 + y/x) - y -> 2*x - y % x?  So
it looks like a conflict with the x * (1 + b) <-> x + x * b transform
(fold_plusminus_mult)?  That said, the special case of one
definitely makes the result cheaper (one less operation).

For this special case, it does remove operation indeed. But I was not able to reproduce it for any other constant [1]. If it was possible 
to do it with other constants I would've changed the pattern to have be more general like "x * (C + y/x) - y -> C*x - y % x". 
Basically anything other than 1 wasn't a win. Regarding the "x * (1 + b) <-> x + x * b" transformation, it appears to me 
when there is a "- y" at the end "x * (1 + b)", there is opportunity to optimize. Without that "- y" I was 
not able to make any operation more performant. Either direction, looks like same amount of computation.

1) https://compiler-explorer.com/z/dWsq7Tzf4


Please move the pattern next to the most relatest which I think is

Fixed.


the return value of 1 is an unreliable way to fail, please instead call
__builtin_abort ();

Fixed.


do we really need -O3 for this?  I think that -O should be enough here?

We don't specifically need that. But I realized that the optimization can 
happen in two different level at the compiler. It seems if you spread the 
computation over multiple statement like:
   int c = a/b;
   int y = b * (1 + c);
   return y - a;

instead of :
   return b * (1 + a / b) - a;

Then you have to have at least -O1 to have it optimized. Granted, I am not 
doing that in the testcase. In the new patch I am changing it to -O. Let me 
know if you have any suggestions.

-O is fine, generally at -O0 we shouldn't expect such transforms to
happen (but they still do, of course).

The patch looks OK now.

I've pushed this to the trunk for Navid.

jeff



Re: [PATCH 1/2] Sync with binutils: GCC: Pass --plugin to AR and RANLIB

2021-11-22 Thread H.J. Lu via Gcc-patches
On Mon, Nov 22, 2021 at 4:29 PM Jeff Law  wrote:
>
>
>
> On 11/13/2021 9:33 AM, H.J. Lu via Gcc-patches wrote:
> > Sync with binutils for building binutils with LTO:
> >
> >  From 50ad1254d5030d0804cbf89c758359ae202e8d55 Mon Sep 17 00:00:00 2001
> > From: "H.J. Lu" 
> > Date: Sat, 9 Jan 2021 06:43:11 -0800
> > Subject: [PATCH] GCC: Pass --plugin to AR and RANLIB
> >
> > Detect GCC LTO plugin.  Pass --plugin to AR and RANLIB to support LTO
> > build.
> >
> >   * Makefile.tpl (AR): Add @AR_PLUGIN_OPTION@
> >   (RANLIB): Add @RANLIB_PLUGIN_OPTION@.
> >   * configure.ac: Include config/gcc-plugin.m4.
> >   AC_SUBST AR_PLUGIN_OPTION and RANLIB_PLUGIN_OPTION.
> >   * libtool.m4 (_LT_CMD_OLD_ARCHIVE): Pass --plugin to AR and
> >   RANLIB if possible.
> >   * Makefile.in: Regenerated.
> >   * configure: Likewise.
> >
> > config/
> >
> >   * gcc-plugin.m4 (GCC_PLUGIN_OPTION): New.
> >
> > libiberty/
> >
> >   * Makefile.in (AR): Add @AR_PLUGIN_OPTION@
> >   (RANLIB): Add @RANLIB_PLUGIN_OPTION@.
> >   (configure_deps): Depend on ../config/gcc-plugin.m4.
> >   * configure.ac: AC_SUBST AR_PLUGIN_OPTION and
> >   RANLIB_PLUGIN_OPTION.
> >   * aclocal.m4: Regenerated.
> >   * configure: Likewise.
> >
> > zlib/
> >
> >   * configure: Regenerated.
> I thought the plugins were automatically loaded if they're in the right
> place in the filesystem.  Wouldn't that make this patch unnecessary?  Am
> I missing something?
>

It only works for system GCC and binutils.  It doesn't work for non-system
GCC nor binutils since either GCC plugin isn't installed in the binutils plugin
search patch.

-- 
H.J.


Re: [PATCH] libgccjit: Add function to set the initial value of a global variable [PR96089]

2021-11-22 Thread Antoni Boucher via Gcc-patches
Hi David!

I updated the patch to allow initializing global variables with values
of type array or struct.

I also fixed the bug I was talking in my previous message by using the
following workaround: I create a new memento for the action of setting
the global variable initial value and as such, both the global variable
and the initial value are bound to exist when setting the global
variable initializer.
Is that workaround good enough?
(I guess that workaround could be used to fix the same issue that we
have for inline assembly.)

Thanks for the review!

Le vendredi 11 juin 2021 à 16:44 -0400, Antoni Boucher a écrit :
> David: this one wasn't reviewed yet by you, so you can review it.
> 
> Le jeudi 20 mai 2021 à 21:27 -0400, Antoni Boucher a écrit :
> > Hi.
> > 
> > I made this patch to set an arbitrary value to a global variable.
> > 
> > This patch suffers from the same issue as inline assembly
> > (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100380), i.e. it
> > segfaults if the `rvalue` is created after the global variable.
> > It seems to be a design issue so I'm not sure what would be the fix
> > for
> > it and having it fixed would allow me to test this new function
> > much
> > more and see if things are missing (i.e. it might require a way to
> > create a constant struct).
> > See the link above for an explanation of this issue.
> > 
> > Thanks for the review.
> 

From a095291f43ca8348b6c84f94a598895969d94d1b Mon Sep 17 00:00:00 2001
From: Antoni Boucher 
Date: Sat, 25 Sep 2021 16:37:47 -0400
Subject: [PATCH] libgccjit: Add function to set the initial value of a global
 variable [PR96089]

2021-11-22  Antoni Boucher  

gcc/jit/
PR target/96089
* docs/topics/compatibility.rst (LIBGCCJIT_ABI_21): New ABI
tag.
* docs/topics/expressions.rst: Add documentation for the
function gcc_jit_global_set_initializer_value,
gcc_jit_context_new_rvalue_from_array,
and gcc_jit_context_new_rvalue_from_struct.
* jit-common.h: Add missing reference to array_type in class
hierarchy.
* jit-playback.c: New functions (new_global_with_value,
set_global_initial_value, new_rvalue_from_struct, new_rvalue_from_array).
* jit-playback.h: New functions (new_global_with_value,
set_global_initial_value, new_rvalue_from_struct, new_rvalue_from_array).
* jit-recording.c: Add support for setting a value to a
global variable and new methods
(global_initializer::write_reproducer,
global_initializer::make_debug_string,
global_initializer::write_to_dump,
global_initializer::replay_into,
context::new_global_value_initializer,
memento_of_new_rvalue_from_struct::write_reproducer,
memento_of_new_rvalue_from_struct::make_debug_string,
memento_of_new_rvalue_from_struct::visit_children,
memento_of_new_rvalue_from_struct::replay_into,
memento_of_new_rvalue_from_struct::
  memento_of_new_rvalue_from_struct,
context::new_rvalue_from_struct,
memento_of_new_rvalue_from_array::write_reproducer,
memento_of_new_rvalue_from_array::make_debug_string,
memento_of_new_rvalue_from_array::visit_children,
memento_of_new_rvalue_from_array::replay_into,
memento_of_new_rvalue_from_array::
  memento_of_new_rvalue_from_array,
new_rvalue_from_array).
* jit-recording.h: New functions (set_initializer_value,
new_global_value_initializer, new_rvalue_from_struct, new_rvalue_from_array,
get_kind),
new field m_initializer_value and new classes (global_initializer,
memento_of_new_rvalue_from_struct, memento_of_new_rvalue_from_array).
* libgccjit.c: New macro RETURN_IF_FAIL_PRINTF5 and new
functions (gcc_jit_global_set_initializer_value,
gcc_jit_context_new_rvalue_from_struct,
gcc_jit_context_new_rvalue_from_array).
* libgccjit.h: New functions (gcc_jit_global_set_initializer_value,
gcc_jit_context_new_rvalue_from_struct,
gcc_jit_context_new_rvalue_from_array).
* libgccjit.map (LIBGCCJIT_ABI_21): New ABI tag.

gcc/testsuite/
PR target/96089
* jit.dg/test-global-set-initializer.c: Add test for the new
function (gcc_jit_global_set_initializer_value).
* jit.dg/test-error-imported-global-initializer.c: Add test
for error checking in setting a value to a global variable.
---
 gcc/jit/docs/topics/compatibility.rst |  13 +
 gcc/jit/docs/topics/expressions.rst   |  62 
 gcc/jit/jit-common.h  |   1 +
 gcc/jit/jit-playback.c|  72 +
 gcc/jit/jit-playback.h|  21 ++
 

Re: [PATCH 1/2] add -Wuse-after-free

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/1/2021 4:17 PM, Martin Sebor via Gcc-patches wrote:

Patch 1 in the series detects a small subset of uses of pointers
made indeterminate by calls to deallocation functions like free
or C++ operator delete.  To control the conditions the warnings
are issued under the new -Wuse-after-free= option provides three
levels.  At the lowest level the warning triggers only for
unconditional uses of freed pointers and doesn't warn for uses
in equality expressions.  Level 2 warns also for come conditional
uses, and level 3 also for uses in equality expressions.

I debated whether to make level 2 or 3 the default included in
-Wall.  I decided on 3 for two reasons: 1) to raise awareness
of both the problem and GCC's new ability to detect it: using
a pointer after it's been freed, even only in principle, by
a successful call to realloc, is undefined, and 2) because
it's trivial to lower the level either globally, or locally
by suppressing the warning around such misuses.

I've tested the patch on x86_64-linux and by building Glibc
and Binutils/GDB.  It triggers a number of times in each, all
due to comparing invalidated pointers for equality (i.e., level
3).  I have suppressed these in GCC (libiberty) by a #pragma,
and will see how the Glibc folks want to deal with theirs (I
track them in BZ #28521).

The tests contain a number of xfails due to limitations I'm
aware of.  I marked them pr?? until the patch is approved.
I will open bugs for them before committing if I don't resolve
them in a followup.

Martin

gcc-63272-1.diff

Add -Wuse-after-free.

gcc/c-family/ChangeLog

* c.opt (-Wuse-after-free): New options.

gcc/ChangeLog:

* diagnostic-spec.c (nowarn_spec_t::nowarn_spec_t): Handle
OPT_Wreturn_local_addr and OPT_Wuse_after_free_.
* diagnostic-spec.h (NW_DANGLING): New enumerator.
* doc/invoke.texi (-Wuse-after-free): Document new option.
* gimple-ssa-warn-access.cc (pass_waccess::check_call): Rename...
(pass_waccess::check_call_access): ...to this.
(pass_waccess::check): Rename...
(pass_waccess::check_block): ...to this.
(pass_waccess::check_pointer_uses): New function.
(pass_waccess::gimple_call_return_arg): New function.
(pass_waccess::warn_invalid_pointer): New function.
(pass_waccess::check_builtin): Handle free and realloc.
(gimple_use_after_inval_p): New function.
(get_realloc_lhs): New function.
(maybe_warn_mismatched_realloc): New function.
(pointers_related_p): New function.
(pass_waccess::check_call): Call check_pointer_uses.
(pass_waccess::execute): Compute and free dominance info.

libcpp/ChangeLog:

* files.c (_cpp_find_file): Substitute a valid pointer for
an invalid one to avoid -Wuse-0after-free.

libiberty/ChangeLog:

* regex.c: Suppress -Wuse-after-free.

gcc/testsuite/ChangeLog:

* gcc.dg/Wmismatched-dealloc-2.c: Avoid -Wuse-after-free.
* gcc.dg/Wmismatched-dealloc-3.c: Same.
* gcc.dg/attr-alloc_size-6.c: Disable -Wuse-after-free.
* gcc.dg/attr-alloc_size-7.c: Same.
* c-c++-common/Wuse-after-free-2.c: New test.
* c-c++-common/Wuse-after-free-3.c: New test.
* c-c++-common/Wuse-after-free-4.c: New test.
* c-c++-common/Wuse-after-free-5.c: New test.
* c-c++-common/Wuse-after-free-6.c: New test.
* c-c++-common/Wuse-after-free-7.c: New test.
* c-c++-common/Wuse-after-free.c: New test.
* g++.dg/warn/Wdangling-pointer.C: New test.
* g++.dg/warn/Wmismatched-dealloc-3.C: New test.
* g++.dg/warn/Wuse-after-free.C: New test.

diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc
index 63fc27a1487..2065402a2b9 100644
--- a/gcc/gimple-ssa-warn-access.cc
+++ b/gcc/gimple-ssa-warn-access.cc

@@ -3397,33 +3417,460 @@ pass_waccess::maybe_check_dealloc_call (gcall *call)
  }
  }
  
+/* Return true if either USE_STMT's basic block (that of a pointer's use)

+   is dominated by INVAL_STMT's (that of a pointer's invalidating statement,
+   which is either a clobber or a deallocation call), or if they're in
+   the same block, USE_STMT follows INVAL_STMT.  */
+
+static bool
+gimple_use_after_inval_p (gimple *inval_stmt, gimple *use_stmt,
+ bool last_block = false)
+{
+  tree clobvar =
+gimple_clobber_p (inval_stmt) ? gimple_assign_lhs (inval_stmt) : NULL_TREE;
+
+  basic_block inval_bb = gimple_bb (inval_stmt);
+  basic_block use_bb = gimple_bb (use_stmt);
+
+  if (inval_bb != use_bb)
+{
+  if (dominated_by_p (CDI_DOMINATORS, use_bb, inval_bb))
+   return true;
+
+  if (!clobvar || !last_block)
+   return false;
+
+  auto gsi = gsi_for_stmt (use_stmt);
+
+  auto_bitmap visited;
+
+  /* A use statement in the last basic block in a function or one that
+falls through to it is after any other prior clobber of the used
+

Re: [V2/PATCH] Fix tree-optimization/102216: missed optimization causing Warray-bounds

2021-11-22 Thread Andrew Pinski via Gcc-patches
On Mon, Nov 22, 2021 at 3:40 AM Richard Biener
 wrote:
>
> On Mon, Nov 22, 2021 at 9:40 AM Andrew Pinski  wrote:
> >
> > On Wed, Oct 27, 2021 at 3:42 AM Richard Biener via Gcc-patches
> >  wrote:
> > >
> > > On Wed, Oct 27, 2021 at 12:00 PM apinski--- via Gcc-patches
> > >  wrote:
> > > >
> > > > From: Andrew Pinski 
> > > >
> > > > The problem here is tree-ssa-forwprop.c likes to produce
> > > >   [(void *)_4 + 152B] which is the same as
> > > > _4 p+ 152 which the rest of GCC likes better.
> > > > This implements this transformation back to pointer plus to
> > > > improve better code generation later on.
> > >
> > > Why do you think so?  Can you pin-point the transform that now
> > > fixes the new testcase?
> >
> > So we had originally:
> >   language_names_p_9 =   [(void *)_4 + 24B];
> > ...
> >   _2 = _4 + 40;
>
> Of course if that would have been
>
>   _2 =  [_4 + 40B];
>
> the issue would be fixed as well.   That said, I agree that _4 + 40
> is better but I think we should canonicalize all [SSA + CST]
> this way.  There is a canonicalization phase in fold_stmt_1:
>
>   /* First do required canonicalization of [TARGET_]MEM_REF addresses
>  after propagation.
>  ???  This shouldn't be done in generic folding but in the
>  propagation helpers which also know whether an address was
>  propagated.
>  Also canonicalize operand order.  */
>   switch (gimple_code (stmt))
> {
> case GIMPLE_ASSIGN:
>   if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
> {
>   tree *rhs = gimple_assign_rhs1_ptr (stmt);
>   if ((REFERENCE_CLASS_P (*rhs)
>|| TREE_CODE (*rhs) == ADDR_EXPR)
>   && maybe_canonicalize_mem_ref_addr (rhs))
> changed = true;
>
> where this could be done (apart from adding a match.pd pattern for this).

Yes that is a good idea, I now have a patch which I am testing to add
this canonicalization. It is actually simpler than the previous patch
too.

Thanks,
Andrew

>
> >   if (_2 != language_names_p_9)
> >
> > Forwprop is able to figure out that the above if statement is now
> > always false as we have (_4 +p 40) != (_4 +p 24) which gets simplified
> > via a match-and-simplify pattern ().
> > Does that answer your question?
> >
> > I will look into the other comments in a new patch.
> >
> > Thanks,
> > Andrew
> >
> > >
> > > Comments below
> > >
> > > > OK? Bootstrapped and tested on aarch64-linux-gnu.
> > > >
> > > > Changes from v1:
> > > > * v2: Add comments.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > PR tree-optimization/102216
> > > > * tree-ssa-forwprop.c (rewrite_assign_addr): New function.
> > > > (forward_propagate_addr_expr_1): Use rewrite_assign_addr
> > > > when rewriting into the addr_expr into an assignment.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > PR tree-optimization/102216
> > > > * g++.dg/tree-ssa/pr102216.C: New test.
> > > > ---
> > > >  gcc/testsuite/g++.dg/tree-ssa/pr102216.C | 22 +
> > > >  gcc/tree-ssa-forwprop.c  | 58 ++--
> > > >  2 files changed, 67 insertions(+), 13 deletions(-)
> > > >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > > >
> > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216.C 
> > > > b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > > > new file mode 100644
> > > > index 000..b903e4eb57d
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > > > @@ -0,0 +1,22 @@
> > > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > > +void link_error ();
> > > > +void g ()
> > > > +{
> > > > +  const char **language_names;
> > > > +
> > > > +  language_names = new const char *[6];
> > > > +
> > > > +  const char **language_names_p = language_names;
> > > > +
> > > > +  language_names_p++;
> > > > +  language_names_p++;
> > > > +  language_names_p++;
> > > > +
> > > > +  if ( (language_names_p) - (language_names+3) != 0)
> > > > +link_error();
> > > > +  delete[] language_names;
> > > > +}
> > > > +/* We should have removed the link_error on the gimple level as GCC 
> > > > should
> > > > +   be able to tell that language_names_p is the same as 
> > > > language_names+3.  */
> > > > +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
> > > > +
> > > > diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> > > > index a830bab78ba..e4331c60525 100644
> > > > --- a/gcc/tree-ssa-forwprop.c
> > > > +++ b/gcc/tree-ssa-forwprop.c
> > > > @@ -637,6 +637,47 @@ forward_propagate_into_cond (gimple_stmt_iterator 
> > > > *gsi_p)
> > > >return 0;
> > > >  }
> > > >
> > > > +/* Rewrite the DEF_RHS as needed into the (plain) use statement.  */
> > > > +
> > > > +static void
> > > > +rewrite_assign_addr (gimple_stmt_iterator *use_stmt_gsi, tree def_rhs)
> > > > +{
> > > > +  tree def_rhs_base;
> > > > +  poly_int64 def_rhs_offset;
> > > > +
> > > > +  /* Get the base and offset.  */

Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/96779 Adding a missing pattern to match.pd

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/22/2021 1:45 AM, Richard Biener via Gcc-patches wrote:

On Fri, Nov 19, 2021 at 11:33 PM Navid Rahimi  wrote:

Hi Richard,

Thanks for the detailed comment. I am attaching a newer version of the patch 
which does have required fixes included. Bellow you can see my response to your 
feedbacks:


you need to check TYPE_OVERFLOW_WRAPS on TREE_TYPE (@0),
otherwise you check on boolean.

Fixed it.


no need for :c on the result pattern.  Otherwise it looks OK, but how
did you check the patch?

Fixed it. For checking the patch, I have script which builds and runs make 
check for 1) trunk and 2) trunk+patch in a separate directory and diffs the 
test results from each directory. My test script did had a subtle problem. The 
bug was, because of a typo in the path I introduced few days ago, it was 
diffing same trunk+patch test results against trunk+patch test results.

OK, please indicate that in the future, like with "Bootstrapped and
tested on x86_64-linux" or so.


That was a good reminder to setup an account for myself here asap [1].

1) https://gcc.gnu.org/wiki/CompileFarm

The updated patch is OK.
I don't think Navid has commit privs, so I fixed up the commit message 
and committed the patch for Navid.


jeff



Re: [PATCH] fixincludes: don't abort() on access failure [PR103306]

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/18/2021 4:01 AM, Xi Ruoyao via Gcc-patches wrote:

Some distro may ship dangling symlinks in include directories, triggers
the access failure.  Skip it and continue to next header instead of
being to panic.

Restore to old behavior before r12-5234 but without resurrecting the
problematic getcwd() call, by using the environment variable "INPUT"
exported by fixinc.sh.

Tested on x86_64-linux-gnu, with a dangling symlink in /usr/include.

fixincludes/

PR bootstrap/103306
* fixincl.c (process): Don't call abort().
---
  fixincludes/fixincl.c | 15 ---
  1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fixincludes/fixincl.c b/fixincludes/fixincl.c
index a17b65866c3..81939ee5ffa 100644
--- a/fixincludes/fixincl.c
+++ b/fixincludes/fixincl.c
@@ -1352,10 +1352,19 @@ process (void)
  
if (access (pz_curr_file, R_OK) != 0)

  {
-  /* Some really strange error happened.  */
-  fprintf (stderr, "Cannot access %s: %s\n", pz_curr_file,
+  /* It may happens if for e. g. the distro ships some broken symlinks
+   * in /usr/include.  */
+
+  /* "INPUT" is exported in fixinc.sh, which is the pwd where fixincl
+   * runs.  It's used instead of getcwd to avoid allocating a buffer
+   * with unknown length.  */
Formatting nits.  We don't use '*' at the start of comment lines. Drop 
the '*' like this


  /* blah blah blah
 more text.  */



+  const char *cwd = getenv ("INPUT");
+  if (!cwd)
+   cwd = "the working directory";
+
+  fprintf (stderr, "Cannot access %s from %s: %s\n", pz_curr_file, cwd,
   xstrerror (errno));
-  abort ();
+  return;
  }
If INPUT is always exported, why not just print it? ie, would CWD after 
actually be NULL?


jeff


Re: [PATCH] configure: define TARGET_LIBC_GNUSTACK on musl

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/15/2021 10:10 PM, Ilya Lipnitskiy wrote:

On Mon, Nov 15, 2021 at 2:50 PM Jeff Law  wrote:



On 11/15/2021 1:25 AM, Ilya Lipnitskiy via Gcc-patches wrote:

musl only uses PT_GNU_STACK to set default thread stack size and has no
executable stack support[0], so there is no reason not to emit the
.note.GNU-stack section on musl builds.

[0]: 
https://lore.kernel.org/all/20190423192534.gn23...@brightrain.aerifal.cx/T/#u

gcc/ChangeLog:

   * configure: Regenerate.
   * configure.ac: define TARGET_LIBC_GNUSTACK on musl

If musl has no executable stack support, then wouldn't we want this
change to apply to all musl platforms, not just mips?

The original change was MIPS-specific[0] and TARGET_LIBC_GNUSTACK is
only used by mips code today. We could change both cases to be more
generic or keep it MIPS-specific. Dragan, what do you think?

Oh yea, I forgot about that  older change.    Let me take another looksie.

jeff



Re: [PATCH 1/2] Sync with binutils: GCC: Pass --plugin to AR and RANLIB

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/13/2021 9:33 AM, H.J. Lu via Gcc-patches wrote:

Sync with binutils for building binutils with LTO:

 From 50ad1254d5030d0804cbf89c758359ae202e8d55 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sat, 9 Jan 2021 06:43:11 -0800
Subject: [PATCH] GCC: Pass --plugin to AR and RANLIB

Detect GCC LTO plugin.  Pass --plugin to AR and RANLIB to support LTO
build.

* Makefile.tpl (AR): Add @AR_PLUGIN_OPTION@
(RANLIB): Add @RANLIB_PLUGIN_OPTION@.
* configure.ac: Include config/gcc-plugin.m4.
AC_SUBST AR_PLUGIN_OPTION and RANLIB_PLUGIN_OPTION.
* libtool.m4 (_LT_CMD_OLD_ARCHIVE): Pass --plugin to AR and
RANLIB if possible.
* Makefile.in: Regenerated.
* configure: Likewise.

config/

* gcc-plugin.m4 (GCC_PLUGIN_OPTION): New.

libiberty/

* Makefile.in (AR): Add @AR_PLUGIN_OPTION@
(RANLIB): Add @RANLIB_PLUGIN_OPTION@.
(configure_deps): Depend on ../config/gcc-plugin.m4.
* configure.ac: AC_SUBST AR_PLUGIN_OPTION and
RANLIB_PLUGIN_OPTION.
* aclocal.m4: Regenerated.
* configure: Likewise.

zlib/

* configure: Regenerated.
I thought the plugins were automatically loaded if they're in the right 
place in the filesystem.  Wouldn't that make this patch unnecessary?  Am 
I missing something?


jeff



Re: [PATCH] libcpp: Use [[likely]] conditionally

2021-11-22 Thread Jeff Law via Gcc-patches




On 11/22/2021 10:22 AM, Marek Polacek via Gcc-patches wrote:

Let's hide [[likely]] behind a macro, to suppress warnings if the
compiler doesn't support it.

Co-authored-by: Jonathan Wakely 

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR preprocessor/103355

libcpp/ChangeLog:

* lex.c: Use ATTR_LIKELY instead of [[likely]].
* system.h (ATTR_LIKELY): Define.

OK
jeff



[PATCH] correct handling of offsets in PHI expressions [PR103215]

2021-11-22 Thread Martin Sebor via Gcc-patches

In an effort to avoid false positives while still detecting
certain out-of-bounds accesses the warning code that handles
PHI nodes chooses the operand with the most space remaining
as the one representative of the PHI.  That's not right when
the offsets into the operands are unequal, because it overly
constrains the range of offsets that can be substracted from
the pointer.

The attached change corrects the logic here to not only use
the size of the largest operand but also to extend the range
of offsets into it to reflect all operand.  Unfortunately,
as a result of the more conservative offset computation,
the fix leads to a fair number of false negatives.  I tried
to avoid those but couldn't come up with a clean solution
that didn't require design changes, so I defer those to GCC
13.

The diff is relative to the "cleanup" patch submitted below:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583735.html

Tested on x86_64-linux and by building Glibc and confirming
no new warnings.

Martin
Extend the offset and size of merged object references [PR103215].

Resolves:
PR tree-optimization/103215 - bogus -Warray-bounds with two pointers with different offsets each


gcc/ChangeLog:

	PR tree-optimization/103215
	* pointer-query.cc (access_ref::merge_ref): Extend the offset and
	size of the merged object instead of using the larger.

gcc/testsuite/ChangeLog:

	PR tree-optimization/103215
	* gcc.dg/Wstringop-overflow-58.c: Adjust and xfail expected warnings.
	* gcc.dg/Wstringop-overflow-59.c: Same.
	* gcc.dg/warn-strnlen-no-nul.c: Same.
	* gcc.dg/Warray-bounds-91.c: New test.
	* gcc.dg/Warray-bounds-92.c: New test.
	* gcc.dg/Wstringop-overflow-83.c: New test.
	* gcc.dg/Wstringop-overflow-85.c: New test.

diff --git a/gcc/pointer-query.cc b/gcc/pointer-query.cc
index 2f305514ddc..7dc6e0141f8 100644
--- a/gcc/pointer-query.cc
+++ b/gcc/pointer-query.cc
@@ -686,27 +686,32 @@ access_ref::merge_ref (vec *all_refs, tree arg, gimple *stmt,
   if (known_size && aref.sizrng[0] < minsize)
 minsize = aref.sizrng[0];
 
-  /* Determine the amount of remaining space in the argument.  */
-  offset_int argrem[2];
-  argrem[1] = aref.size_remaining (argrem);
+  /* Extend the size and offset of *THIS to account for AREF.  The result
+ can be cached but results in false negatives.  */
 
-  /* Determine the amount of remaining space computed so far and
- if the remaining space in the argument is more use it instead.  */
-  offset_int merged_rem[2];
-  merged_rem[1] = size_remaining (merged_rem);
+  offset_int orng[2];
+  if (sizrng[1] < aref.sizrng[1])
+{
+  orng[0] = offrng[0];
+  orng[1] = offrng[1];
+  *this = aref;
+}
+  else
+{
+  orng[0] = aref.offrng[0];
+  orng[1] = aref.offrng[1];
+}
+
+  if (orng[0] < offrng[0])
+offrng[0] = orng[0];
+  if (offrng[1] < orng[1])
+offrng[1] = orng[1];
 
   /* Reset the PHI's BASE0 flag if any of the nonnull arguments
  refers to an object at an unknown offset.  */
   if (!aref.base0)
 base0 = false;
 
-  if (merged_rem[1] < argrem[1]
-  || (merged_rem[1] == argrem[1]
-	  && sizrng[1] < aref.sizrng[1]))
-/* Use the argument with the most space remaining as the result,
-   or the larger one if the space is equal.  */
-*this = aref;
-
   sizrng[0] = minsize;
   parmarray = merged_parmarray;
 
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-91.c b/gcc/testsuite/gcc.dg/Warray-bounds-91.c
new file mode 100644
index 000..1c81091b2a6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-91.c
@@ -0,0 +1,145 @@
+/* PR middle-end/103215 - bogus -Warray-bounds with two pointers with
+   different offsets each
+   Test for accesses into the same array through pointers with different
+   offsets each.
+   { dg-do compile }
+   { dg-options "-O2 -Wall -ftrack-macro-expansion=0" } */
+
+#define NOIPA __attribute__ ((noipa))
+
+#define A(p, off) ((p)[off] = __COUNTER__)
+
+extern int a4[4];
+
+
+NOIPA void p0_p1 (int i)
+{
+  int *p0 = a4 + 0;
+  int *p1 = a4 + 1;
+  int *q = i ? p0 : p1;
+  A (q, -2);  // { dg-warning "-Warray-bounds" }
+  A (q, -1); A (q, 0); A (q, 1); A (q, 2);
+  /* Since q points to a4 and -1 is a valid subscript, +3 must be invalid.
+ But the warning for each subscript is independent of prior subscripts
+ into the same object.  That should be improved.  */
+  A (q, 3);   // { dg-warning "-Warray-bounds" "pr??" { xfail *-*-* } }
+  A (q, 4);   // { dg-warning "-Warray-bounds" }
+}
+
+NOIPA void p1_p0 (int i)
+{
+  int *p1 = a4 + 1;
+  int *p0 = a4 + 0;
+  int *q = i ? p0 : p1;
+  A (q, -2);  // { dg-warning "-Warray-bounds" }
+  A (q, -1); A (q, 0); A (q, 1); A (q, 2);
+  A (q, 3);   // { dg-warning "-Warray-bounds" "pr??" { xfail *-*-* } }
+  A (q, 4);   // { dg-warning "-Warray-bounds" }
+}
+
+
+NOIPA void p1_p2 (int i)
+{
+  int *p1 = a4 + 1;
+  int *p2 = a4 + 2;
+  int *q = i ? p1 : p2;
+  A (q, -3);  // { dg-warning "-Warray-bounds" }
+  A 

Re: [PATCH] handle member references in -Waddress [PR96507]

2021-11-22 Thread Marek Polacek via Gcc-patches
On Mon, Nov 22, 2021 at 04:00:56PM -0700, Martin Sebor via Gcc-patches wrote:
> While going through old -Waddress bug reports to close after
> the recent improvements to the warning I came across PR 96507
> that points out that member references aren't handled.  Since
> testing the address of a reference for equality to null is
> in general diagnosed, this seems like an oversight worth fixing.
>  Attached is a change to the C++ front end to diagnose member
> references as well.
> 
> Tested on x86_64-linux.
> 
> Martin

> Issue -Waddress also for reference members [PR96507].
> 
> Resolves:
> PR c++/96507 - missing -Waddress for member references
> 
> gcc/cp/ChangeLog:
> 
>   PR c++/96507
>   * typeck.c (warn_for_null_address): Handle reference members.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c++/96507
>   * g++.dg/warn/Waddress-8.C: New test.
> 
> diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> index 58919aaf13e..694c53eef8a 100644
> --- a/gcc/cp/typeck.c
> +++ b/gcc/cp/typeck.c
> @@ -4676,15 +4676,21 @@ warn_for_null_address (location_t location, tree op, 
> tsubst_flags_t complain)
>   "addition %qE and NULL", cop);
>return;
>  }
> -  else if (CONVERT_EXPR_P (op)
> -  && TYPE_REF_P (TREE_TYPE (TREE_OPERAND (op, 0
> +  else if (CONVERT_EXPR_P (op))
>  {
> -  STRIP_NOPS (op);
> +  tree op0 = TREE_OPERAND (op, 0);
> +  if (TYPE_REF_P (TREE_TYPE (op0)))
> + {

Isn't this just REFERENCE_REF_P?

> +   STRIP_NOPS (op);
> +
> +   if (TREE_CODE (op) == COMPONENT_REF)
> + op = TREE_OPERAND (op, 1);
>  
> -  if (DECL_P (op))
> - warned = warning_at (location, OPT_Waddress,
> -  "the compiler can assume that the address of "
> -  "%qD will never be NULL", op);
> +   if (DECL_P (op))
> + warned = warning_at (location, OPT_Waddress,
> +  "the compiler can assume that the address of "
> +  "%qD will never be NULL", op);
> + }
>  }
>  
>if (warned && DECL_P (op))
> diff --git a/gcc/testsuite/g++.dg/warn/Waddress-8.C 
> b/gcc/testsuite/g++.dg/warn/Waddress-8.C
> new file mode 100644
> index 000..797102d6be4
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/warn/Waddress-8.C
> @@ -0,0 +1,85 @@
> +/* PR c++/96507 - missing -Waddress for member references
> +   { dg-do compile }
> +   { dg-options "-Wall" } */
> +
> +typedef void F ();
> +
> +extern F 
> +extern int 
> +
> +bool warn_ext_rfun ()
> +{
> +  return  != 0;   // { dg-warning "-Waddress" }
> +}
> +
> +bool warn_ext_rvar ()
> +{
> +  return  != 0;   // { dg-warning "-Waddress" }
> +}
> +
> +
> +bool warn_parm_rfun (F )
> +{
> +  return  != 0;// { dg-warning "-Waddress" }
> +}
> +
> +bool warn_parm_rvar (int )
> +{
> +  return  != 0;// { dg-warning "-Waddress" }
> +}
> +
> +// Comparing the address of a reference argument to null also triggers
> +// a -Wnonnull-compare (that seems like a bug, hence PR 103363).
> +// { dg-prune-output "-Wnonnull-compare" }
> +
> +
> +struct S
> +{
> +  F 
> +  int 
> +};
> +
> +extern S es, esa[];
> +
> +bool warn_ext_mem_rfun ()
> +{
> +  return  != 0; // { dg-warning "-Waddress" }
> +}
> +
> +bool warn_ext_mem_rvar ()
> +{
> +  return  != 0; // { dg-warning "-Waddress" }
> +}
> +
> +
> +bool warn_ext_arr_mem_rfun (int i)
> +{
> +  return [i].fr != 0; // { dg-warning "-Waddress" }
> +}
> +
> +bool warn_ext_arr_mem_rvar (int i)
> +{
> +  return [i].ir != 0; // { dg-warning "-Waddress" }
> +}
> +
> +
> +bool warn_parm_mem_rfun (S )
> +{
> +  return  != 0;  // { dg-warning "-Waddress" }
> +}
> +
> +bool warn_parm_mem_rvar (S )
> +{
> +  return  != 0;  // { dg-warning "-Waddress" }
> +}
> +
> +
> +bool warn_parm_arr_mem_rfun (S sa[], int i)
> +{
> +  return [i].fr != 0;  // { dg-warning "-Waddress" }
> +}
> +
> +bool warn_parm_arr_mem_rvar (S sa[], int i)
> +{
> +  return [i].ir != 0;  // { dg-warning "-Waddress" }
> +}


Marek



[PATCH] handle member references in -Waddress [PR96507]

2021-11-22 Thread Martin Sebor via Gcc-patches

While going through old -Waddress bug reports to close after
the recent improvements to the warning I came across PR 96507
that points out that member references aren't handled.  Since
testing the address of a reference for equality to null is
in general diagnosed, this seems like an oversight worth fixing.
 Attached is a change to the C++ front end to diagnose member
references as well.

Tested on x86_64-linux.

Martin
Issue -Waddress also for reference members [PR96507].

Resolves:
PR c++/96507 - missing -Waddress for member references

gcc/cp/ChangeLog:

	PR c++/96507
	* typeck.c (warn_for_null_address): Handle reference members.

gcc/testsuite/ChangeLog:

	PR c++/96507
	* g++.dg/warn/Waddress-8.C: New test.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 58919aaf13e..694c53eef8a 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -4676,15 +4676,21 @@ warn_for_null_address (location_t location, tree op, tsubst_flags_t complain)
 		"addition %qE and NULL", cop);
   return;
 }
-  else if (CONVERT_EXPR_P (op)
-  && TYPE_REF_P (TREE_TYPE (TREE_OPERAND (op, 0
+  else if (CONVERT_EXPR_P (op))
 {
-  STRIP_NOPS (op);
+  tree op0 = TREE_OPERAND (op, 0);
+  if (TYPE_REF_P (TREE_TYPE (op0)))
+	{
+	  STRIP_NOPS (op);
+
+	  if (TREE_CODE (op) == COMPONENT_REF)
+	op = TREE_OPERAND (op, 1);
 
-  if (DECL_P (op))
-	warned = warning_at (location, OPT_Waddress,
-			 "the compiler can assume that the address of "
-			 "%qD will never be NULL", op);
+	  if (DECL_P (op))
+	warned = warning_at (location, OPT_Waddress,
+ "the compiler can assume that the address of "
+ "%qD will never be NULL", op);
+	}
 }
 
   if (warned && DECL_P (op))
diff --git a/gcc/testsuite/g++.dg/warn/Waddress-8.C b/gcc/testsuite/g++.dg/warn/Waddress-8.C
new file mode 100644
index 000..797102d6be4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Waddress-8.C
@@ -0,0 +1,85 @@
+/* PR c++/96507 - missing -Waddress for member references
+   { dg-do compile }
+   { dg-options "-Wall" } */
+
+typedef void F ();
+
+extern F 
+extern int 
+
+bool warn_ext_rfun ()
+{
+  return  != 0;   // { dg-warning "-Waddress" }
+}
+
+bool warn_ext_rvar ()
+{
+  return  != 0;   // { dg-warning "-Waddress" }
+}
+
+
+bool warn_parm_rfun (F )
+{
+  return  != 0;// { dg-warning "-Waddress" }
+}
+
+bool warn_parm_rvar (int )
+{
+  return  != 0;// { dg-warning "-Waddress" }
+}
+
+// Comparing the address of a reference argument to null also triggers
+// a -Wnonnull-compare (that seems like a bug, hence PR 103363).
+// { dg-prune-output "-Wnonnull-compare" }
+
+
+struct S
+{
+  F 
+  int 
+};
+
+extern S es, esa[];
+
+bool warn_ext_mem_rfun ()
+{
+  return  != 0; // { dg-warning "-Waddress" }
+}
+
+bool warn_ext_mem_rvar ()
+{
+  return  != 0; // { dg-warning "-Waddress" }
+}
+
+
+bool warn_ext_arr_mem_rfun (int i)
+{
+  return [i].fr != 0; // { dg-warning "-Waddress" }
+}
+
+bool warn_ext_arr_mem_rvar (int i)
+{
+  return [i].ir != 0; // { dg-warning "-Waddress" }
+}
+
+
+bool warn_parm_mem_rfun (S )
+{
+  return  != 0;  // { dg-warning "-Waddress" }
+}
+
+bool warn_parm_mem_rvar (S )
+{
+  return  != 0;  // { dg-warning "-Waddress" }
+}
+
+
+bool warn_parm_arr_mem_rfun (S sa[], int i)
+{
+  return [i].fr != 0;  // { dg-warning "-Waddress" }
+}
+
+bool warn_parm_arr_mem_rvar (S sa[], int i)
+{
+  return [i].ir != 0;  // { dg-warning "-Waddress" }
+}


[pushed] c++: remember pointer-to-member location

2021-11-22 Thread Jason Merrill via Gcc-patches
Jakub recently mentioned that a PTRMEM_CST has no location; let's give it a
location wrapper.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* typeck.c (build_x_unary_op): Set address location.
(convert_member_func_to_ptr): Handle location wrapper.
* pt.c (convert_nontype_argument): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/template/crash106.C: Adjust.
* g++.dg/diagnostic/ptrtomem3.C: New test.
---
 gcc/cp/pt.c |  2 ++
 gcc/cp/typeck.c | 15 +--
 gcc/testsuite/g++.dg/diagnostic/ptrtomem3.C | 14 ++
 gcc/testsuite/g++.dg/template/crash106.C|  4 ++--
 4 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/diagnostic/ptrtomem3.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index c73e035d8b8..288625e3e93 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -7267,6 +7267,8 @@ convert_nontype_argument (tree type, tree expr, 
tsubst_flags_t complain)
   const bool val_dep_p = value_dependent_expression_p (expr);
   if (val_dep_p)
 expr = canonicalize_expr_argument (expr, complain);
+  else
+STRIP_ANY_LOCATION_WRAPPER (expr);
 
   /* 14.3.2/5: The null pointer{,-to-member} conversion is applied
  to a non-type argument of "nullptr".  */
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 63a0eaef2da..84dcb6f014f 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -6492,6 +6492,11 @@ build_x_unary_op (location_t loc, enum tree_code code, 
cp_expr xarg,
}
 
   exp = cp_build_addr_expr_strict (xarg, complain);
+
+  if (TREE_CODE (exp) == PTRMEM_CST)
+   exp = maybe_wrap_with_location (exp, loc);
+  else
+   protected_set_expr_location (exp, loc);
 }
 
   if (processing_template_decl && exp != error_mark_node)
@@ -8179,10 +8184,14 @@ convert_member_func_to_ptr (tree type, tree expr, 
tsubst_flags_t complain)
   if (!(complain & tf_warning_or_error))
 return error_mark_node;
 
+  location_t loc = cp_expr_loc_or_input_loc (expr);
+
   if (pedantic || warn_pmf2ptr)
-pedwarn (input_location, pedantic ? OPT_Wpedantic : OPT_Wpmf_conversions,
+pedwarn (loc, pedantic ? OPT_Wpedantic : OPT_Wpmf_conversions,
 "converting from %qH to %qI", intype, type);
 
+  STRIP_ANY_LOCATION_WRAPPER (expr);
+
   if (TREE_CODE (intype) == METHOD_TYPE)
 expr = build_addr_func (expr, complain);
   else if (TREE_CODE (expr) == PTRMEM_CST)
@@ -8197,7 +8206,9 @@ convert_member_func_to_ptr (tree type, tree expr, 
tsubst_flags_t complain)
   if (expr == error_mark_node)
 return error_mark_node;
 
-  return build_nop (type, expr);
+  expr = build_nop (type, expr);
+  SET_EXPR_LOCATION (expr, loc);
+  return expr;
 }
 
 /* Build a NOP_EXPR to TYPE, but mark it as a reinterpret_cast so that
diff --git a/gcc/testsuite/g++.dg/diagnostic/ptrtomem3.C 
b/gcc/testsuite/g++.dg/diagnostic/ptrtomem3.C
new file mode 100644
index 000..6096a98f708
--- /dev/null
+++ b/gcc/testsuite/g++.dg/diagnostic/ptrtomem3.C
@@ -0,0 +1,14 @@
+// Check that the diagnostic for a pointer-to-member expression has the caret
+// at the &.
+
+struct A
+{
+  int i;
+};
+
+void f();
+
+int main()
+{
+  return ::i;// { dg-error "10:cannot convert" }
+}
diff --git a/gcc/testsuite/g++.dg/template/crash106.C 
b/gcc/testsuite/g++.dg/template/crash106.C
index f904bd4dc9b..35cedb55b0b 100644
--- a/gcc/testsuite/g++.dg/template/crash106.C
+++ b/gcc/testsuite/g++.dg/template/crash106.C
@@ -7,6 +7,6 @@ struct A
   template void foo(); // { dg-error "type" "" { target c++17_down } }
 };
 
-template > struct B {}; // { dg-error 
"type|declared" "" { target c++17_down } }
+template > struct B {}; // { dg-error 
"type|declared|could not convert" "" { target c++17_down } }
 
-B<> b; // { dg-error "(could not convert|no matches)" "" { target c++17_down } 
}
+B<> b; // { dg-message "" "" { target c++17_down } }

base-commit: a6e0d593707ae44dec0bdf2bcdc4f539050b46db
prerequisite-patch-id: c6089a1df492025881ecbe9f955019d4670183d7
-- 
2.27.0



[pushed] c++: improved return expression location

2021-11-22 Thread Jason Merrill via Gcc-patches
Stripping the location wrapper from retval meant we didn't have the
necessary location information for any conversion diagnostics.  We only need
the stripping for the named return value optimization, let's use the
unstripped expression for everything else.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* typeck.c (check_return_expr): Only strip location wrapper during
NRV handling.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/pr65327.C: Adjust location.
* g++.dg/cpp23/constexpr-nonlit4.C: Likewise.
* g++.dg/cpp23/constexpr-nonlit5.C: Likewise.
* g++.dg/cpp2a/constexpr-init1.C: Likewise.
---
 gcc/cp/typeck.c| 11 ++-
 gcc/testsuite/g++.dg/cpp0x/pr65327.C   |  4 ++--
 gcc/testsuite/g++.dg/cpp23/constexpr-nonlit4.C |  8 
 gcc/testsuite/g++.dg/cpp23/constexpr-nonlit5.C |  8 
 gcc/testsuite/g++.dg/cpp2a/constexpr-init1.C   |  4 ++--
 5 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 58919aaf13e..63a0eaef2da 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -10545,19 +10545,20 @@ check_return_expr (tree retval, bool *no_warning)
  this restriction, anyway.  (jason 2000-11-19)
 
  See finish_function and finalize_nrv for the rest of this optimization.  
*/
+  tree bare_retval = NULL_TREE;
   if (retval)
 {
   retval = maybe_undo_parenthesized_ref (retval);
-  STRIP_ANY_LOCATION_WRAPPER (retval);
+  bare_retval = tree_strip_any_location_wrapper (retval);
 }
 
-  bool named_return_value_okay_p = can_do_nrvo_p (retval, functype);
+  bool named_return_value_okay_p = can_do_nrvo_p (bare_retval, functype);
   if (fn_returns_value_p && flag_elide_constructors)
 {
   if (named_return_value_okay_p
   && (current_function_return_value == NULL_TREE
-  || current_function_return_value == retval))
-   current_function_return_value = retval;
+ || current_function_return_value == bare_retval))
+   current_function_return_value = bare_retval;
   else
current_function_return_value = error_mark_node;
 }
@@ -10571,7 +10572,7 @@ check_return_expr (tree retval, bool *no_warning)
 maybe_warn_pessimizing_move (retval, functype);
 
   /* Do any required conversions.  */
-  if (retval == result || DECL_CONSTRUCTOR_P (current_function_decl))
+  if (bare_retval == result || DECL_CONSTRUCTOR_P (current_function_decl))
 /* No conversions are required.  */
 ;
   else
diff --git a/gcc/testsuite/g++.dg/cpp0x/pr65327.C 
b/gcc/testsuite/g++.dg/cpp0x/pr65327.C
index 6e888ebff2c..e8149953ffd 100644
--- a/gcc/testsuite/g++.dg/cpp0x/pr65327.C
+++ b/gcc/testsuite/g++.dg/cpp0x/pr65327.C
@@ -14,5 +14,5 @@ foo ()
 constexpr volatile int // { dg-warning "deprecated" "" { target c++2a } }
 bar ()
 {
-  return i;
-} // { dg-error "lvalue-to-rvalue conversion of a volatile lvalue" }
+  return i;  // { dg-error "lvalue-to-rvalue conversion of a volatile lvalue" }
+}
diff --git a/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit4.C 
b/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit4.C
index e4ed2e36c30..bdc97a9bc79 100644
--- a/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit4.C
+++ b/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit4.C
@@ -34,7 +34,7 @@ baz (int x)
 {
   static const int v = qux (); // { dg-message "'v' was not 
initialized with a constant expression" }
 case 12:
-  return v;
+  return v;// { dg-error "the value of 'v' is not usable in a 
constant expression" }
 }
   return 0;
 }
@@ -46,12 +46,12 @@ corge (int x)
 {
   const thread_local int v = qux ();   // { dg-message "'v' was not 
initialized with a constant expression" }
 case 12:
-  return v;
+  return v; // { dg-error "the value of 'v' is not usable in a constant 
expression" }
 }
   return 0;
 }
 
 constexpr int a = foo (12);
 constexpr int b = bar (12);
-constexpr int c = baz (12);// { dg-error "the value of 'v' is not 
usable in a constant expression" }
-constexpr int d = corge (12);  // { dg-error "the value of 'v' is not 
usable in a constant expression" }
+constexpr int c = baz (12);
+constexpr int d = corge (12);
diff --git a/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit5.C 
b/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit5.C
index 838f282c1f9..86d5dba77a3 100644
--- a/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit5.C
+++ b/gcc/testsuite/g++.dg/cpp23/constexpr-nonlit5.C
@@ -34,7 +34,7 @@ baz (int x)
 {
   static int v = 6;// { dg-message "int v' is not const" }
 case 12:
-  return v;
+  return v;// { dg-error "the value of 'v' is not usable in a 
constant expression" }
 }
   return 0;
 }
@@ -46,12 +46,12 @@ corge (int x)
 {
   thread_local int v = 6;  // { dg-message "int v' is not const" }
 case 12:
-  return v;
+  return v;// { dg-error "the value of 'v' is not usable 

[PATCH] c++: Fix missing NSDMI diagnostic in C++98 [PR103347]

2021-11-22 Thread Marek Polacek via Gcc-patches
Here the problem is that we aren't detecting a NSDMI in C++98:

struct A {
  void *x = NULL;
};

because maybe_warn_cpp0x uses input_location and that happens to point
to NULL which comes from a system header.  Jakub suggested changing the
location to the '=', thereby avoiding the system header problem.  To
that end, I've added a new location_t member into cp_declarator.  This
member is used when this declarator is part of an init-declarator.  The
rest of the changes is obvious.  I've also taken the liberty of adding
loc_or_input_loc, since I want to avoid checking for UNKNOWN_LOCATION.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/103347

gcc/cp/ChangeLog:

* cp-tree.h (struct cp_declarator): Add a location_t member.
(maybe_warn_cpp0x): Add a location_t parameter with a default argument.
(loc_or_input_loc): New.
* decl.c (grokdeclarator): Use loc_or_input_loc.  Pass init_loc down
to maybe_warn_cpp0x.
* error.c (maybe_warn_cpp0x): Add a location_t parameter.  Use it.
* parser.c (make_declarator): Initialize init_loc.
(cp_parser_member_declaration): Set init_loc.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/nsdmi-warn1.C: New test.
* g++.dg/cpp0x/nsdmi-warn1.h: New file.
---
 gcc/cp/cp-tree.h | 16 +---
 gcc/cp/decl.c| 22 +---
 gcc/cp/error.c   | 32 
 gcc/cp/parser.c  |  2 ++
 gcc/testsuite/g++.dg/cpp0x/nsdmi-warn1.C | 10 
 gcc/testsuite/g++.dg/cpp0x/nsdmi-warn1.h |  2 ++
 6 files changed, 55 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi-warn1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nsdmi-warn1.h

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3f56cb90d14..2037082b0c7 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6231,9 +6231,11 @@ struct cp_declarator {
   /* If this declarator is parenthesized, this the open-paren.  It is
  UNKNOWN_LOCATION when not parenthesized.  */
   location_t parenthesized;
-
-  location_t id_loc; /* Currently only set for cdk_id, cdk_decomp and
-   cdk_function. */
+  /* Currently only set for cdk_id, cdk_decomp and cdk_function.  */
+  location_t id_loc;
+  /* If this declarator is part of an init-declarator, the location of the
+ initializer.  */
+  location_t init_loc;
   /* GNU Attributes that apply to this declarator.  If the declarator
  is a pointer or a reference, these attribute apply to the type
  pointed to.  */
@@ -6878,7 +6880,8 @@ extern const char *lang_decl_dwarf_name   (tree, 
int, bool);
 extern const char *language_to_string  (enum languages);
 extern const char *class_key_or_enum_as_string (tree);
 extern void maybe_warn_variadic_templates   (void);
-extern void maybe_warn_cpp0x   (cpp0x_warn_str str);
+extern void maybe_warn_cpp0x   (cpp0x_warn_str str,
+location_t = input_location);
 extern bool pedwarn_cxx98   (location_t, int, const char 
*, ...) ATTRIBUTE_GCC_DIAG(3,4);
 extern location_t location_of   (tree);
 extern void qualified_name_lookup_error(tree, tree, tree,
@@ -7996,6 +7999,11 @@ extern bool decl_in_std_namespace_p   (tree);
 extern void require_complete_eh_spec_types (tree, tree);
 extern void cxx_incomplete_type_diagnostic (location_t, const_tree,
 const_tree, diagnostic_t);
+inline location_t
+loc_or_input_loc (location_t loc)
+{
+  return loc == UNKNOWN_LOCATION ? input_location : loc;
+}
 
 inline location_t
 cp_expr_loc_or_loc (const_tree t, location_t or_loc)
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 9f68d1a5590..ae0e0bae9cc 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -11522,14 +11522,18 @@ grokdeclarator (const cp_declarator *declarator,
   if (initialized == SD_DEFAULTED || initialized == SD_DELETED)
 funcdef_flag = true;
 
-  location_t typespec_loc = smallest_type_location (type_quals,
-   declspecs->locations);
-  if (typespec_loc == UNKNOWN_LOCATION)
-typespec_loc = input_location;
-
-  location_t id_loc = declarator ? declarator->id_loc : input_location;
-  if (id_loc == UNKNOWN_LOCATION)
-id_loc = input_location;
+  location_t typespec_loc = loc_or_input_loc (smallest_type_location
+ (type_quals,
+  declspecs->locations));
+  location_t id_loc;
+  location_t init_loc;
+  if (declarator)
+{
+  id_loc = loc_or_input_loc (declarator->id_loc);
+  init_loc = loc_or_input_loc (declarator->init_loc);
+}
+  else
+init_loc = id_loc = input_location;
 
   /* Look inside a declarator for the name 

Re: [PATCH] rs6000: Add [power6-64] stanza to new builtin support

2021-11-22 Thread Segher Boessenkool
Hi!

On Tue, Nov 16, 2021 at 11:06:55AM -0600, Bill Schmidt via Gcc-patches wrote:
> gcc/
>   * config/rs6000/rs6000-builtin-new.def: Add power6-64 stanza.
>   Move CMPB to power6-64 stanza.

Don't break lines early please.

>   * config/rs6000/rs6000-call.c (rs6000_invalid_new_builtin): Handle
>   ENB_P6_64 case.
>   (rs6000_new_builtin_is_supported): Likewise.
>   (rs6000_expand_new_builtin): Likewise.
>   (rs6000_init_builtins): Likewise.
>   * config/rs6000/rs6000-gen-builtins.c (bif_stanza): Add
>   BSTZ_P6_64.

It's worse here :-(

>   (stanza_map): Add entry mapping power6-64 to BSTZ_P6_64.
>   (enable_string): Add "ENB_P6_64".
>   (write_decls): Add ENB_P6_64 to bif_enable enum.

> @@ -15697,6 +15703,8 @@ rs6000_expand_new_builtin (tree exp, rtx target,
>if (!(e == ENB_ALWAYS
>   || (e == ENB_P5 && TARGET_POPCNTB)
>   || (e == ENB_P6 && TARGET_CMPB)
> + || (e == ENB_P6_64  && TARGET_CMPB
> + && TARGET_POWERPC64)

This is curious formatting.  And you are introducing tabs halfway a
line; don't do that, use spaces there, this is not a table.

This fits easily on a single line, so just do that?

>   || (e == ENB_ALTIVEC&& TARGET_ALTIVEC)
>   || (e == ENB_CELL   && TARGET_ALTIVEC
>   && rs6000_cpu == PROCESSOR_CELL)

(I do realise this entry isn't correct formatting either).

Okay for trunk with those things fixed.  Thanks!


Segher


Re: [PATCH 3/3] c++: P1997 array-copy extensions: Assignment, return, etc. [PR103238]

2021-11-22 Thread will wray via Gcc-patches
On Mon, Nov 22, 2021 at 3:42 PM Joseph Myers  wrote:
>
> On Sun, 21 Nov 2021, Will Wray via Gcc-patches wrote:
>
> > gcc/c/ChangeLog:
> >
> >   * c-decl.c (grokdeclarator): Don't complain of array returns.
>
> A C front-end change like this doesn't belong in a C++ patch

Of course, thanks. I'll remove it.
Hopefully it'll be back before long, in a compatible C array-copy patchset.


Re: [PATCH 0/3] Add zero cycle move support

2021-11-22 Thread Michael Meissner via Gcc-patches
On Mon, Nov 22, 2021 at 11:09:22AM -0500, David Edelsohn wrote:
> On Mon, Nov 22, 2021 at 10:58 AM Bill Schmidt  wrote:
> And do the absolute addressing for switch tables changes work on AIX?
> I thought that Hao Chen only had done the work for PPC64 Linux ELF
> syntax with promises of future changes to accommodate AIX as well.

In theory it should work on AIX, since the assembler has to support syntax to
load the contents of a 64-bit address in memory.

In the past, when I measured this (probably in the power8 days), the issue was
occasionally having 64-bit loads for the switch tables insted of 32-bit loads
and an add instruction meant a slow down for 1-2 benchmarks that were extremely
sensitive to cache sizes.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH 2/3] Set power10 fusion if -mtune=power10.

2021-11-22 Thread Michael Meissner via Gcc-patches
On Mon, Nov 22, 2021 at 10:06:17AM -0600, Bill Schmidt wrote:
> Hi Mike,
> 
> On 11/19/21 8:55 AM, Michael Meissner wrote:
> > Set power10 fusion if -mtune=power10.
> >
> > In doing the patch for zero cycle moves for switch statements and indirect
> > jumps, I noticed the fusion support is only done if -mcpu=power10.  This 
> > option
> > enables power10 fusion if we use -mtune=power10.
> >
> > I have built and run the testsuites on little endian power9 and power10 
> > systems
> > with no regressions.  Can I install this patch?
> 
> This all seems fine, but since we're planning on collapsing all those flags
> anyway, maybe it would be better if we did that first.  This seems like work
> that will mostly be removed soon.  But no concerns from me otherwise.
> 
> Thanks!
> Bill

It sitll is useful early on to do builds with/without to see what the bubbles
are.  But yeah, we could eliminate it.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH 1/3] Add power10 zero cycle moves for switches & indirect jumps

2021-11-22 Thread Michael Meissner via Gcc-patches
On Mon, Nov 22, 2021 at 10:36:13AM -0600, Bill Schmidt wrote:
> Hi Mike,
> 
> Thanks for this patch!
> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
> >  emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
> >  DONE;
> >}
> > +  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> > +{
> > +  emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> > +  DONE;
> > +}
> >  })
> >  
> >  (define_insn "*indirect_jump"
> >[(set (pc)
> > (match_operand:P 0 "register_operand" "c,*l"))]
> > -  "rs6000_speculate_indirect_jumps"
> > +  "rs6000_speculate_indirect_jumps
> > +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
> >"b%T0"
> >[(set_attr "type" "jmpreg")])
> >  
> > +(define_insn "@indirect_jump_zero_cycle"
> 
> I don't know why this is an "@" pattern, but honestly I don't
> know why @indirect_jump_nospec is an "@" pattern either.
> The documentation for such things is hard for me to understand,
> so I'm probably just missing something obvious, but I don't
> immediately see why we would need the @ here.

I didn't know about it either.  Basically the next insn used it:

(define_insn "@indirect_jump_nospec"
  [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
   (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
  "!rs6000_speculate_indirect_jumps"
  "crset %E1\;beq%T0- %1\;b $"
  [(set_attr "type" "jmpreg")
   (set_attr "length" "12")])

This creates a function:

gen_indirect_jump_nospec (machine_mode arg0, rtx x0, rtx x1)

where the mode of the P iterator is passed as argument.  I.e. you can do:

rtx foo = gen_indirect_jump_nospec (Pmode, op0, op1);

instead of:

rtx foo;
if (Pmode == SImode)
  foo = gen_indirect_jumpsi_nospec (op0, op1);
else if (Pmode == DImode)
  foo = gen_indirect_jumpdi_nospec (op0, op1);
else
  gcc_unreachable ();

> > +  [(set (pc)
> > +   (match_operand:P 0 "register_operand" "r,r,!cl"))
> > +   (clobber (match_scratch:P 1 "=c,*l,X"))]
> 
> Do we need the *l and X alternatives if we're only doing this for
> mtctr/bctr?

Probably not, but I recall back before the current allocator, that it would
cause crashes if we didn't have LR.  I could certainly eliminate the *l
alternative.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] libcpp, v2: Fix _Pragma in #__VA_ARGS__ [PR103165]

2021-11-22 Thread Joseph Myers
On Mon, 22 Nov 2021, Jakub Jelinek via Gcc-patches wrote:

> On Thu, Nov 18, 2021 at 09:55:52PM +, Joseph Myers wrote:
> > On Thu, 18 Nov 2021, Jakub Jelinek via Gcc-patches wrote:
> > 
> > > Are we handling the pragma at a wrong phase of preprocessing?
> > 
> > I think that converting it to a single preprocessing token (rather than 
> > four separate preprocessing tokens), at a stage when stringizing might 
> > still occur, does indicate it's being processed too soon, and it would be 
> > better to do that only when it's known that the _Pragma preprocessing 
> > token will actually occur in the results of preprocessing the source file.
> 
> So like this?  I.e. don't process _Pragma during expand_args where we can't
> know what the macro will do with it?
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 3/3] c++: P1997 array-copy extensions: Assignment, return, etc. [PR103238]

2021-11-22 Thread Joseph Myers
On Sun, 21 Nov 2021, Will Wray via Gcc-patches wrote:

> gcc/c/ChangeLog:
> 
>   * c-decl.c (grokdeclarator): Don't complain of array returns.

A C front-end change like this doesn't belong in a C++ patch (I don't see 
any reason why this C++ patch would need such a C front-end change, it's 
not e.g. changing the interface to a function called from shared code but 
defined separately for both front ends).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH v3] c-format: Add -Wformat-int-precision option [PR80060]

2021-11-22 Thread Joseph Myers
On Sun, 21 Nov 2021, Daniil Stas via Gcc-patches wrote:

> This option is enabled by default when -Wformat option is enabled. A
> user can specify -Wno-format-int-precision to disable emitting
> warnings when passing an argument of an incompatible integer type to
> a 'd', 'i', 'o', 'u', 'x', or 'X' conversion specifier when it has
> the same precision as the expected type.

I'd expect this to apply to 'b' and 'B' as well (affects commit message, 
ChangeLog entry, option help string, documentation).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] PR fortran/87851 - [9/10/11/12 Regression] Wrong return type for len_trim

2021-11-22 Thread Bernhard Reutner-Fischer via Gcc-patches
On Mon, 22 Nov 2021 19:17:51 +0100
Harald Anlauf via Gcc-patches  wrote:

> Am 21.11.21 um 12:46 schrieb Mikael Morin:
> > Le 19/11/2021 à 20:47, Harald Anlauf via Fortran a écrit :  
> >> Dear Fortranners,
> >>
> >> scalariziation of the elemental intrinsic LEN_TRIM was ICEing
> >> when the optional KIND argument was present.
> >>
> >> The cleanest solution is to use the infrastructure added by
> >> Mikael's fix for PR97896.  In that case it is a 1-liner.  :-)

I'm just wondering loud if it would be more convenient to have a
unsigned hidden_arg:1 bit in let's say gfc_actual_arglist that denotes
if the argument should be const eval'ed and used before, and, most
importantly not passed to the library. We seem to have more than just
the index intrinsic's kind arg in that boat. And from what i read,
powerpc will eventuall want to select quite some kind-specific library
functions soon, depending on how this part is implemented..

Maybe add SPEC_HIDDEN_ARG / SPEC_LIBRARY_SELECTOR additional
gfc_param_spec_type if a separate bit is deemed inappropriate.

Such a hidden_arg/library_selector/non_library_call_arg flag is maybe
better than matching individual functions and strcmp the arg name.

cheers,


Re: [PATCH] PR fortran/87851 - [9/10/11/12 Regression] Wrong return type for len_trim

2021-11-22 Thread Harald Anlauf via Gcc-patches

Am 21.11.21 um 12:46 schrieb Mikael Morin:

Le 19/11/2021 à 20:47, Harald Anlauf via Fortran a écrit :

Dear Fortranners,

scalariziation of the elemental intrinsic LEN_TRIM was ICEing
when the optional KIND argument was present.

The cleanest solution is to use the infrastructure added by
Mikael's fix for PR97896.  In that case it is a 1-liner.  :-)

That fix is available on mainline and on 11-branch only, though.
My suggestion is to fix the current PR only for the same branches,
leaving the regression unfixed for older ones.

Regtested on x86_64-pc-linux-gnu.  OK for mainline and 11-branch?


Your change itself is fine.
The PR was originally about a type mismatch between the gfortran library
and the call generated by the front-end.


The supposed type mismatch was due to Janne's commit r8-5772:

commit f61ab42c4ca550059add89ffda00ed2b3c03
Author: Janne Blomqvist 
Date:   Fri Jan 5 21:01:12 2018 +0200

PR 78534 Change character length from int to size_t

In order to handle large character lengths on (L)LP64 targets, switch
the GFortran character length from an int to a size_t.

This is an ABI change, as procedures with character arguments take
hidden arguments with the character length.
[...]

LEN_TRIM is of course of type default integer, which is handled fine
in gfc_resolve_len_trim, setting f->ts.kind, the same way as it is
done in gfc_resolve_index_func or elsewhere, and conversions are
properly taken care of as far as I can tell.

Things work(ed) fine for the "scalar" version of LEN_TRIM, even if the
optional KIND argument was present, before the above commit.  But not
the elemental version working on rank>=1 with KIND present.  That did
not change.  See PR87711.

Thinking again, the patch primarily addresses PR87711 (for 11/12) and
fixes some of the comments in PR87851.  We'd need to find out what
exactly is left from the latter.

The dump-tree of the testcase looks fine to me, even when compiling
with -fdefault-integer-8, and I do not see any conversion warnings.


As the code generated contains a cast, I think it’s fine as well.


Well, LEN_TRIM is of default integer, unless KIND is given.


But please give Thomas (bug reporter) one more day to comment on this.


Sure.


Then I think you can proceed.

Thanks.



Thanks,
Harald


[PATCH] libcpp: Use [[likely]] conditionally

2021-11-22 Thread Marek Polacek via Gcc-patches
Let's hide [[likely]] behind a macro, to suppress warnings if the
compiler doesn't support it.

Co-authored-by: Jonathan Wakely 

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR preprocessor/103355

libcpp/ChangeLog:

* lex.c: Use ATTR_LIKELY instead of [[likely]].
* system.h (ATTR_LIKELY): Define.
---
 libcpp/lex.c|  2 +-
 libcpp/system.h | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/libcpp/lex.c b/libcpp/lex.c
index 94c36f0d014..9c27d8b5a08 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1286,7 +1286,7 @@ namespace bidi {
   case kind::RTL:
/* These aren't popped by a PDF/PDI.  */
break;
-  [[likely]] case kind::NONE:
+  ATTR_LIKELY case kind::NONE:
break;
   default:
abort ();
diff --git a/libcpp/system.h b/libcpp/system.h
index ee5fbe28889..f6fc583ab80 100644
--- a/libcpp/system.h
+++ b/libcpp/system.h
@@ -422,6 +422,16 @@ extern void fancy_abort (const char *, int, const char *) 
ATTRIBUTE_NORETURN;
 #define gcc_checking_assert(EXPR) ((void)(0 && (EXPR)))
 #endif
 
+#ifdef __has_cpp_attribute
+# if __has_cpp_attribute(likely)
+#  define ATTR_LIKELY [[likely]]
+# elif __has_cpp_attribute(__likely__)
+#  define ATTR_LIKELY [[__likely__]]
+# else
+#  define ATTR_LIKELY
+# endif
+#endif
+
 /* Poison identifiers we do not want to use.  */
 #if (GCC_VERSION >= 3000)
 #undef calloc

base-commit: 1aedb3920a45bfe75db4514502b4e7f83e108f63
-- 
2.33.1



Re: [PATCH 1/3] Add power10 zero cycle moves for switches & indirect jumps

2021-11-22 Thread Bill Schmidt via Gcc-patches
Hi Mike,

Thanks for this patch!

On 11/19/21 8:53 AM, Michael Meissner wrote:
> Add power10 zero cycle moves for switches.
>
> Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero
> cycle moves.  This code exploits this fusion opportunity.
>
> I have built bootstrapped compilers with this patch on little endian power9 
> and
> power10 systems with no regressions.  Can I install this into the master
> branch?
>
> 2021-11-19  Michael Meissner  
>
>   * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
>   support for -mpower10-fusion-zero-cycle.
>   (POWERPC_MASKS): Likewise.
>   * config/rs6000/rs6000.c (rs6000_option_override_internal):
>   Likewise.
>   * config/rs6000/rs6000.md (indirect_jump): Support zero cycle
>   moves.
>   (indirect_jump_zero_cycle): New insns.
>   (tablejump_normal): Likewise.
>   (tablejump_absolute): Likewise.
>   (tablejump_insn_zero_cycle): New insn.
>   * config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New
>   debug switch.
> ---
>  gcc/config/rs6000/rs6000-cpus.def |  4 ++-
>  gcc/config/rs6000/rs6000.c|  4 +++
>  gcc/config/rs6000/rs6000.md   | 52 ---
>  gcc/config/rs6000/rs6000.opt  |  4 +++
>  4 files changed, 59 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-cpus.def 
> b/gcc/config/rs6000/rs6000-cpus.def
> index f5812da0184..cc072ee94ea 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -91,7 +91,8 @@
>| OPTION_MASK_P10_FUSION_LOGADD\
>| OPTION_MASK_P10_FUSION_ADDLOG\
>| OPTION_MASK_P10_FUSION_2ADD  \
> -  | OPTION_MASK_P10_FUSION_2STORE)
> +  | OPTION_MASK_P10_FUSION_2STORE\
> +  | OPTION_MASK_P10_FUSION_ZERO_CYCLE)

I guess it's fine to introduce one more for now, but ultimately we want
all these to get collapsed down to one.  No worries from me.

>  
>  /* Flags that need to be turned off if -mno-power9-vector.  */
>  #define OTHER_P9_VECTOR_MASKS(OPTION_MASK_FLOAT128_HW
> \
> @@ -145,6 +146,7 @@
>| OPTION_MASK_P10_FUSION_ADDLOG\
>| OPTION_MASK_P10_FUSION_2ADD  \
>| OPTION_MASK_P10_FUSION_2STORE\
> +  | OPTION_MASK_P10_FUSION_ZERO_CYCLE\
>| OPTION_MASK_HTM  \
>| OPTION_MASK_ISEL \
>| OPTION_MASK_MFCRF\
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e4843eb0f1c..6780304a5eb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p)
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
>  
> +  if (TARGET_POWER10
> +  && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 
> 0)
> +rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
> +
>/* Turn off vector pair/mma options on non-power10 systems.  */
>else if (!TARGET_POWER10 && TARGET_MMA)
>  {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..ea41eb4ada3 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
>  emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
>  DONE;
>}
> +  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> +{
> +  emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> +  DONE;
> +}
>  })
>  
>  (define_insn "*indirect_jump"
>[(set (pc)
>   (match_operand:P 0 "register_operand" "c,*l"))]
> -  "rs6000_speculate_indirect_jumps"
> +  "rs6000_speculate_indirect_jumps
> +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
>"b%T0"
>[(set_attr "type" "jmpreg")])
>  
> +(define_insn "@indirect_jump_zero_cycle"

I don't know why this is an "@" pattern, but honestly I don't
know why @indirect_jump_nospec is an "@" pattern either.
The documentation for such things is hard for me to understand,
so I'm probably just missing something obvious, but I don't
immediately see why we would need the @ here.

> +  [(set (pc)
> + (match_operand:P 0 "register_operand" "r,r,!cl"))
> +   (clobber (match_scratch:P 1 "=c,*l,X"))]

Do we need the *l and X alternatives if we're only doing this for
mtctr/bctr?

> +  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> +   && TARGET_P10_FUSION_ZERO_CYCLE"
> +  "@
> +   mt%T1 

PING 2 [PATCH] fix up compute_objsize (including PR 103143)

2021-11-22 Thread Martin Sebor via Gcc-patches

Ping:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583735.html

On 11/15/21 9:49 AM, Martin Sebor wrote:

Ping for the following cleanup patch:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583735.html

On 11/8/21 7:34 PM, Martin Sebor wrote:

The pointer-query code that implements compute_objsize() that's
in turn used by most middle end access warnings now has a few
warts in it and (at least) one bug.  With the exception of
the bug the warts aren't behind any user-visible bugs that
I know of but they do cause problems in new code I've been
implementing on top of it.  Besides fixing the one bug (just
a typo) the attached patch cleans up these latent issues:

1) It moves the bndrng member from the access_ref class to
    access_data.  As a FIXME in the code notes, the member never
    did belong in the former and only takes up space in the cache.

2) The compute_objsize_r() function is big, unwieldy, and tedious
    to step through because of all the if statements that are better
    coded as one switch statement.  This change factors out more
    of its code into smaller handler functions as has been suggested
    and done a few times before.

3) (2) exposed a few places where I fail to pass the current
    GIMPLE statement down to ranger.  This leads to worse quality
    range info, including possible false positives and negatives.
    I just spotted these problems in code review but I haven't
    taken the time to come up with test cases.  This change fixes
    these oversights as well.

4) The handling of PHI statements is also in one big, hard-to-
    follow function.  This change moves the handling of each PHI
    argument into its own handler which merges it into the previous
    argument.  This makes the code easier to work with and opens it
    to reuse also for MIN_EXPR and MAX_EXPR.  (This is primarily
    used to print informational notes after warnings.)

5) Finally, the patch factors code to dump each access_ref
    cached by the pointer_query cache out of pointer_query::dump
    and into access_ref::dump.  This helps with debugging.

These changes should have no user-visible effect and other than
a regression test for the typo (PR 103143) come with no tests.
They've been tested on x86_64-linux.

Martin






PING 3 [PATCH 0/2] provide simple detection of indeterminate pointers

2021-11-22 Thread Martin Sebor via Gcc-patches

Pinging the two patches below:

-Wuse-after-free:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583044.html

and -Wdangling-pointer:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583045.html

On 11/15/21 9:47 AM, Martin Sebor wrote:

Pinging the two patches below:

-Wuse-after-free:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583044.html

and -Wdangling-pointer:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583045.html

On 11/8/21 3:41 PM, Martin Sebor wrote:

Ping for the two patches below:

-Wuse-after-free:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583044.html

and -Wdangling-pointer:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583045.html

On 11/1/21 4:15 PM, Martin Sebor wrote:

This two-patch series adds support for the detection of uses
of pointers invalidated as a result of the lifetime of
the objects they point to having ended: either explicitly,
after a call to a dynamic deallocation function, or implicitly,
by virtue of an object with automatic storage duration having
gone out of scope.

To minimize false positives the initial logic is very simple
(even simplistic): the code only checks uses in basic blocks
dominated by the invalidating calls (either calls to
deallocation functions or GCC's clobbers).

A more thorough checker is certainly possible and I'd say most
desirable but will require a more sophisticated implementation
and a better predicate analyzer than is available, and so will
need to wait for GCC 13.

Martin








[PATCH][pushed] docs: remove duplicate param documentation

2021-11-22 Thread Martin Liška

The params are documented twice.

Pushed as obvious.

Martin

gcc/ChangeLog:

* doc/invoke.texi: Remove duplicate documentation for 3 params.
---
 gcc/doc/invoke.texi | 12 
 1 file changed, 12 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4b1b58318f0..e080b125831 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14709,10 +14709,6 @@ by the RTL if-conversion pass for a branch that is 
considered unpredictable.
 If @option{-fvariable-expansion-in-unroller} is used, the maximum number
 of times that an individual variable will be expanded during loop unrolling.
 
-@item tracer-min-branch-probability-feedback

-Stop forward growth if the probability of best edge is less than
-this threshold (in percent). Used when profile feedback is available.
-
 @item partial-inlining-entry-probability
 Maximum probability of the entry BB of split region
 (in percent relative to entry BB of the function)
@@ -14762,10 +14758,6 @@ A threshold on the average loop count considered by 
the swing modulo scheduler.
 The number of cycles the swing modulo scheduler considers when checking
 conflicts using DFA.
 
-@item max-inline-insns-recursive-auto

-The maximum number of instructions non-inline function
-can grow to via recursive inlining.
-
 @item graphite-allow-codegen-errors
 Whether codegen errors should be ICEs when @option{-fchecking}.
 
@@ -14815,10 +14807,6 @@ when comparing to the number of (scaled) blocks.

 Maximum number of nested calls to search for control dependencies
 during uninitialized variable analysis.
 
-@item sra-max-scalarization-size-Osize

-Maximum size, in storage units, of an aggregate
-which should be considered for scalarization when compiling for size.
-
 @item fsm-scale-path-blocks
 Scale factor to apply to the number of blocks in a threading path
 when comparing to the number of (scaled) statements.
--
2.33.1



std::basic_string<_Tp> constructor point of instantiation woes?

2021-11-22 Thread Stephan Bergmann via Gcc-patches
When using recent libstc++ trunk with Clang in C++20 mode, 
std::u16string literals as in



#include 
int main() {
  using namespace std::literals;
  u""s;
}


started to cause linker failures due to undefined


_ZNSt7__cxx1112basic_stringIDsSt11char_traitsIDsESaIDsEE12_M_constructIPKDsEEvT_S8_St20forward_iterator_tag


After some head scratching, I found the more insightful


$ cat test.cc
#include 
constexpr std::string s("", 0);



$ clang++ -std=c++20 -fsyntax-only test.cc
test.cc:2:23: error: constexpr variable 's' must be initialized by a constant 
expression
constexpr std::string s("", 0);
  ^~~~
~/gcc/trunk/inst/lib/gcc/x86_64-pc-linux-gnu/12.0.0/../../../../include/c++/12.0.0/bits/basic_string.h:620:2:
 note: undefined function '_M_construct' cannot be used in a 
constant expression
_M_construct(__s, __s + __n, std::forward_iterator_tag());
^
test.cc:2:23: note: in call to 'basic_string(&""[0], 0, std::allocator())'
constexpr std::string s("", 0);
  ^
~/gcc/trunk/inst/lib/gcc/x86_64-pc-linux-gnu/12.0.0/../../../../include/c++/12.0.0/bits/basic_string.h:331:9:
 note: declared here
_M_construct(_FwdIterator __beg, _FwdIterator __end,
^
1 error generated.


and after some more head scratching found Clang to complain about the 
reduced



template struct S {
constexpr void f();
constexpr S() { f(); };
};
S s1;
template constexpr void S::f() {}
constexpr S s2;


(about which GCC does not complain).  Not entirely sure who is right, 
but what would help Clang is to move the definitions of the literal 
operators in basic_string.h (which implicitly instantiate the 
corresponding std::basic_string<_Tp> constructor) past the definition of 
_M_construct (which is called from the constructor) in basic_string.tcc; 
something like



diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 6e7de738308..871ca89e16e 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -4304,55 +4304,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __is_fast_hash> : std::false_type
 { };
 
-#if __cplusplus >= 201402L

-
-#define __cpp_lib_string_udls 201304
-
-  inline namespace literals
-  {
-  inline namespace string_literals
-  {
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wliteral-suffix"
-
-#if __cpp_lib_constexpr_string >= 201907L
-# define _GLIBCXX_STRING_CONSTEXPR constexpr
-#else
-# define _GLIBCXX_STRING_CONSTEXPR
-#endif
-
-_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
-inline basic_string
-operator""s(const char* __str, size_t __len)
-{ return basic_string{__str, __len}; }
-
-_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
-inline basic_string
-operator""s(const wchar_t* __str, size_t __len)
-{ return basic_string{__str, __len}; }
-
-#ifdef _GLIBCXX_USE_CHAR8_T
-_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
-inline basic_string
-operator""s(const char8_t* __str, size_t __len)
-{ return basic_string{__str, __len}; }
-#endif
-
-_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
-inline basic_string
-operator""s(const char16_t* __str, size_t __len)
-{ return basic_string{__str, __len}; }
-
-_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
-inline basic_string
-operator""s(const char32_t* __str, size_t __len)
-{ return basic_string{__str, __len}; }
-
-#undef _GLIBCXX_STRING_CONSTEXPR
-#pragma GCC diagnostic pop
-  } // inline namespace string_literals
-  } // inline namespace literals
-
 #if __cplusplus >= 201703L
   namespace __detail::__variant
   {
@@ -4369,7 +4320,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { };
   }  // namespace __detail::__variant
 #endif // C++17
-#endif // C++14
 
 _GLIBCXX_END_NAMESPACE_VERSION

 } // namespace std
diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
b/libstdc++-v3/include/bits/basic_string.tcc
index 6f619a08f70..2fd607ef50a 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1123,6 +1123,57 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif // _GLIBCXX_USE_WCHAR_T
 #endif // _GLIBCXX_EXTERN_TEMPLATE
 
+#if __cplusplus >= 201402L

+
+#define __cpp_lib_string_udls 201304
+
+  inline namespace literals
+  {
+  inline namespace string_literals
+  {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wliteral-suffix"
+
+#if __cpp_lib_constexpr_string >= 201907L
+# define _GLIBCXX_STRING_CONSTEXPR constexpr
+#else
+# define _GLIBCXX_STRING_CONSTEXPR
+#endif
+
+_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
+inline basic_string
+operator""s(const char* __str, size_t __len)
+{ return basic_string{__str, __len}; }
+
+_GLIBCXX_DEFAULT_ABI_TAG _GLIBCXX_STRING_CONSTEXPR
+inline basic_string
+operator""s(const wchar_t* __str, size_t __len)
+{ return basic_string{__str, __len}; }
+
+#ifdef 

Re: [PATCH 0/3] Add zero cycle move support

2021-11-22 Thread David Edelsohn via Gcc-patches
On Mon, Nov 22, 2021 at 10:58 AM Bill Schmidt  wrote:
>
> Hi!
>
> On 11/19/21 8:49 AM, Michael Meissner wrote:
> > The next set of 3 patches add zero cycle move support to the Power10.  Zero
> > cycle moves are where the move to LR/CTR/TAR register that is adjacent to 
> > the
> > jump to LR/CTR/TAR register can be fused together.
> >
> > At the moment, these set of three patches add support for zero cycle moves 
> > for
> > indirect jumps and switch tables using the CTR register.  Potential zero 
> > cycle
> > moves for doing returns are not currently handled.
> >
> > In looking at the code, I discovered that just using zero cycle moves isn't 
> > as
> > helpful unless we can eliminate the add instruction before doing the jump.  
> > I
> > also noticed that the various power10 fusion options are only done if
> > -mcpu=power10.  I added a patch to do the fusion for -mtune=power10 as well.
> >
> > I have done bootstraps and make check with these patches installed on both
> > little endian power9 and little endian power10 systems.  Can I install these
> > patches?
> >
> > The following patches will be posted:
> >
> > 1) Patch to add zero cycle move for indirect jumps and switches.
> >
> > 2) Patch to enable p10 fusion for -mtune=power10 in addition to 
> > -mcpu=power10.
> >
> > 3) Patch to use absolute addresses for switch tables instead of relative
> >addresses if zero cycle fusion is enabled.
> >
> For this last point, I had thought that the plan was to always switch over to
> absolute addresses for switch tables, following the work that Hao Chen did in
> this area.  Am I misremembering?  Hao Chen, can you please remind me where we
> ended up here?

And do the absolute addressing for switch tables changes work on AIX?
I thought that Hao Chen only had done the work for PPC64 Linux ELF
syntax with promises of future changes to accommodate AIX as well.

Thanks, David


Re: [PATCH 2/3] Set power10 fusion if -mtune=power10.

2021-11-22 Thread Bill Schmidt via Gcc-patches
Hi Mike,

On 11/19/21 8:55 AM, Michael Meissner wrote:
> Set power10 fusion if -mtune=power10.
>
> In doing the patch for zero cycle moves for switch statements and indirect
> jumps, I noticed the fusion support is only done if -mcpu=power10.  This 
> option
> enables power10 fusion if we use -mtune=power10.
>
> I have built and run the testsuites on little endian power9 and power10 
> systems
> with no regressions.  Can I install this patch?

This all seems fine, but since we're planning on collapsing all those flags
anyway, maybe it would be better if we did that first.  This seems like work
that will mostly be removed soon.  But no concerns from me otherwise.

Thanks!
Bill

>
> 2021-11-19  Michael Meissner  
>
>   * config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
>   power10 fusion if -mtune=power10.
>   (rs6000_opt_masks): Add power10 fusion options.
> ---
>  gcc/config/rs6000/rs6000.c | 25 +
>  1 file changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 6780304a5eb..8531cef0337 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4469,35 +4469,36 @@ rs6000_option_override_internal (bool global_init_p)
>if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
>  rs6000_isa_flags |= OPTION_MASK_MMA;
>  
> -  if (TARGET_POWER10
> +  /* Enable power10 tuning if either -mcpu=power10 or -mtune=power10.  */
> +  if ((TARGET_POWER10 || rs6000_tune == PROCESSOR_POWER10)
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
>  
> -  if (TARGET_POWER10 &&
> +  if (TARGET_P10_FUSION &&
>(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
>  
> -  if (TARGET_POWER10
> +  if (TARGET_P10_FUSION
>&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 
> 0)
>  rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
>  
> @@ -24292,6 +24293,14 @@ static struct rs6000_opt_mask const 
> rs6000_opt_masks[] =
>{ "power9-misc",   OPTION_MASK_P9_MISC,false, true  },
>{ "power9-vector", OPTION_MASK_P9_VECTOR,  false, true  },
>{ "power10-fusion",OPTION_MASK_P10_FUSION, false, 
> true  },
> +  { "power10-fusion-ld-cmpi",OPTION_MASK_P10_FUSION_LD_CMPI, false, 
> true  },
> +  { "power10-fusion-2logical",   OPTION_MASK_P10_FUSION_2LOGICAL,false, 
> true  },
> +  { "power10-fusion-logical-add", OPTION_MASK_P10_FUSION_LOGADD,false, true  
> },
> +  { "power10-fusion-add-logical", OPTION_MASK_P10_FUSION_ADDLOG,false, true  
> },
> +  { "power10-fusion-2add",   OPTION_MASK_P10_FUSION_2ADD,false, true  },
> +  { "power10-fusion-2store", OPTION_MASK_P10_FUSION_2STORE,  false, true  },
> +  { "power10-fusion-zero-cycle", OPTION_MASK_P10_FUSION_ZERO_CYCLE,
> + false, true  },
>{ "powerpc-gfxopt",OPTION_MASK_PPC_GFXOPT, false, 
> true  },
>{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT,  false, true  },
>{ "prefixed",  OPTION_MASK_PREFIXED,   false, 
> true  },


Re: [PATCH 0/3] Add zero cycle move support

2021-11-22 Thread Bill Schmidt via Gcc-patches
Hi!

On 11/19/21 8:49 AM, Michael Meissner wrote:
> The next set of 3 patches add zero cycle move support to the Power10.  Zero
> cycle moves are where the move to LR/CTR/TAR register that is adjacent to the
> jump to LR/CTR/TAR register can be fused together.
>
> At the moment, these set of three patches add support for zero cycle moves for
> indirect jumps and switch tables using the CTR register.  Potential zero cycle
> moves for doing returns are not currently handled.
>
> In looking at the code, I discovered that just using zero cycle moves isn't as
> helpful unless we can eliminate the add instruction before doing the jump.  I
> also noticed that the various power10 fusion options are only done if
> -mcpu=power10.  I added a patch to do the fusion for -mtune=power10 as well.
>
> I have done bootstraps and make check with these patches installed on both
> little endian power9 and little endian power10 systems.  Can I install these
> patches?
>
> The following patches will be posted:
>
> 1) Patch to add zero cycle move for indirect jumps and switches.
>
> 2) Patch to enable p10 fusion for -mtune=power10 in addition to -mcpu=power10.
>
> 3) Patch to use absolute addresses for switch tables instead of relative
>addresses if zero cycle fusion is enabled.
>
For this last point, I had thought that the plan was to always switch over to
absolute addresses for switch tables, following the work that Hao Chen did in
this area.  Am I misremembering?  Hao Chen, can you please remind me where we
ended up here?

Thanks!
Bill



Re: [PATCH 1/2] Add cumulative_args_t variants of TARGET_FUNCTION_ROUND_BOUNDARY and friends

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, Nov 22, 2021 at 3:40 PM Maxim Blinov  wrote:
>
> Hi Richard,
>
> The purpose of this patch is to give more of the target code access to
> the cumulative_args_t structure in order to enable certain (otherwise
> currently impossible) stack layout constraints, specifically for
> parameters that are passed over the stack. For example, there are some
> targets (not yet in GCC trunk) which explicitly require the
> distinction between named and variadic parameters in order to enforce
> different alignment rules (when passing over the stack.) Such a
> constraint would be accommodated by this patch.
>
> The patch itself is very straightforward and simply adds the parameter
> to the two functions which we'd like to extend. The motivation of
> defining new target hooks was to minimize the patch size.

I would prefer to adjust the existing hooks if there's currently no way to
implement the aarch64 darwin ABI instead of optimizing for patch size.

I have no opinion on whether passing down cummulative args is the
proper thing to do design-wise.  It might be that TARGET_FUNCTION_ARG_BOUNDARY
is only one part that cummulative args using code should look at
(given you don't show the other users of function_arg_boundary that do not
conveniently have a CA struct around).

Richard.

> A concrete user of this change that we have in mind is the AArch64
> Darwin parameter passing abi, which mandates that when passing on the
> stack, named parameters are to be naturally-aligned, however variadic
> ones are to be word-aligned. However this patch is completely generic
> in nature and should enable all targets to have more control over
> their parameter layout process.
>
> Best Regards,
> Maxim
>
> On Mon, 15 Nov 2021 at 07:11, Richard Biener  
> wrote:
> >
> > On Sat, Nov 13, 2021 at 10:43 AM Maxim Blinov  
> > wrote:
> > >
> > > The two target hooks responsible for informing GCC about stack
> > > parameter alignment are `TARGET_FUNCTION_ARG_BOUNDARY` and
> > > `TARGET_FUNCTION_ARG_ROUND_BOUNDARY`, which currently only consider
> > > the tree and machine_mode of a specific given argument.
> > >
> > > Create two new target hooks suffixed with '_CA', and pass in a third
> > > `cumulative_args_t` parameter. This enables the backend to make
> > > alignment decisions based on the context of the whole function rather
> > > than individual parameters.
> > >
> > > The orignal machine_mode/tree type macros are not removed - they are
> > > called by the default implementations of `TARGET_...BOUNDARY_CA` and
> > > `TARGET_...ROUND_BOUNDARY_CA`. This is done with the intetnion of
> > > avoiding large mechanical modifications of nearly every backend in
> > > GCC. There is also a new flag, -fstack-use-cumulative-args, which
> > > provides a way to completely bypass the new `..._CA` macros. This
> > > feature is intended for debugging GCC itself.
> >
> > Just two quick comments without looking at the patch.
> >
> > Please do not introduce options in the user namespace -f... which are
> > for debugging only.  I think you should go without this part instead.
> >
> > Second, you fail to motivate the change.  I cannot make sense of
> > "This enables the backend to make alignment decisions based on the
> > context of the whole function rather than individual parameters."
> >
> > Richard.
> >
> > >
> > > gcc/ChangeLog:
> > >
> > > * calls.c (initialize_argument_information): Pass `args_so_far`.
> > > * common.opt: New flag `-fstack-use-cumulative-args`.
> > > * config.gcc: No platforms currently use ..._CA-hooks: Set
> > > -fstack-use-cumulative-args to be off by default.
> > > * target.h (cumulative_args_t): Move declaration from here, to...
> > > * cumulative-args.h (cumulative_args_t): ...this new file. This is
> > > to permit backends to include the declaration of cumulative_args_t
> > > without dragging in circular dependencies.
> > > * function.c (assign_parm_find_entry_rtl): Provide
> > > cumulative_args_t to locate_and_pad_parm.
> > > (gimplify_parameters): Ditto.
> > > (locate_and_pad_parm): Conditionally call new hooks if we're
> > > invoked with -fstack-use-cumulative-args.
> > > * function.h: Include cumulative-args.h.
> > > (locate_and_pad_parm): Add cumulative_args_t parameter.
> > > * target.def (function_arg_boundary_ca): Add.
> > > (function_arg_round_boundary_ca): Ditto.
> > > * targhooks.c (default_function_arg_boundary_ca): Implement.
> > > (default_function_arg_round_boundary_ca): Ditto.
> > > * targhooks.h (default_function_arg_boundary_ca): Declare.
> > > (default_function_arg_round_boundary_ca): Ditto.
> > > * doc/invoke.texi (-fstack-use-cumulative-args): Document.
> > > * doc/tm.texi: Regenerate.
> > > * doc/tm.texi.in: Ditto.
> > > ---
> > >  gcc/calls.c   |  3 +++
> > >  gcc/common.opt|  4 
> > >  

Re: [PATCH] Loop unswitching: support gswitch statements.

2021-11-22 Thread Martin Liška

On 11/19/21 11:00, Richard Biener wrote:

On Tue, Nov 16, 2021 at 3:40 PM Martin Liška  wrote:


On 11/11/21 08:15, Richard Biener wrote:

So I'd try to do no functional change first, improving the costing and
setting up the transform to simply pick up the stmts to "fold" as discovered
during analysis (as I hinted you possibly can use gimple_uid to mark
the stmts that simplify, IIRC gimple_uid is preserved during copying.
gimple_uid would also scale better than gimple_plf in case we do
the analysis for all candidates at once).


Thinking about the analysis. Am I correct that we want to properly calculate
loop size for true and false edge of a potential gcond before the actually 
unswitching?


Yes.


We can do that by finding a first gcond candidate, evaluate (symbolic + irange 
approache)
all other gcond in the loop body and use BB_REACHABLE discovery. Similarly to 
what we do now
at lines 378-446. Then tree_num_loop_insns can be adjusted for only these 
reachable blocks.
Having that, we can calculate # of insns that will live in true/false loops.


So whatever we do here we should record as "this control stmt folds to
{true,false}" (or {true,unknown},
or in future, "this control stmt will lead to edge {e,unknown}"),
recording the simplification
on the true/false loop version in a way we can apply it after the transform.


Then we can call tree_unswitch_loop and make the gcond folding as we do in the 
versioned loops.

Is it a step in good direction? Having that we can then extend it to gswitch 
statements.


One issue I see is that BB_REACHABLE is there only once but you could use
auto_bb_flag reachable_true, reachable_false to distinguish the
true/false loop version
copies.

So yes, I think that sounds reasonable.  At the point we want to
evaluate different
(first) unswitching opportunities against each other storing this only
as BB flag is
likely to hit limits.  When we want to evaluate multiple levels of
unswitching before
doing any transforms even more so (if there are 3 opportunities there'd be
many cases to be considered when going to level 3 ;)).  I _think_ that a sparse
lattice of stmt UID -> edge might do the trick if we change tree_num_loop_insns
do to a DFS walk from the loop header, ignoring not taken edges by
consulting the
lattice.  Or, for speed reason, pre-compute tree_num_loop_insns for each BB
so we just have to sum a different set of BBs rather than walking all
stmts again.

That said, the second step would definitely be to choose the "best" opportunity
on the current level.

Richard.


Cheers,
Martin


Hello.

I'm sending a new version where I changed:
1) all unswitch_predicates are find for a loop
2) context sensitive costing happens based on an unswitch_predicate and BB 
reachability
   is implemented
3) folding happens in recursive invocation once we decide to unswitch
4) the patch folds both symbolic gcond predicates and irange provided by ranger
5) debug counter was added

Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Plus, I 
tested it
on SPEC2006 and SPEC2017 with -size=ref.

Thoughts?
Thanks,
Martin

From 021cac1f6a4c8c0c049f015ab9adce5520cf0f28 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 22 Nov 2021 13:54:20 +0100
Subject: [PATCH] Loop unswitching: improve costing model

gcc/ChangeLog:

	* dbgcnt.def (loop_unswitch): Add new debug counter.
	* tree-ssa-loop-unswitch.c (struct unswitch_predicate):
	New struct.
	(tree_unswitch_single_loop): First, do costing evaluation before
	we unswitch.
	(tree_may_unswitch_on): Return unswitch_predicate instead of
	tree.
	(tree_ssa_unswitch_loops): Initialize and disable ranger.
	(simplify_using_entry_checks): Consider both symbolic
	expressions and iranges.
	(find_all_unswitching_predicates): New.
	(evaluate_insns): New.
	(evaluate_loop_insns_for_predicate): New.

gcc/testsuite/ChangeLog:

	* gcc.dg/loop-unswitch-8.c: New test.
	* gcc.dg/loop-unswitch-9.c: New test.
---
 gcc/dbgcnt.def |   1 +
 gcc/testsuite/gcc.dg/loop-unswitch-8.c |  31 ++
 gcc/testsuite/gcc.dg/loop-unswitch-9.c |  27 ++
 gcc/tree-ssa-loop-unswitch.c   | 468 +++--
 4 files changed, 338 insertions(+), 189 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-unswitch-8.c
 create mode 100644 gcc/testsuite/gcc.dg/loop-unswitch-9.c

diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index f8a15f3d1d1..278fb1112b3 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -187,6 +187,7 @@ DEBUG_COUNTER (ira_move)
 DEBUG_COUNTER (ivopts_loop)
 DEBUG_COUNTER (lim)
 DEBUG_COUNTER (local_alloc_for_sched)
+DEBUG_COUNTER (loop_unswitch)
 DEBUG_COUNTER (match)
 DEBUG_COUNTER (merged_ipa_icf)
 DEBUG_COUNTER (phiopt_edge_range)
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-8.c b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
new file mode 100644
index 000..7738a24d849
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-8.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 

Re: [PATCH] openacc: Fix up C++ #pragma acc routine handling [PR101731]

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 03:49:42PM +0100, Thomas Schwinge wrote:
> Then, regarding the user-visible behavior:
> 
> > +#pragma acc routine  /* { dg-error "not immediately followed by a single 
> > function declaration or definition" "" { target c++ } } */
> > +int foo (int bar ());
> 
> So in C++ we now refuse, but in C we do accept this.  I suppose I shall
> look into making C behave the same way -- unless there is a reason for
> the different behavior?  And/or, is it actually is useful to allow such
> nested usage?  Per its associated clauses, an OpenACC 'routine' directive
> really is meant to apply to one function only, in contrast to OpenMP
> 'target declare'.  But the question is whether we should raise an error
> for the example above, or whether the 'routine' shall just apply to 'foo'
> but not 'bar', but without an error diagnostic?

All I've verified is that our OpenMP code handles it the same way,
i.e.
#pragma omp declare simd
int foo (int bar ());
is accepted in C and rejected in C++.
I guess one question is to check if it is in both languages actually
the same thing.  If we want to accept it in C++ and let the pragma
apply only to the outer declaration, I guess we'd need to temporarily
set to NULL parser->omp_declare_simd and parser->oacc_routine while
parsing the parameters of a function declaration or definition.
At least OpenMP is fairly fuzzy here, the reason we error on
#pragma omp declare simd
int foo (), i;
has been mainly some discussions in the lang committee and the fact
that it talks about a single declaration, not all affected declarations.
Whether int foo (int bar ()); should be in that light treated as two
function declarations or one with another one nested in it and irrelevant
for it is unclear.

Jakub



[committed] libstdc++: Fix condition for definition of _GLIBCXX14_DEPRECATED

2021-11-22 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.



The check for C++14 was using the wrong date.

libstdc++-v3/ChangeLog:

* include/bits/c++config (_GLIBCXX14_DEPRECATED): Fix condition
checking for C++14.
---
 libstdc++-v3/include/bits/c++config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index 4b7fa659300..cbcdedb366f 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -107,7 +107,7 @@
 # define _GLIBCXX11_DEPRECATED_SUGGEST(ALT)
 #endif
 
-#if defined(__DEPRECATED) && (__cplusplus >= 201403L)
+#if defined(__DEPRECATED) && (__cplusplus >= 201402L)
 # define _GLIBCXX14_DEPRECATED _GLIBCXX_DEPRECATED
 # define _GLIBCXX14_DEPRECATED_SUGGEST(ALT) _GLIBCXX_DEPRECATED_SUGGEST(ALT)
 #else
@@ -123,7 +123,7 @@
 # define _GLIBCXX17_DEPRECATED_SUGGEST(ALT)
 #endif
 
-#if defined(__DEPRECATED) && (__cplusplus > 201703L)
+#if defined(__DEPRECATED) && (__cplusplus >= 202002L)
 # define _GLIBCXX20_DEPRECATED(MSG) [[deprecated(MSG)]]
 # define _GLIBCXX20_DEPRECATED_SUGGEST(ALT) _GLIBCXX_DEPRECATED_SUGGEST(ALT)
 #else
-- 
2.31.1



Re: [PATCH] openacc: Fix up C++ #pragma acc routine handling [PR101731]

2021-11-22 Thread Thomas Schwinge
Hi Jakub!

On 2021-11-20T09:39:53+0100, Jakub Jelinek via Gcc-patches 
 wrote:
> The following testcase ICEs because two function declarations are nested in
> each other and the acc routine handling code isn't prepared to put the
> pragma on both.

Ha.  Many things covered in 'c-c++-common/goacc/routine-5.c' -- but not
that.

> The fix is similar to what #pragma omp declare {simd,variant} does,
> in particular set the fndecl_seen flag already in cp_parser_late_parsing*
> when we encounter it rather than only after we finalize it.
>
> In cp_finalize_oacc_routine I had to move the fndecl_seen diagnostics to
> non-FUNCTION_DECL block, because for FUNCTION_DECLs the flag is already
> known to be set from cp_parser_late_parsing_oacc_routine, but can't be
> removed altogether, because that regresses quality of 2 goacc/routine-5.c
> diagnostics - we drop "a single " from the
> '#pragma acc routine' not immediately followed by a single function 
> declaration or definition
> diagnostic say on
> #pragma acc routine
> int foo (), b;
> if we drop it altogether.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Thanks for looking into this.  I now don't recall my exact thinking when
reworking this code five years ago...  Please push your change, fixing
the ICE.

Then, regarding the user-visible behavior:

> +#pragma acc routine  /* { dg-error "not immediately followed by a single 
> function declaration or definition" "" { target c++ } } */
> +int foo (int bar ());

So in C++ we now refuse, but in C we do accept this.  I suppose I shall
look into making C behave the same way -- unless there is a reason for
the different behavior?  And/or, is it actually is useful to allow such
nested usage?  Per its associated clauses, an OpenACC 'routine' directive
really is meant to apply to one function only, in contrast to OpenMP
'target declare'.  But the question is whether we should raise an error
for the example above, or whether the 'routine' shall just apply to 'foo'
but not 'bar', but without an error diagnostic?

OpenACC 3.2, 2.15.1 "Routine Directive" states that "the 'routine'
directive without a name may appear immediately before a function
definition, a C++ _lambda_, or just before a function prototype and
applies to that immediately following function or prototype".


Grüße
 Thomas


> 2021-11-20  Jakub Jelinek  
>
>   PR c++/101731
>   * parser.c (cp_parser_late_parsing_oacc_routine): Set
>   parser->oacc_routine->fndecl_seen here, rather than ...
>   (cp_finalize_oacc_routine): ... here.  Don't error if
>   parser->oacc_routine->fndecl_seen is set for FUNCTION_DECLs.
>
>   * c-c++-common/goacc/routine-6.c: New test.
>
> --- gcc/cp/parser.c.jj2021-11-19 16:39:51.534595887 +0100
> +++ gcc/cp/parser.c   2021-11-19 22:14:41.209591009 +0100
> @@ -46852,8 +46852,8 @@ cp_parser_late_parsing_oacc_routine (cp_
>   emission easier.  */
>parser->oacc_routine->clauses = nreverse (parser->oacc_routine->clauses);
>cp_parser_pop_lexer (parser);
> -  /* Later, cp_finalize_oacc_routine will process the clauses, and then set
> - fndecl_seen.  */
> +  /* Later, cp_finalize_oacc_routine will process the clauses.  */
> +  parser->oacc_routine->fndecl_seen = true;
>
>return attrs;
>  }
> @@ -46871,16 +46871,17 @@ cp_finalize_oacc_routine (cp_parser *par
> || fndecl == error_mark_node)
>   return;
>
> -  if (parser->oacc_routine->fndecl_seen)
> - {
> -   error_at (parser->oacc_routine->loc,
> - "%<#pragma acc routine%> not immediately followed by"
> - " a single function declaration or definition");
> -   parser->oacc_routine = NULL;
> -   return;
> - }
>if (TREE_CODE (fndecl) != FUNCTION_DECL)
>   {
> +   if (parser->oacc_routine->fndecl_seen)
> + {
> +   error_at (parser->oacc_routine->loc,
> + "%<#pragma acc routine%> not immediately followed by"
> + " a single function declaration or definition");
> +   parser->oacc_routine = NULL;
> +   return;
> + }
> +
> cp_ensure_no_oacc_routine (parser);
> return;
>   }
> @@ -46921,11 +46922,6 @@ cp_finalize_oacc_routine (cp_parser *par
>parser->oacc_routine->clauses,
>DECL_ATTRIBUTES (fndecl));
>   }
> -
> -  /* Don't unset parser->oacc_routine here: we may still need it to
> -  diagnose wrong usage.  But, remember that we've used this "#pragma acc
> -  routine".  */
> -  parser->oacc_routine->fndecl_seen = true;
>  }
>  }
>
> --- gcc/testsuite/c-c++-common/goacc/routine-6.c.jj   2021-11-19 
> 22:13:11.150864445 +0100
> +++ gcc/testsuite/c-c++-common/goacc/routine-6.c  2021-11-19 
> 22:13:11.150864445 +0100
> @@ -0,0 +1,4 @@
> +/* PR c++/101731 */
> +
> +#pragma acc routine  /* { dg-error "not immediately followed by a single 
> function declaration or definition" 

Re: [PATCH 0/4] Add aarch64-darwin support for off-stack trampolines

2021-11-22 Thread Maxim Blinov
Hi all, apologies for forgetting to add the cover letter.

The motivation of this work is to provide (limited) support for GCC
nested function trampolines on targets that do not have an executable
stack. This code has been (roughly) tested by creating several
thousand nested functions (i.e. enough to force allocation of a new
page), making sure all the nested functions execute correctly, and
consequently returning back up and ensuring that the pages are freed
when there are no more active trampolines in them.

I was provided the initial design and prototype implementation by
Andrew Burgess, and have since refactored the code to support
allocation/deallocation of trampoline pages, and added AArch64
Linux/Darwin support.

One of the limitations of the implementation in its current state is
the inability to track longjmps. There has been some discussion about
instrumenting calls to setjmp/longjmp so that the state of trampolines
is correctly tracked and freed when necessary, however that hasn't
been worked on yet.

On Sat, 13 Nov 2021 at 09:45, Maxim Blinov  wrote:
>
> Note: This patch is not yet ready for trunk as its dependent on some
> patches that are not-yet-upstream, however it serves as motivation for
> the previous patch(es) which are independent.
>
> 
>
> Implement the __builtin_nested_func_ptr_{created,deleted} functions
> for the aarch64-darwin platform. For this platform
> --enable-off-stack-trampolines is enabled by default, and
> -foff-stack-trampolines is enabled by default if the host MacOS
> operating system version is 11.x or greater.
>
> Co-authored-by: Andrew Burgess 
>
> libgcc/ChangeLog:
>
> * config/aarch64/heap-trampoline.c (allocate_trampoline_page):
> Request for MAP_JIT in the case of __APPLE__.
> Provide __APPLE__ variant of aarch64_trampoline_insns that uses
> x16 as the chain pointer.
> (__builtin_nested_func_ptr_created): Call
> pthread_jit_write_protect_np() to toggle read/write permission on
> page.
> * config.host (aarch64*-*darwin* | arm64*-*darwin*): Handle
> --enable-off-stack-trampolines.
> * configure.ac (--enable-off-stack-trampolines): Permit setting
> for target aarch64*-*darwin* | arm64*-*darwin*, and set default to
> enabled.
> * configure: Regenerate.
> ---
>  gcc/config.gcc  |  7 +
>  libgcc/config.host  |  4 +++
>  libgcc/config/aarch64/heap-trampoline.c | 36 +
>  libgcc/configure|  6 +
>  libgcc/configure.ac |  6 +
>  5 files changed, 59 insertions(+)
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index 031be563c5d..c13f7629d44 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -1072,6 +1072,13 @@ esac
>
>  # Figure out if we need to enable -foff-stack-trampolines by default.
>  case ${target} in
> +aarch64*-*darwin* | arm64*-*darwin*)
> +  if test ${macos_maj} = 11 || test ${macos_maj} = 12; then
> +tm_defines="$tm_defines OFF_STACK_TRAMPOLINES_INIT=1"
> +  else
> +tm_defines="$tm_defines OFF_STACK_TRAMPOLINES_INIT=0"
> +  fi
> +  ;;
>  *)
>tm_defines="$tm_defines OFF_STACK_TRAMPOLINES_INIT=0"
>;;
> diff --git a/libgcc/config.host b/libgcc/config.host
> index d1a491d27e7..3c536b0928a 100644
> --- a/libgcc/config.host
> +++ b/libgcc/config.host
> @@ -414,6 +414,10 @@ aarch64*-*darwin* | arm64*-*darwin* )
> tmake_file="${tmake_file} t-crtfm"
> # No soft float for now because our long double is DF not TF.
> md_unwind_header=aarch64/aarch64-unwind.h
> +   if test x$off_stack_trampolines = xyes; then
> +   extra_parts="$extra_parts heap-trampoline.o"
> +   tmake_file="${tmake_file} ${cpu_type}/t-heap-trampoline"
> +   fi
> ;;
>  aarch64*-*-freebsd*)
> extra_parts="$extra_parts crtfastmath.o"
> diff --git a/libgcc/config/aarch64/heap-trampoline.c 
> b/libgcc/config/aarch64/heap-trampoline.c
> index 721a2bed400..6994602beaf 100644
> --- a/libgcc/config/aarch64/heap-trampoline.c
> +++ b/libgcc/config/aarch64/heap-trampoline.c
> @@ -5,6 +5,9 @@
>  #include 
>  #include 
>
> +/* For pthread_jit_write_protect_np */
> +#include 
> +
>  void *allocate_trampoline_page (void);
>  int get_trampolines_per_page (void);
>  struct tramp_ctrl_data *allocate_tramp_ctrl (struct tramp_ctrl_data *parent);
> @@ -43,8 +46,15 @@ allocate_trampoline_page (void)
>  {
>void *page;
>
> +#if defined(__gnu_linux__)
>page = mmap (0, getpagesize (), PROT_WRITE | PROT_EXEC,
>MAP_ANON | MAP_PRIVATE, 0, 0);
> +#elif defined(__APPLE__)
> +  page = mmap (0, getpagesize (), PROT_WRITE | PROT_EXEC,
> +  MAP_ANON | MAP_PRIVATE | MAP_JIT, 0, 0);
> +#else
> +  page = MAP_FAILED;
> +#endif
>
>return page;
>  }
> @@ -67,6 +77,7 @@ allocate_tramp_ctrl (struct tramp_ctrl_data *parent)
>return p;
>  }
>
> +#if defined(__gnu_linux__)
>  static const 

Re: [PATCH 1/2] Add cumulative_args_t variants of TARGET_FUNCTION_ROUND_BOUNDARY and friends

2021-11-22 Thread Maxim Blinov
Hi Richard,

The purpose of this patch is to give more of the target code access to
the cumulative_args_t structure in order to enable certain (otherwise
currently impossible) stack layout constraints, specifically for
parameters that are passed over the stack. For example, there are some
targets (not yet in GCC trunk) which explicitly require the
distinction between named and variadic parameters in order to enforce
different alignment rules (when passing over the stack.) Such a
constraint would be accommodated by this patch.

The patch itself is very straightforward and simply adds the parameter
to the two functions which we'd like to extend. The motivation of
defining new target hooks was to minimize the patch size.

A concrete user of this change that we have in mind is the AArch64
Darwin parameter passing abi, which mandates that when passing on the
stack, named parameters are to be naturally-aligned, however variadic
ones are to be word-aligned. However this patch is completely generic
in nature and should enable all targets to have more control over
their parameter layout process.

Best Regards,
Maxim

On Mon, 15 Nov 2021 at 07:11, Richard Biener  wrote:
>
> On Sat, Nov 13, 2021 at 10:43 AM Maxim Blinov  
> wrote:
> >
> > The two target hooks responsible for informing GCC about stack
> > parameter alignment are `TARGET_FUNCTION_ARG_BOUNDARY` and
> > `TARGET_FUNCTION_ARG_ROUND_BOUNDARY`, which currently only consider
> > the tree and machine_mode of a specific given argument.
> >
> > Create two new target hooks suffixed with '_CA', and pass in a third
> > `cumulative_args_t` parameter. This enables the backend to make
> > alignment decisions based on the context of the whole function rather
> > than individual parameters.
> >
> > The orignal machine_mode/tree type macros are not removed - they are
> > called by the default implementations of `TARGET_...BOUNDARY_CA` and
> > `TARGET_...ROUND_BOUNDARY_CA`. This is done with the intetnion of
> > avoiding large mechanical modifications of nearly every backend in
> > GCC. There is also a new flag, -fstack-use-cumulative-args, which
> > provides a way to completely bypass the new `..._CA` macros. This
> > feature is intended for debugging GCC itself.
>
> Just two quick comments without looking at the patch.
>
> Please do not introduce options in the user namespace -f... which are
> for debugging only.  I think you should go without this part instead.
>
> Second, you fail to motivate the change.  I cannot make sense of
> "This enables the backend to make alignment decisions based on the
> context of the whole function rather than individual parameters."
>
> Richard.
>
> >
> > gcc/ChangeLog:
> >
> > * calls.c (initialize_argument_information): Pass `args_so_far`.
> > * common.opt: New flag `-fstack-use-cumulative-args`.
> > * config.gcc: No platforms currently use ..._CA-hooks: Set
> > -fstack-use-cumulative-args to be off by default.
> > * target.h (cumulative_args_t): Move declaration from here, to...
> > * cumulative-args.h (cumulative_args_t): ...this new file. This is
> > to permit backends to include the declaration of cumulative_args_t
> > without dragging in circular dependencies.
> > * function.c (assign_parm_find_entry_rtl): Provide
> > cumulative_args_t to locate_and_pad_parm.
> > (gimplify_parameters): Ditto.
> > (locate_and_pad_parm): Conditionally call new hooks if we're
> > invoked with -fstack-use-cumulative-args.
> > * function.h: Include cumulative-args.h.
> > (locate_and_pad_parm): Add cumulative_args_t parameter.
> > * target.def (function_arg_boundary_ca): Add.
> > (function_arg_round_boundary_ca): Ditto.
> > * targhooks.c (default_function_arg_boundary_ca): Implement.
> > (default_function_arg_round_boundary_ca): Ditto.
> > * targhooks.h (default_function_arg_boundary_ca): Declare.
> > (default_function_arg_round_boundary_ca): Ditto.
> > * doc/invoke.texi (-fstack-use-cumulative-args): Document.
> > * doc/tm.texi: Regenerate.
> > * doc/tm.texi.in: Ditto.
> > ---
> >  gcc/calls.c   |  3 +++
> >  gcc/common.opt|  4 
> >  gcc/config.gcc|  7 +++
> >  gcc/cumulative-args.h | 20 
> >  gcc/doc/invoke.texi   | 12 
> >  gcc/doc/tm.texi   | 20 
> >  gcc/doc/tm.texi.in|  4 
> >  gcc/function.c| 25 +
> >  gcc/function.h|  2 ++
> >  gcc/target.def| 24 
> >  gcc/target.h  | 17 +
> >  gcc/targhooks.c   | 16 
> >  gcc/targhooks.h   |  6 ++
> >  13 files changed, 140 insertions(+), 20 deletions(-)
> >  create mode 100644 gcc/cumulative-args.h
> >
> > diff --git a/gcc/calls.c b/gcc/calls.c
> > index 27b59f26ad3..cef612a6ef4 100644
> > --- 

Re: [PATCH] x86: Speed up target attribute handling by using a cache

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 02:10:08PM +0100, Martin Liška wrote:
> On 11/22/21 14:05, Jakub Jelinek wrote:
> > On Mon, Nov 22, 2021 at 02:03:19PM +0100, Martin Liška wrote:
> > > I see only one patch attached, Jakub. Can you please send also the second 
> > > one?
> > 
> > The first one has been inlined in the mail body (the one with attribs.[ch]
> > changes), the second one has been attached (the one without that).
> > 
> > Jakub
> > 
> 
> Ah, sorry, I see. I prefer the copy_list version as it achieves more readable 
> code.

But it will allocate fresh memory on each target attribute that isn't
cached.
Perhaps we could add free_list that would free_node all the TREE_LISTs in a
chain.

Jakub



Re: [PATCH] c++: __builtin_bit_cast To C array target type [PR103140]

2021-11-22 Thread will wray via Gcc-patches
Ping.

Another use case; casting arrays of char to arrays of unsigned char
(useful in some crypto APIs).

On Mon, Nov 15, 2021 at 6:32 PM will wray  wrote:
>
> Yes - direct use of any builtin is not to be encouraged, in user code.
>
> This __builtin_bit_cast patch is intended to encourage experimentation
> with array copy semantics now, on truck, in preparation for P1997.
>
> The builtin bit_cast is strictly more powerful than the std::bit_cast
> library function that it helps implement, is available in any -std mode
> and might also be useful in C, independent of any standardization effort.
>
> The semantics of bit_cast is clear - it's just the resulting rvalue array
> itself is unfamiliar and tricky to handle within current language rules.
>
> On Mon, Nov 15, 2021 at 12:21 PM Jakub Jelinek  wrote:
> >
> > On Mon, Nov 15, 2021 at 12:12:22PM -0500, will wray via Gcc-patches wrote:
> > > One motivation for allowing builtin bit_cast to builtin array is that
> > > it enables direct bitwise constexpr comparisons via memcmp:
> > >
> > > template
> > > constexpr int bit_equal(A const& a, B const& b)
> > > {
> > >   static_assert( sizeof a == sizeof b,
> > >   "bit_equal(a,b) requires same sizeof" );
> > >   using bytes = unsigned char[sizeof(A)];
> > >   return __builtin_memcmp(
> > >  __builtin_bit_cast(bytes,a),
> > >  __builtin_bit_cast(bytes,b),
> > >  sizeof(A)) == 0;
> > > }
> >
> > IMNSHO people shouldn't use this builtin directly, and we shouldn't
> > encourage such uses, the standard interface is std::bit_cast.
> >
> > For the above, I don't see a reason to do it that way, you can
> > instead portably:
> >   struct bytes { unsigned char data[sizeof(A)]; };
> >   bytes ab = std::bit_cast(bytes, a);
> >   bytes bb = std::bit_cast(bytes, a);
> >   for (size_t i = 0; i < sizeof(A); ++i)
> > if (ab.data[i] != bb.data[i])
> >   return false;
> >   return true;
> > - __builtin_memcmp isn't portable either and memcmp isn't constexpr.
> >
> > If P1997 is in, it is easy to support it in std::bit_cast and easy to
> > explain what __builtin_bit_cast does for array types, but otherwise
> > it is quite unclear what it exactly does...
> >
> > Jakub
> >


Re: [PATCH] x86: Speed up target attribute handling by using a cache

2021-11-22 Thread Martin Liška

On 11/22/21 14:05, Jakub Jelinek wrote:

On Mon, Nov 22, 2021 at 02:03:19PM +0100, Martin Liška wrote:

I see only one patch attached, Jakub. Can you please send also the second one?


The first one has been inlined in the mail body (the one with attribs.[ch]
changes), the second one has been attached (the one without that).

Jakub



Ah, sorry, I see. I prefer the copy_list version as it achieves more readable 
code.

Martin


Re: [PATCH] x86: Speed up target attribute handling by using a cache

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 02:03:19PM +0100, Martin Liška wrote:
> I see only one patch attached, Jakub. Can you please send also the second one?

The first one has been inlined in the mail body (the one with attribs.[ch]
changes), the second one has been attached (the one without that).

Jakub



Re: [PATCH] x86: Speed up target attribute handling by using a cache

2021-11-22 Thread Martin Liška

On 11/22/21 10:36, Jakub Jelinek via Gcc-patches wrote:

Hi!

The target attribute handling is very expensive and for the common case
from x86intrin.h where many functions get implicitly the same target
attribute, we can speed up compilation a lot by caching it.

The following patches both create a single entry cache, where they cache
for a particular target attribute argument list the resulting
DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
values from ix86_valid_target_attribute_p and use the cache if the
args are the same as last time and we start either from NULL values
of those, or from the recorded values for those from last time.

Compiling a simple:
  #include 

  int i;
testcase with ./cc1 -quiet -O2 -isystem include/ test.c
takes on my WS without the patches ~0.392s and with either of the
patches ~0.182s, i.e. roughly half the time as before.
For ./cc1plus -quiet -O2 -isystem include/ test.c
it is slightly worse, the speed up is from ~0.613s to ~0.403s.

The difference between the 2 patches is that the first one uses copy_list
while the second one uses a vec, so I think the second one has the advantage
of creating less GC garbage.


Hello.

I see only one patch attached, Jakub. Can you please send also the second one?


I've verified both patches achieve the same content of those
DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
nodes as before on x86intrin.h by doing debug_tree on those and comparing
the stderr from without these patches to with these patches.

Both patches were bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk (and which one)?

2021-11-22  Jakub Jelinek  

* attribs.h (simple_cst_list_equal): Declare.
* attribs.c (simple_cst_list_equal): No longer static.
* config/i386/i386-options.c (target_attribute_cache): New variable.
(ix86_valid_target_attribute_p): Cache DECL_FUNCTION_SPECIFIC_TARGET
and DECL_FUNCTION_SPECIFIC_OPTIMIZATION based on args.

--- gcc/attribs.h.jj2021-11-11 14:35:37.442350841 +0100
+++ gcc/attribs.h   2021-11-19 11:52:08.843252645 +0100
@@ -60,6 +60,7 @@ extern tree build_type_attribute_variant
  extern tree build_decl_attribute_variant (tree, tree);
  extern tree build_type_attribute_qual_variant (tree, tree, int);
  
+extern bool simple_cst_list_equal (const_tree, const_tree);

  extern bool attribute_value_equal (const_tree, const_tree);
  
  /* Return 0 if the attributes for two types are incompatible, 1 if they

--- gcc/attribs.c.jj2021-11-11 14:35:37.442350841 +0100
+++ gcc/attribs.c   2021-11-19 11:51:43.473615692 +0100
@@ -1290,7 +1290,7 @@ cmp_attrib_identifiers (const_tree attr1
  /* Compare two constructor-element-type constants.  Return 1 if the lists
 are known to be equal; otherwise return 0.  */
  
-static bool

+bool
  simple_cst_list_equal (const_tree l1, const_tree l2)
  {
while (l1 != NULL_TREE && l2 != NULL_TREE)
--- gcc/config/i386/i386-options.c.jj   2021-11-15 13:19:07.347900863 +0100
+++ gcc/config/i386/i386-options.c  2021-11-20 00:27:32.919505947 +0100
@@ -1403,6 +1403,8 @@ ix86_valid_target_attribute_tree (tree f
return t;
  }
  
+static GTY(()) tree target_attribute_cache[3];


I would come up with a struct that would wrap the 3 trees as
target_attribute_cache[index] accessing is not much intuitive.

Thanks,
Martin


+
  /* Hook to validate attribute((target("string"))).  */
  
  bool

@@ -1423,6 +1425,19 @@ ix86_valid_target_attribute_p (tree fnde
&& strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
  return true;
  
+  if ((DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == target_attribute_cache[1]

+   || DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE)
+  && (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+ == target_attribute_cache[2]
+ || DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+  && simple_cst_list_equal (args, target_attribute_cache[0]))
+{
+  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_attribute_cache[1];
+  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+   = target_attribute_cache[2];
+  return true;
+}
+
tree old_optimize = build_optimization_node (_options,
   _options_set);
  
@@ -1456,8 +1471,17 @@ ix86_valid_target_attribute_p (tree fnde

if (new_target == error_mark_node)
  ret = false;
  
-  else if (fndecl && new_target)

+  else if (new_target)
  {
+  if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE
+ && DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+   {
+ target_attribute_cache[0] = copy_list (args);
+ target_attribute_cache[1] = new_target;
+ target_attribute_cache[2]
+   = old_optimize != new_optimize ? new_optimize : NULL_TREE;
+   }
+
DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
  
if (old_optimize != new_optimize)



Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, 22 Nov 2021, Andre Vieira (lists) wrote:

> 
> On 12/11/2021 13:12, Richard Biener wrote:
> > On Thu, 11 Nov 2021, Andre Vieira (lists) wrote:
> >
> >> Hi,
> >>
> >> This is the rebased and reworked version of the unroll patch.  I wasn't
> >> entirely sure whether I should compare the costs of the unrolled loop_vinfo
> >> with the original loop_vinfo it was unrolled of. I did now, but I wasn't
> >> too
> >> sure whether it was a good idea to... Any thoughts on this?
> > +  /* Apply the suggested unrolling factor, this was determined by the
> > backend
> > + during finish_cost the first time we ran the analyzis for this
> > + vector mode.  */
> > +  if (loop_vinfo->suggested_unroll_factor > 1)
> > +{
> > +  poly_uint64 unrolled_vf
> > +   = LOOP_VINFO_VECT_FACTOR (loop_vinfo) *
> > loop_vinfo->suggested_unroll_factor;
> > +  /* Make sure the unrolled vectorization factor is less than the max
> > + vectorization factor.  */
> > +  unsigned HOST_WIDE_INT max_vf = LOOP_VINFO_MAX_VECT_FACTOR
> > (loop_vinfo);
> > +  if (max_vf == MAX_VECTORIZATION_FACTOR || known_le (unrolled_vf,
> > max_vf))
> > +   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = unrolled_vf;
> > +  else
> > +   return opt_result::failure_at (vect_location,
> > +  "unrolling failed: unrolled"
> > +  " vectorization factor larger than"
> > +  " maximum vectorization factor:
> > %d\n",
> > +  LOOP_VINFO_MAX_VECT_FACTOR
> > (loop_vinfo));
> > +}
> > +
> > /* This is the point where we can re-start analysis with SLP forced
> > off.  */
> >   start_over:
> >
> > So we're honoring suggested_unroll_factor here but you still have the
> > now unused hunk
> >
> > +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool ,
> > +unsigned *suggested_unroll_factor, poly_uint64 min_vf
> > = 2)
> >   {
> >
> > I also wonder whether vect_analyze_loop_2 could at least prune
> > suggested_unroll_factor as set by vect_analyze_loop_costing with its
> > knowledge of max_vf itself?  That would avoid using the at the moment
> > unused LOOP_VINFO_MAX_VECT_FACTOR?
> >
> > I think all the things you do in vect_can_unroll should be figured
> > out with the re-analysis, and I'd just amend vect_analyze_loop_1
> > with a suggested unroll factor parameter like it has main_loop_vinfo
> > for the epilogue case.  The main loop adjustment would the be in the
> >
> >if (first_loop_vinfo == NULL)
> >  {
> >first_loop_vinfo = loop_vinfo;
> >first_loop_i = loop_vinfo_i;
> >first_loop_next_i = mode_i;
> >  }
> Sounds good.
> >
> > spot only, adding
> >
> > if (loop_vinfo->suggested_unroll_factor != 1)
> >{
> >  suggested_unroll_factor = loop_vinfo->suggested_unroll_factor;
> >  mode_i = first_loop_i;
> >  if (dump)
> >dump_print ("Trying unrolling by %d\n");
> >  continue;
> >}
> Not quite like this because of how we need to keep the suggestion given at
> finish_costs, put into suggested_unroll_factor, separate from how we tell
> vect_analyze_loop_1 that we are now vectorizing an unrolled vector loop, which
> we do by writing to loop_vinfo->suggested_unroll_factor. Perhaps I should
> renamed the latter, to avoid confusion? Let me know if you think that would
> help and in the mean-time this is what the patch looks like now. I'll follow
> up with a ChangeLog when we settle on the name & structure.

I think the patch looks OK, I'm only wondering about

+  if (first_loop_vinfo->suggested_unroll_factor > 1)
+{
+  if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"* Re-trying analysis with first vector
mode"
+" %s for epilogue with partial vectors of"
+" unrolled first loop.\n",
+GET_MODE_NAME (vector_modes[0]));
+ mode_i = 0;

and the later done check for bigger VF than main loop - why would
we re-start at 0 rather than at the old mode?  Maybe we want to
remember the iterator value we started at when arriving at the
main loop mode?  So if we analyzed successfully with mode_i == 2,
then sucessfully at mode_i == 4 which suggested an unroll of 2,
re-start at the mode_i we continued after the mode_i == 2
successful analysis?  To just consider the "simple" case of
AVX vs SSE it IMHO doesn't make much sense to succeed with
AVX V4DF, succeed with SSE V2DF and figure it's better than V4DF AVX
but get a suggestion of 2 times unroll and then re-try AVX V4DF
just to re-compute that yes, it's worse than SSE V2DF?  You
are probably thinking of SVE vs ADVSIMD here but do we need to
start at 0?  Adding a comment to the code would be 

Re: [PATCH v3] Do not abort compilation when dump file is /dev/*

2021-11-22 Thread Giuliano Belinassi via Gcc-patches
On Sun, 2021-11-21 at 00:47 -0300, Alexandre Oliva wrote:
> Hello, Giuliano, thanks for turning my suggestion into a patch!
> 
> On Nov 19, 2021, Richard Biener  wrote:
> 
> > > +/* { dg-options "-fdump-ipa-clones -o /dev/null" } */
> 
> May I suggest actually checking that the ipa-clones dump file is
> created
> with the same name and the location it would without -o /dev/null?  I
> think just using any of the normal dump file scanners would do.
> 
> There's a larger set of tests in gcc.misc/outputs.exp, where this
> test
> would be a slightly better fit.  Copying some of the tests under
> "Driver-chosen aux outputs", and some of the link tests that use
> '$oaout', changing them to use -o /dev/null without any changes to
> the
> expected output file names, would give us more coverage of expected
> behavior than just checking that we just don't get an error.
> 

Hi, Oliva.

The patch was already applied in trunk and backported to gcc-11. The
rationale behind it was that exiting with an error because it couldn't
write dumps into /dev/* confused configure. Simply not erroring
(through not dumping in /dev/ on -o /dev/null) was enough to fix
configure, and the testcase reflects that.

As for moving that test to gcc.misc, I am not sure if that deserves
another patch just for that, and then applying it on gcc-11. I could do
that if it is really important, OTOH.

Thank you,
Giuliano.

> Thanks again,
> 



Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 06:01:08PM +0530, Siddhesh Poyarekar wrote:
> On 11/22/21 17:30, Siddhesh Poyarekar wrote:
> > So I've got patch 10/10, which handles dynamic (and consequently
> > negative) offsets.  It basically computes a "whole size", which then
> > gives the extent to which a negative offset is valid, making the
> > estimates a bit more precise.  I didn't do it for static object sizes
> > because I didn't have time then, but I could add a patch 11/10 if the
> > idea sounds OK to you.
> 
> ... or alternatively, I could bring the whole size idea into this tree
> conversion patch so that it handles all kinds of offsets.  That might even
> eliminate patch 10/10.  What would you prefer?

Into this patch.

Jakub



Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Siddhesh Poyarekar

On 11/22/21 17:30, Siddhesh Poyarekar wrote:
So I've got patch 10/10, which handles dynamic (and consequently 
negative) offsets.  It basically computes a "whole size", which then 
gives the extent to which a negative offset is valid, making the 
estimates a bit more precise.  I didn't do it for static object sizes 
because I didn't have time then, but I could add a patch 11/10 if the 
idea sounds OK to you.


... or alternatively, I could bring the whole size idea into this tree 
conversion patch so that it handles all kinds of offsets.  That might 
even eliminate patch 10/10.  What would you prefer?


Siddhesh


Re: [PATCH] tree-optimization/103345: Improved load merging

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, Nov 22, 2021 at 9:40 AM Roger Sayle  wrote:
>
>
> This patch implements PR tree-optimization/103345 to merge adjacent
> loads when combined with addition or bitwise xor.  The current code
> in gimple-ssa-store-merging.c's find_bswap_or_nop alreay handles ior,
> so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
> the same way at BIT_IOR_EXPR.  Many thanks to Andrew Pinski for
> pointing out that this also resolves PR target/98953.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  The new testcases should also
> pass (but haven't been tested) on other endian targets.
>
> Ok for mainline?

OK.

Thanks,
Richard.

>
> 2021-11-22  Roger Sayle  
>
> gcc/ChangeLog
> PR tree-optimization/98953
> PR tree-optimization/103345
> * gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle
> BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR.
> (pass_optimize_bswap::execute): Likewise.
>
> gcc/testsuite/ChangeLog
> PR tree-optimization/98953
> PR tree-optimization/103345
> * gcc.dg/tree-ssa/pr98953.c: New test case.
> * gcc.dg/tree-ssa/pr103345.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Siddhesh Poyarekar

On 11/22/21 16:01, Jakub Jelinek wrote:

On Mon, Nov 22, 2021 at 03:41:57PM +0530, Siddhesh Poyarekar wrote:

So I played around a bit with this.  Basically:

char buf[8];

__SIZE_TYPE__ test (void)
{
   char *p = [0x9004];
   return __builtin_object_size (p + 2, 0);
}

when built with -m32 returns 0x7002 but on 64-bit, returns 0 as
expected.  of course, with subscript as 0x9004, 64-bit gives
0x7002 as the result.


That is one case, I think when the base is a ADDR_EXPR VAR_DECL, offsets
larger than half of the address space will be UB, though of course the
question is what __bos should return for that because it wants to prevent
UB exploiting.

Another case is where the base is some pointer, something like
   _REF [ptr, -16].a
etc., where the MEM_REF offset negative makes a lot of sense,
there could be
   ptr =  + 32;
earlier etc.
That gives us to the general question on what to do with POINTER_PLUS_EXPR
if the offset is constant but "negative" (as it is sizetype, it is never
negative, but usually we treat it or should treat it as signed in the end).
If the offset is "negative", for e.g. __bos 0/1 one option is to use a
conservative maximum, i.e. subtract the offset from the value (make it even
larger, of course make sure it doesn't wrap around).  Another one would be
to consider also __bos 2 in that case, if the negation of the offset is
bigger than __bos 2, then the pointer points to before the start of the
object and we could return 0.


So I've got patch 10/10, which handles dynamic (and consequently 
negative) offsets.  It basically computes a "whole size", which then 
gives the extent to which a negative offset is valid, making the 
estimates a bit more precise.  I didn't do it for static object sizes 
because I didn't have time then, but I could add a patch 11/10 if the 
idea sounds OK to you.


Siddhesh


Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-11-22 Thread Andre Vieira (lists) via Gcc-patches


On 12/11/2021 13:12, Richard Biener wrote:

On Thu, 11 Nov 2021, Andre Vieira (lists) wrote:


Hi,

This is the rebased and reworked version of the unroll patch.  I wasn't
entirely sure whether I should compare the costs of the unrolled loop_vinfo
with the original loop_vinfo it was unrolled of. I did now, but I wasn't too
sure whether it was a good idea to... Any thoughts on this?

+  /* Apply the suggested unrolling factor, this was determined by the
backend
+ during finish_cost the first time we ran the analyzis for this
+ vector mode.  */
+  if (loop_vinfo->suggested_unroll_factor > 1)
+{
+  poly_uint64 unrolled_vf
+   = LOOP_VINFO_VECT_FACTOR (loop_vinfo) *
loop_vinfo->suggested_unroll_factor;
+  /* Make sure the unrolled vectorization factor is less than the max
+ vectorization factor.  */
+  unsigned HOST_WIDE_INT max_vf = LOOP_VINFO_MAX_VECT_FACTOR
(loop_vinfo);
+  if (max_vf == MAX_VECTORIZATION_FACTOR || known_le (unrolled_vf,
max_vf))
+   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = unrolled_vf;
+  else
+   return opt_result::failure_at (vect_location,
+  "unrolling failed: unrolled"
+  " vectorization factor larger than"
+  " maximum vectorization factor:
%d\n",
+  LOOP_VINFO_MAX_VECT_FACTOR
(loop_vinfo));
+}
+
/* This is the point where we can re-start analysis with SLP forced
off.  */
  start_over:

So we're honoring suggested_unroll_factor here but you still have the
now unused hunk

+vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool ,
+unsigned *suggested_unroll_factor, poly_uint64 min_vf
= 2)
  {

I also wonder whether vect_analyze_loop_2 could at least prune
suggested_unroll_factor as set by vect_analyze_loop_costing with its
knowledge of max_vf itself?  That would avoid using the at the moment
unused LOOP_VINFO_MAX_VECT_FACTOR?

I think all the things you do in vect_can_unroll should be figured
out with the re-analysis, and I'd just amend vect_analyze_loop_1
with a suggested unroll factor parameter like it has main_loop_vinfo
for the epilogue case.  The main loop adjustment would the be in the

   if (first_loop_vinfo == NULL)
 {
   first_loop_vinfo = loop_vinfo;
   first_loop_i = loop_vinfo_i;
   first_loop_next_i = mode_i;
 }

Sounds good.


spot only, adding

if (loop_vinfo->suggested_unroll_factor != 1)
   {
 suggested_unroll_factor = loop_vinfo->suggested_unroll_factor;
 mode_i = first_loop_i;
 if (dump)
   dump_print ("Trying unrolling by %d\n");
 continue;
   }
Not quite like this because of how we need to keep the suggestion given 
at finish_costs, put into suggested_unroll_factor, separate from how we 
tell vect_analyze_loop_1 that we are now vectorizing an unrolled vector 
loop, which we do by writing to loop_vinfo->suggested_unroll_factor. 
Perhaps I should renamed the latter, to avoid confusion? Let me know if 
you think that would help and in the mean-time this is what the patch 
looks like now. I'll follow up with a ChangeLog when we settle on the 
name & structure.diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 
a28bb6321d76b8222bc8cfdade151ca9b4dca406..c84f1df9cd9a1325135defcbe1d101642a867373
 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -153,7 +153,8 @@ along with GCC; see the file COPYING3.  If not see
http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 */
 
-static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
+static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
+   unsigned *);
 static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
   bool *, bool *);
 
@@ -828,6 +829,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
 skip_main_loop_edge (nullptr),
 skip_this_loop_edge (nullptr),
 reusable_accumulators (),
+suggested_unroll_factor (1),
 max_vectorization_factor (0),
 mask_skip_niters (NULL_TREE),
 rgroup_compare_type (NULL_TREE),
@@ -1811,7 +1813,8 @@ vect_known_niters_smaller_than_vf (loop_vec_info 
loop_vinfo)
definitely no, or -1 if it's worth retrying.  */
 
 static int
-vect_analyze_loop_costing (loop_vec_info loop_vinfo)
+vect_analyze_loop_costing (loop_vec_info loop_vinfo,
+  unsigned *suggested_unroll_factor)
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
@@ -1845,7 +1848,8 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
 
   int min_profitable_iters, min_profitable_estimate;
   vect_estimate_min_profitable_iters (loop_vinfo, _profitable_iters,
- 

Re: [AArch64] Enable generation of FRINTNZ instructions

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, 22 Nov 2021, Andre Vieira (lists) wrote:

> 
> On 18/11/2021 11:05, Richard Biener wrote:
> >
> > @@ -3713,12 +3713,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  trapping behaviour, so require !flag_trapping_math. */
> >   #if GIMPLE
> >   (simplify
> > -   (float (fix_trunc @0))
> > -   (if (!flag_trapping_math
> > -   && types_match (type, TREE_TYPE (@0))
> > -   && direct_internal_fn_supported_p (IFN_TRUNC, type,
> > - OPTIMIZE_FOR_BOTH))
> > -  (IFN_TRUNC @0)))
> > +   (float (fix_trunc@1 @0))
> > +   (if (types_match (type, TREE_TYPE (@0)))
> > +(if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
> > +&& direct_internal_fn_supported_p (IFN_FTRUNC_INT, type,
> > +   TREE_TYPE (@1),
> > OPTIMIZE_FOR_BOTH))
> > + (with {
> > +  tree int_type = TREE_TYPE (@1);
> > +  unsigned HOST_WIDE_INT max_int_c
> > +   = (1ULL << (element_precision (int_type) - 1)) - 1;
> >
> > That's only half-way supporting vector types I fear - you use
> > element_precision but then build a vector integer constant
> > in an unsupported way.  I suppose vector support isn't present
> > for arm?  The cleanest way would probably be to do
> >
> > tree int_type = element_type (@1);
> >
> > with providing element_type in tree.[ch] like we provide
> > element_precision.
> This is a good shout and made me think about something I hadn't before... I
> thought I could handle the vector forms later, but the problem is if I add
> support for the scalar, it will stop the vectorizer. It seems
> vectorizable_call expects all arguments to have the same type, which doesn't
> work with passing the integer type as an operand work around.

We already special case some IFNs there (masked load/store and gather)
to ignore some args, so that would just add to this set.

Richard.


Re: [V2/PATCH] Fix tree-optimization/102216: missed optimization causing Warray-bounds

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, Nov 22, 2021 at 9:40 AM Andrew Pinski  wrote:
>
> On Wed, Oct 27, 2021 at 3:42 AM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Wed, Oct 27, 2021 at 12:00 PM apinski--- via Gcc-patches
> >  wrote:
> > >
> > > From: Andrew Pinski 
> > >
> > > The problem here is tree-ssa-forwprop.c likes to produce
> > >   [(void *)_4 + 152B] which is the same as
> > > _4 p+ 152 which the rest of GCC likes better.
> > > This implements this transformation back to pointer plus to
> > > improve better code generation later on.
> >
> > Why do you think so?  Can you pin-point the transform that now
> > fixes the new testcase?
>
> So we had originally:
>   language_names_p_9 =   [(void *)_4 + 24B];
> ...
>   _2 = _4 + 40;

Of course if that would have been

  _2 =  [_4 + 40B];

the issue would be fixed as well.   That said, I agree that _4 + 40
is better but I think we should canonicalize all [SSA + CST]
this way.  There is a canonicalization phase in fold_stmt_1:

  /* First do required canonicalization of [TARGET_]MEM_REF addresses
 after propagation.
 ???  This shouldn't be done in generic folding but in the
 propagation helpers which also know whether an address was
 propagated.
 Also canonicalize operand order.  */
  switch (gimple_code (stmt))
{
case GIMPLE_ASSIGN:
  if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
{
  tree *rhs = gimple_assign_rhs1_ptr (stmt);
  if ((REFERENCE_CLASS_P (*rhs)
   || TREE_CODE (*rhs) == ADDR_EXPR)
  && maybe_canonicalize_mem_ref_addr (rhs))
changed = true;

where this could be done (apart from adding a match.pd pattern for this).

>   if (_2 != language_names_p_9)
>
> Forwprop is able to figure out that the above if statement is now
> always false as we have (_4 +p 40) != (_4 +p 24) which gets simplified
> via a match-and-simplify pattern ().
> Does that answer your question?
>
> I will look into the other comments in a new patch.
>
> Thanks,
> Andrew
>
> >
> > Comments below
> >
> > > OK? Bootstrapped and tested on aarch64-linux-gnu.
> > >
> > > Changes from v1:
> > > * v2: Add comments.
> > >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/102216
> > > * tree-ssa-forwprop.c (rewrite_assign_addr): New function.
> > > (forward_propagate_addr_expr_1): Use rewrite_assign_addr
> > > when rewriting into the addr_expr into an assignment.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > PR tree-optimization/102216
> > > * g++.dg/tree-ssa/pr102216.C: New test.
> > > ---
> > >  gcc/testsuite/g++.dg/tree-ssa/pr102216.C | 22 +
> > >  gcc/tree-ssa-forwprop.c  | 58 ++--
> > >  2 files changed, 67 insertions(+), 13 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > >
> > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216.C 
> > > b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > > new file mode 100644
> > > index 000..b903e4eb57d
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > > @@ -0,0 +1,22 @@
> > > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > > +void link_error ();
> > > +void g ()
> > > +{
> > > +  const char **language_names;
> > > +
> > > +  language_names = new const char *[6];
> > > +
> > > +  const char **language_names_p = language_names;
> > > +
> > > +  language_names_p++;
> > > +  language_names_p++;
> > > +  language_names_p++;
> > > +
> > > +  if ( (language_names_p) - (language_names+3) != 0)
> > > +link_error();
> > > +  delete[] language_names;
> > > +}
> > > +/* We should have removed the link_error on the gimple level as GCC 
> > > should
> > > +   be able to tell that language_names_p is the same as 
> > > language_names+3.  */
> > > +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
> > > +
> > > diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> > > index a830bab78ba..e4331c60525 100644
> > > --- a/gcc/tree-ssa-forwprop.c
> > > +++ b/gcc/tree-ssa-forwprop.c
> > > @@ -637,6 +637,47 @@ forward_propagate_into_cond (gimple_stmt_iterator 
> > > *gsi_p)
> > >return 0;
> > >  }
> > >
> > > +/* Rewrite the DEF_RHS as needed into the (plain) use statement.  */
> > > +
> > > +static void
> > > +rewrite_assign_addr (gimple_stmt_iterator *use_stmt_gsi, tree def_rhs)
> > > +{
> > > +  tree def_rhs_base;
> > > +  poly_int64 def_rhs_offset;
> > > +
> > > +  /* Get the base and offset.  */
> > > +  if ((def_rhs_base = get_addr_base_and_unit_offset (TREE_OPERAND 
> > > (def_rhs, 0),
> > > +_rhs_offset)))
> >
> > So this will cause us to rewrite [p_1].a.b.c; to a pointer-plus,
> > right?  Don't
> > we want to preserve that for object-size stuff?  So maybe directly pattern
> > match ADDR_EXPR > only?
> >
> > > +{
> > > +  tree new_ptr;
> > > +  poly_offset_int off = 0;
> > > +
> > > +  /* If the base was 

Re: [AArch64] Enable generation of FRINTNZ instructions

2021-11-22 Thread Andre Vieira (lists) via Gcc-patches



On 18/11/2021 11:05, Richard Biener wrote:


@@ -3713,12 +3713,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 trapping behaviour, so require !flag_trapping_math. */
  #if GIMPLE
  (simplify
-   (float (fix_trunc @0))
-   (if (!flag_trapping_math
-   && types_match (type, TREE_TYPE (@0))
-   && direct_internal_fn_supported_p (IFN_TRUNC, type,
- OPTIMIZE_FOR_BOTH))
-  (IFN_TRUNC @0)))
+   (float (fix_trunc@1 @0))
+   (if (types_match (type, TREE_TYPE (@0)))
+(if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
+&& direct_internal_fn_supported_p (IFN_FTRUNC_INT, type,
+   TREE_TYPE (@1),
OPTIMIZE_FOR_BOTH))
+ (with {
+  tree int_type = TREE_TYPE (@1);
+  unsigned HOST_WIDE_INT max_int_c
+   = (1ULL << (element_precision (int_type) - 1)) - 1;

That's only half-way supporting vector types I fear - you use
element_precision but then build a vector integer constant
in an unsupported way.  I suppose vector support isn't present
for arm?  The cleanest way would probably be to do

tree int_type = element_type (@1);

with providing element_type in tree.[ch] like we provide
element_precision.
This is a good shout and made me think about something I hadn't 
before... I thought I could handle the vector forms later, but the 
problem is if I add support for the scalar, it will stop the vectorizer. 
It seems vectorizable_call expects all arguments to have the same type, 
which doesn't work with passing the integer type as an operand work around.


Should I go back to two separate IFN's, could still have the single optab.

Regards,
Andre



Re: [PATCH 2/2] tree-optimization: [PR92342] Move b & -(a==c) optimization to the gimple level

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, Nov 22, 2021 at 7:26 AM apinski--- via Gcc-patches
 wrote:
>
> From: Andrew Pinski 
>
> Combine disabled this optimization in r10-254-gddbb5da5199fb42 but it makes
> sense to do this on the gimple level and then let expand decide which way is
> better. So this adds the transformation on the gimple level (late like was
> done for the multiply case).
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/92342
>
> gcc/ChangeLog:
>
> * match.pd (b & -(a CMP c) -> (a CMP c)?b:0): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/andnegcmp-1.c: New test.
> * gcc.dg/tree-ssa/andnegcmp-2.c: New test.
> ---
>  gcc/match.pd|  8 +++-
>  gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c | 14 ++
>  gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c | 14 ++
>  3 files changed, 35 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ed43c321cbc..b55cbc91b57 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1794,7 +1794,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (for cmp (tcc_comparison)
>(simplify
> (mult:c (convert (cmp @0 @1)) @2)
> -   (cond (cmp @0 @1) @2 { build_zero_cst (type); }
> +   (cond (cmp @0 @1) @2 { build_zero_cst (type); }))
> +/* (-(m1 CMP m2)) & d -> (m1 CMP m2) ? d : 0  */
> +  (simplify
> +   (bit_and:c (negate (convert (cmp @0 @1))) @2)

I think you need to guard against signed bools (and vector compares,
and allow view_convert for vector compare results?)?

> +   (cond (cmp @0 @1) @2 { build_zero_cst (type); }))
> + )
> +)
>
>  /* For integral types with undefined overflow and C != 0 fold
> x * C EQ/NE y * C into x EQ/NE y.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
> new file mode 100644
> index 000..6f16783f169
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* PR tree-optimization/92342 */
> +
> +int
> +f (int m1, int m2, int c)
> +{
> +  int d = m1 == m2;
> +  d = -d;
> +  int e = d & c;
> +  return e;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 
> "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
> new file mode 100644
> index 000..0e25c8abc39
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* PR tree-optimization/92342 */
> +
> +int
> +f (int m1, int m2, int c)
> +{
> +  int d = m1 < m2;
> +  d = -d;
> +  int e = c & d;
> +  return e;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 
> "optimized" } } */
> --
> 2.17.1
>


Re: [PATCH 1/2] Improve/Fix (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0 pattern.

2021-11-22 Thread Richard Biener via Gcc-patches
On Mon, Nov 22, 2021 at 7:25 AM apinski--- via Gcc-patches
 wrote:
>
> From: Andrew Pinski 
>
> The pattern here was not catching all comparisons and the multiply
> was not commutative when it should have been. This patches fixes
> that by using tcc_comparison and adding :c to the multiply.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
> * match.pd ((m1 CMP m2) * d -> (m1 CMP m2) ? d : 0):
> Use tcc_comparison and :c for the multiply.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/multcmp-1.c: New test.
> * gcc.dg/tree-ssa/multcmp-2.c: New test.
> ---
>  gcc/match.pd  |  4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c | 12 
>  gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c | 12 
>  3 files changed, 26 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ca6c9eff624..ed43c321cbc 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1791,9 +1791,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>
>  /* (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0  */
>  (if (!canonicalize_math_p ())
> - (for cmp (gt lt ge le)
> + (for cmp (tcc_comparison)
>(simplify
> -   (mult (convert (cmp @0 @1)) @2)
> +   (mult:c (convert (cmp @0 @1)) @2)
> (cond (cmp @0 @1) @2 { build_zero_cst (type); }
>
>  /* For integral types with undefined overflow and C != 0 fold
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
> new file mode 100644
> index 000..fb44cacde77
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int
> +f (int m1, int m2, int c)
> +{
> +  int d = m1 == m2;
> +  int e = d * c;
> +  return e;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 
> "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
> new file mode 100644
> index 000..be38b2e0044
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int
> +f (int m1, int m2, int c)
> +{
> +  int d = m1 != m2;
> +  int e = c * d;
> +  return e;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 
> "optimized" } } */
> --
> 2.17.1
>


Re: [PATCH] tree-optimization: [PR31531] Improve ~a < CST, allow a nop cast inbetween ~ and a

2021-11-22 Thread Richard Biener via Gcc-patches
On Sun, Nov 21, 2021 at 3:19 PM apinski--- via Gcc-patches
 wrote:
>
> From: Andrew Pinski 
>
> This PR was orignally for the missed optimization of a few isnegative which
> had been solved a long time ago (sometime before 4.4.0). I noticed there was
> one missed optimization on the gimple level. There is a match.pd pattern
> for ~a < CST but we miss that there could be a nop_convert between the the
> comparison and the bit_not. This adds the optional option cast to the current
> match.pd pattern.
>
> OK? Bootstrapped and tested on x86_64 with no regressions.
>
> PR tree-optimization/31531
>
> gcc/ChangeLog:
>
> * match.pd (~X op C): Allow for an optional nop convert.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/pr31531-1.c: New test.
> ---
>  gcc/match.pd  |  5 +++--
>  gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c | 19 +++
>  2 files changed, 22 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 37c5be9e5f4..ca6c9eff624 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4729,10 +4729,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (for cmp (simple_comparison)
>   scmp (swapped_simple_comparison)
>   (simplify
> -  (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1)
> +  (cmp (nop_convert?:s (bit_not@2 @0)) CONSTANT_CLASS_P@1)

Any specific reason you add :s to the conversion?

OK with removing it.

Thanks,
Richard.

>(if (single_use (@2)
> && (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST))
> -   (scmp @0 (bit_not @1)
> +   (with { tree type1 = TREE_TYPE (@1); }
> +(scmp (convert:type1 @0) (bit_not @1))
>
>  (for cmp (simple_comparison)
>   /* Fold (double)float1 CMP (double)float2 into float1 CMP float2.  */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
> new file mode 100644
> index 000..c27299151eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* PR tree-optimization/31531 */
> +
> +int f(int a)
> +{
> +  int b = ~a;
> +  return b<0;
> +}
> +
> +
> +int f1(unsigned a)
> +{
> +  int b = ~a;
> +  return b<0;
> +}
> +/* We should convert the above two functions from b <0 to ((int)a) >= 0. */
> +/* { dg-final { scan-tree-dump-times ">= 0" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "~" 0 "optimized"} } */
> --
> 2.17.1
>


[PATCH] tree-optimization/103351 - avoid compare-debug issue wrt CD-DCE change

2021-11-22 Thread Richard Biener via Gcc-patches
This avoids differences in the split edge of a cluster due to different
order of same key PHI args when sorting by sorting after the edge
destination index as second key.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-22  Richard Biener  

PR tree-optimization/103351
* tree-ssa-dce.c (sort_phi_args): Sort after e->dest_idx as
second key.

* g++.dg/torture/pr103351.C: New testcase.
---
 gcc/testsuite/g++.dg/torture/pr103351.C | 88 +
 gcc/tree-ssa-dce.c  |  4 ++
 2 files changed, 92 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr103351.C

diff --git a/gcc/testsuite/g++.dg/torture/pr103351.C 
b/gcc/testsuite/g++.dg/torture/pr103351.C
new file mode 100644
index 000..d0bf7216ffc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr103351.C
@@ -0,0 +1,88 @@
+// { dg-do compile }
+// { dg-additional-options "-fcompare-debug" }
+
+template  struct __conditional;
+template 
+using __conditional_t = typename __conditional<_Cond>::type;
+template  struct __is_void_helper {};
+template  _Tp *__addressof(_Tp &__r) {
+  return __builtin_addressof(__r);
+}
+template  _Tp *addressof(_Tp &__r) { return __addressof(__r); }
+template 
+using __make_not_void = __conditional_t<_Tp ::value, int, _Tp>;
+template  struct pointer_traits;
+template  struct pointer_traits<_Tp *> {
+  typedef _Tp *pointer;
+  typedef _Tp element_type;
+  static pointer pointer_to(element_type &__r) { return addressof(__r); }
+};
+namespace {
+template  struct new_allocator;
+}
+template  struct allocator_traits;
+template  struct allocator;
+template  struct allocator_traits> {
+  using pointer = _Tp *;
+  using const_pointer = _Tp *;
+};
+namespace __gnu_cxx {
+template 
+struct __alloc_traits : allocator_traits> {};
+} // namespace __gnu_cxx
+template  struct char_traits;
+template 
+class Trans_NS___cxx11_basic_string;
+template <> struct char_traits {
+  typedef char char_type;
+  static void assign(char_type, char_type);
+};
+template  struct Trans_NS___cxx11_basic_string {
+  typedef __gnu_cxx::__alloc_traits<> _Alloc_traits;
+  typedef char_traits traits_type;
+  typedef _Alloc_traits::pointer pointer;
+  typedef _Alloc_traits::const_pointer const_pointer;
+  struct {
+pointer _M_p;
+  } _M_dataplus;
+  char _M_local_buf[];
+  void _M_data(pointer __p) { _M_dataplus._M_p = __p; }
+  bool _M_is_local() {
+const_pointer __trans_tmp_5 =
+pointer_traits::pointer_to(*_M_local_buf);
+return _M_dataplus._M_p == __trans_tmp_5;
+  }
+  void operator=(Trans_NS___cxx11_basic_string __str) {
+bool __trans_tmp_2;
+if (__str._M_is_local()) {
+  Trans_NS___cxx11_basic_string *__trans_tmp_1;
+  if (__builtin_expect(__trans_tmp_1 != this, true))
+size();
+} else if (__trans_tmp_2)
+  __str._M_data(__str._M_local_buf);
+__str.clear();
+  }
+  void size();
+  void clear() { traits_type::assign(_M_dataplus._M_p[0], char()); }
+};
+template  struct Pool {
+  template  struct PoolIterator {
+bool operator!=(PoolIterator);
+T *operator*();
+void operator++();
+  };
+  template  struct IterateWrapper {
+PoolIterator begin();
+PoolIterator end();
+  };
+};
+struct BaseConsist {
+  Trans_NS___cxx11_basic_string name;
+};
+struct Vehicle : BaseConsist {};
+Pool::IterateWrapper __trans_tmp_4;
+Trans_NS___cxx11_basic_string __trans_tmp_6;
+void FixOldVehicles() {
+  for (Vehicle *v : __trans_tmp_4)
+v->name = __trans_tmp_6;
+}
diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
index dbf02c434de..e3e6f0955b7 100644
--- a/gcc/tree-ssa-dce.c
+++ b/gcc/tree-ssa-dce.c
@@ -1626,6 +1626,10 @@ sort_phi_args (const void *a_, const void *b_)
 return -1;
   else if (ha > hb)
 return 1;
+  else if (a->first->dest_idx < b->first->dest_idx)
+return -1;
+  else if (a->first->dest_idx > b->first->dest_idx)
+return 1;
   else
 return 0;
 }
-- 
2.31.1


Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 22, 2021 at 03:41:57PM +0530, Siddhesh Poyarekar wrote:
> So I played around a bit with this.  Basically:
> 
> char buf[8];
> 
> __SIZE_TYPE__ test (void)
> {
>   char *p = [0x9004];
>   return __builtin_object_size (p + 2, 0);
> }
> 
> when built with -m32 returns 0x7002 but on 64-bit, returns 0 as
> expected.  of course, with subscript as 0x9004, 64-bit gives
> 0x7002 as the result.

That is one case, I think when the base is a ADDR_EXPR VAR_DECL, offsets
larger than half of the address space will be UB, though of course the
question is what __bos should return for that because it wants to prevent
UB exploiting.

Another case is where the base is some pointer, something like
  _REF [ptr, -16].a
etc., where the MEM_REF offset negative makes a lot of sense,
there could be
  ptr =  + 32;
earlier etc.
That gives us to the general question on what to do with POINTER_PLUS_EXPR
if the offset is constant but "negative" (as it is sizetype, it is never
negative, but usually we treat it or should treat it as signed in the end).
If the offset is "negative", for e.g. __bos 0/1 one option is to use a
conservative maximum, i.e. subtract the offset from the value (make it even
larger, of course make sure it doesn't wrap around).  Another one would be
to consider also __bos 2 in that case, if the negation of the offset is
bigger than __bos 2, then the pointer points to before the start of the
object and we could return 0.

Jakub



[ping^5] Make sure that we get unique test names if several DejaGnu directives refer to the same line [PR102735]

2021-11-22 Thread Thomas Schwinge
Hi!

Ping.


Grüße
 Thomas


On 2021-11-15T15:50:58+0100, I wrote:
> Hi!
>
> ..., and here is another ping.
>
>
> Grüße
>  Thomas
>
>
> On 2021-11-08T11:45:12+0100, I wrote:
>> Hi!
>>
>> Ping, once more.
>>
>>
>> Grüße
>>  Thomas
>>
>>
>> On 2021-10-14T12:12:41+0200, I wrote:
>>> Hi!
>>>
>>> Ping, again.
>>>
>>> Commit log updated for 
>>> "privatization-1-compute.c results in both XFAIL and PASS".
>>>
>>>
>>> Grüße
>>>  Thomas
>>>
>>>
>>> On 2021-09-30T08:42:25+0200, I wrote:
 Hi!

 Ping.

 On 2021-09-22T13:03:46+0200, I wrote:
> On 2021-09-19T11:35:00-0600, Jeff Law via Gcc-patches 
>  wrote:
>> A couple of goacc tests do not have unique names.
>
> Thanks for fixing this up, and sorry, largely my "fault", I suppose.  ;-|
>
>> This causes problems
>> for the test comparison script when one of the test passes and the other
>> fails -- in this scenario the test comparison script claims there is a
>> regression.
>
> So I understand correctly that this is a problem not just for actual
> mixed PASS vs. FAIL (which we'd like you to report anyway!) that appear
> for the same line, but also for mixed PASS vs. XFAIL?  (Because, the
> latter appears to be what you're addressing with your commit here.)
>
>> This slipped through for a while because I had turned off x86_64 testing
>> (others test it regularly and I was revamping the tester's hardware
>> requirements).  Now that I've acquired more x86_64 resources and turned
>> on native x86 testing again, it's been flagged.
>
> (I don't follow that argument -- these test cases should be all generic?
> Anyway, not important, I guess.)
>
>> This patch just adds a numeric suffix to the TODO string to disambiguate
>> them.
>
> So, instead of doing this manually (always error-prone!), like you've...
>
>> Committed to the trunk,
>
>> commit f75b237254f32d5be32c9d9610983b777abea633
>> Author: Jeff Law 
>> Date:   Sun Sep 19 13:31:32 2021 -0400
>>
>> [committed] Make test names unique for a couple of goacc tests
>
>> --- a/gcc/testsuite/gfortran.dg/goacc/privatization-1-compute.f90
>> +++ b/gcc/testsuite/gfortran.dg/goacc/privatization-1-compute.f90
>> @@ -39,9 +39,9 @@ contains
>>!$acc atomic write ! ... to force 'TREE_ADDRESSABLE'.
>>y = a
>>  !$acc end parallel
>> -! { dg-note {variable 'i' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO" { xfail *-*-* 
>> } l_compute$c_compute }
>> -! { dg-note {variable 'j' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO" { xfail *-*-* 
>> } l_compute$c_compute }
>> -! { dg-note {variable 'a' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO" { xfail *-*-* 
>> } l_compute$c_compute }
>> +! { dg-note {variable 'i' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO2" { xfail *-*-* 
>> } l_compute$c_compute }
>> +! { dg-note {variable 'j' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO3" { xfail *-*-* 
>> } l_compute$c_compute }
>> +! { dg-note {variable 'a' in 'private' clause potentially has 
>> improper OpenACC privatization level: 'parm_decl'} "TODO4" { xfail *-*-* 
>> } l_compute$c_compute }
>
> ... etc. (also similarly in a handful of earlier commits, if I remember
> correctly), why don't we do that programmatically, like in the attached
> "Make sure that we get unique test names if several DejaGnu directives
> refer to the same line", once and for all?  OK to push after proper
> testing?

 Attached again, for easy reference.

 I figure it may help if I showed an example of how this changes things;
 for the test case cited above (word-diff):

 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 40+} (test for warnings, line 39)
 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 41+} (test for warnings, line 22)
 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 42+} (test for warnings, line 39)
 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 43+} (test for warnings, line 22)
 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 44+} (test for warnings, line 39)
 PASS: gfortran.dg/goacc/privatization-1-compute.f90   -O   {+at line 
 45+} (test for warnings, line 22)
 XFAIL: gfortran.dg/goacc/privatization-1-compute.f90   -O  TODO2 {+at 
 line 50+} (test for warnings, line 29)
 XFAIL: 

Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Siddhesh Poyarekar

On 11/20/21 00:31, Siddhesh Poyarekar wrote:
This doesn't match what the code did and I'm surprised if it works at 
all.

TREE_OPERAND (pt_var, 1), while it is an INTEGER_CST or POLY_INT_CST,
has in its type encoded the type for aliasing, so the type is some 
pointer

type.  Performing size_binop etc. on such values can misbehave, the code
assumes that it is sizetype or at least some integral type compatible 
with

it.Also, mem_ref_offset is signed and offset_int has bigger precision
than pointers on the target such that it can be always signed.  So
e.g. if MEM_REF's second operand is bigger or equal than half of the
address space, in the old code it would appear to be negative, wi::sub 
would

result in a value bigger than sz.  Not really sure right now if that is
exactly how we want to treat it, would be nice to try some testcase.


Let me try coming up with a test case for it.



So I played around a bit with this.  Basically:

char buf[8];

__SIZE_TYPE__ test (void)
{
  char *p = [0x9004];
  return __builtin_object_size (p + 2, 0);
}

when built with -m32 returns 0x7002 but on 64-bit, returns 0 as 
expected.  of course, with subscript as 0x9004, 64-bit gives 
 0x7002 as the result.


With the tree conversion, this is at least partly taken care of since 
offset larger than size, to the extent that it fits into sizetype, 
returns a size of zero.  So in the above example, the size returned is 
zero in both -m32 as well as -m64.  Likewise for negative offset, i.e. 
[-4]; the old code returns 10 while with trees it returns 0, which 
seems correct to me since it is an underflow.


It's only partly taken care of because, e.g.

  char *p = [0x10004];

ends up truncating the offset, returning object size of 2.  This however 
is an unrelated problem; it's the folding of offsets that is responsible 
for this since it ends up truncating the offset to shwi bounds.  Perhaps 
there's an opportunity in get_addr_base_and_unit_offset to warn of 
overflow if offset goes above pointer precision before truncating it.


So for this patch, may I simply ensure that offset is converted to 
sizetype and keep everything else the same?  it appears to demonstrate 
better behaviour than the older code.  I'll also add these tests.


Thanks,
Siddhesh


[PATCH] x86: Speed up target attribute handling by using a cache

2021-11-22 Thread Jakub Jelinek via Gcc-patches
Hi!

The target attribute handling is very expensive and for the common case
from x86intrin.h where many functions get implicitly the same target
attribute, we can speed up compilation a lot by caching it.

The following patches both create a single entry cache, where they cache
for a particular target attribute argument list the resulting
DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
values from ix86_valid_target_attribute_p and use the cache if the
args are the same as last time and we start either from NULL values
of those, or from the recorded values for those from last time.

Compiling a simple:
 #include 

 int i;
testcase with ./cc1 -quiet -O2 -isystem include/ test.c
takes on my WS without the patches ~0.392s and with either of the
patches ~0.182s, i.e. roughly half the time as before.
For ./cc1plus -quiet -O2 -isystem include/ test.c
it is slightly worse, the speed up is from ~0.613s to ~0.403s.

The difference between the 2 patches is that the first one uses copy_list
while the second one uses a vec, so I think the second one has the advantage
of creating less GC garbage.
I've verified both patches achieve the same content of those
DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
nodes as before on x86intrin.h by doing debug_tree on those and comparing
the stderr from without these patches to with these patches.

Both patches were bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk (and which one)?

2021-11-22  Jakub Jelinek  

* attribs.h (simple_cst_list_equal): Declare.
* attribs.c (simple_cst_list_equal): No longer static.
* config/i386/i386-options.c (target_attribute_cache): New variable.
(ix86_valid_target_attribute_p): Cache DECL_FUNCTION_SPECIFIC_TARGET
and DECL_FUNCTION_SPECIFIC_OPTIMIZATION based on args.

--- gcc/attribs.h.jj2021-11-11 14:35:37.442350841 +0100
+++ gcc/attribs.h   2021-11-19 11:52:08.843252645 +0100
@@ -60,6 +60,7 @@ extern tree build_type_attribute_variant
 extern tree build_decl_attribute_variant (tree, tree);
 extern tree build_type_attribute_qual_variant (tree, tree, int);
 
+extern bool simple_cst_list_equal (const_tree, const_tree);
 extern bool attribute_value_equal (const_tree, const_tree);
 
 /* Return 0 if the attributes for two types are incompatible, 1 if they
--- gcc/attribs.c.jj2021-11-11 14:35:37.442350841 +0100
+++ gcc/attribs.c   2021-11-19 11:51:43.473615692 +0100
@@ -1290,7 +1290,7 @@ cmp_attrib_identifiers (const_tree attr1
 /* Compare two constructor-element-type constants.  Return 1 if the lists
are known to be equal; otherwise return 0.  */
 
-static bool
+bool
 simple_cst_list_equal (const_tree l1, const_tree l2)
 {
   while (l1 != NULL_TREE && l2 != NULL_TREE)
--- gcc/config/i386/i386-options.c.jj   2021-11-15 13:19:07.347900863 +0100
+++ gcc/config/i386/i386-options.c  2021-11-20 00:27:32.919505947 +0100
@@ -1403,6 +1403,8 @@ ix86_valid_target_attribute_tree (tree f
   return t;
 }
 
+static GTY(()) tree target_attribute_cache[3];
+
 /* Hook to validate attribute((target("string"))).  */
 
 bool
@@ -1423,6 +1425,19 @@ ix86_valid_target_attribute_p (tree fnde
   && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
 return true;
 
+  if ((DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == target_attribute_cache[1]
+   || DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE)
+  && (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+ == target_attribute_cache[2]
+ || DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+  && simple_cst_list_equal (args, target_attribute_cache[0]))
+{
+  DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_attribute_cache[1];
+  DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+   = target_attribute_cache[2];
+  return true;
+}
+
   tree old_optimize = build_optimization_node (_options,
   _options_set);
 
@@ -1456,8 +1471,17 @@ ix86_valid_target_attribute_p (tree fnde
   if (new_target == error_mark_node)
 ret = false;
 
-  else if (fndecl && new_target)
+  else if (new_target)
 {
+  if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE
+ && DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+   {
+ target_attribute_cache[0] = copy_list (args);
+ target_attribute_cache[1] = new_target;
+ target_attribute_cache[2]
+   = old_optimize != new_optimize ? new_optimize : NULL_TREE;
+   }
+
   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
 
   if (old_optimize != new_optimize)

Jakub
2021-11-22  Jakub Jelinek  

* config/i386/i386-options.c (target_attribute_cache): New variable.
(ix86_valid_target_attribute_p): Cache DECL_FUNCTION_SPECIFIC_TARGET
and DECL_FUNCTION_SPECIFIC_OPTIMIZATION based on args.

--- gcc/config/i386/i386-options.c.jj   2021-11-20 23:53:35.730637746 +0100
+++ 

Improve modref tracking of base pointers

2021-11-22 Thread Jan Hubicka via Gcc-patches
Hi,
on exchange2 benchamrk we miss some useful propagation because modref gives
up very early on analyzing accesses through pointers.  For example in
int test (int *a)
{
  int i;
  for (i=0; a[i];i++);
  return i+a[i];
}

We are not able to determine that a[i] accesses are relative to a.
This is because get_access requires the SSA name that is in MEM_REF to be
PARM_DECL while on other places we use ipa-prop helper to work out the proper
base pointers.

This patch commonizes the code in get_access and parm_map_for_arg so both
use the check properly and extends it to also figure out that newly allocated
memory is not a side effect to caller.

It improves disambiguation rates:

Alias oracle query stats:
  refs_may_alias_p: 77359588 disambiguations, 102170294 queries
  ref_maybe_used_by_call_p: 645390 disambiguations, 78392252 queries
  call_may_clobber_ref_p: 386653 disambiguations, 389576 queries
  stmt_kills_ref_p: 106470 kills, 5685744 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 8923 queries
  nonoverlapping_refs_since_match_p: 30581 disambiguations, 65481 must 
overlaps, 97009 queries
  aliasing_component_refs_p: 56854 disambiguations, 15459249 queries
  TBAA oracle: 28236957 disambiguations 104812620 queries
   15360807 are in alias set 0
   8863925 queries asked about the same object
   99 queries asked about the same alias set
   0 access volatile
   50367859 are dependent in the DAG
   1982973 are aritificially in conflict with void *

Modref stats:
  modref kill: 71 kills, 8151 queries
  modref use: 25273 disambiguations, 704264 queries
  modref clobber: 1676006 disambiguations, 21805867 queries
  5264985 tbaa queries (0.241448 per modref query)
  762265 base compares (0.034957 per modref query)

PTA query stats:
  pt_solution_includes: 13460623 disambiguations, 40881373 queries
  pt_solutions_intersect: 1668037 disambiguations, 13958255 queries

to:

Alias oracle query stats:
  refs_may_alias_p: 77575173 disambiguations, 102390852 queries
  ref_maybe_used_by_call_p: 645932 disambiguations, 78607413 queries
  call_may_clobber_ref_p: 386813 disambiguations, 389693 queries
  stmt_kills_ref_p: 106551 kills, 5688432 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 8936 queries
  nonoverlapping_refs_since_match_p: 30583 disambiguations, 65514 must 
overlaps, 97044 queries
  aliasing_component_refs_p: 56847 disambiguations, 15459371 queries
  TBAA oracle: 28238952 disambiguations 104938558 queries
   15435200 are in alias set 0
   8876784 queries asked about the same object
   89 queries asked about the same alias set
   0 access volatile
   50400613 are dependent in the DAG
   1986920 are aritificially in conflict with void *

Modref stats:
  modref kill: 71 kills, 8130 queries
  modref use: 30684 disambiguations, 704287 queries
  modref clobber: 1694295 disambiguations, 21697882 queries
  5233712 tbaa queries (0.241208 per modref query)
  902240 base compares (0.041582 per modref query)

PTA query stats:
  pt_solution_includes: 13495059 disambiguations, 40917961 queries
  pt_solutions_intersect: 1667032 disambiguations, 13951159 queries

So 20% more modref use disambiguations which accounts to 0.3% overal
disambiguation and alo improves a bit situation with exchange2 benchmark,
while the real problem is still present (as dicussed in the pr)

gcc/ChangeLog:

2021-11-21  Jan Hubicka  

PR ipa/103227
* ipa-modref.c (parm_map_for_arg): Rename to ...
(parm_map_for_ptr): .. this one; handle static chain and calls to
malloc functions.
(modref_access_analysis::get_access): Use parm_map_for_ptr.
(modref_access_analysis::process_fnspec): Update.
(modref_access_analysis::analyze_load): Update.
(modref_access_analysis::analyze_store): Update.

gcc/testsuite/ChangeLog:

2021-11-21  Jan Hubicka  

PR ipa/103227
* gcc.dg/tree-ssa/modref-15.c: New test.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index a04e5855a9a..4f9323165ea 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -812,14 +812,15 @@ ignore_stores_p (tree caller, int flags)
   return false;
 }
 
-/* Determine parm_map for argument OP.  */
+/* Determine parm_map for PTR which is supposed to be a pointer.  */
 
 modref_parm_map
-parm_map_for_arg (tree op)
+parm_map_for_ptr (tree op)
 {
   bool offset_known;
   poly_int64 offset;
   struct modref_parm_map parm_map;
+  gcall *call;
 
   parm_map.parm_offset_known = false;
   parm_map.parm_offset = 0;
@@ -830,22 +831,26 @@ parm_map_for_arg (tree op)
   && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
 {
   int index = 0;
-  for (tree t = DECL_ARGUMENTS (current_function_decl);
-  t != SSA_NAME_VAR (op); t = DECL_CHAIN (t))
-   {
- if (!t)
-   {
- index = MODREF_UNKNOWN_PARM;
- break;
-  

[PATCH] libcpp, v2: Fix _Pragma in #__VA_ARGS__ [PR103165]

2021-11-22 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 18, 2021 at 09:55:52PM +, Joseph Myers wrote:
> On Thu, 18 Nov 2021, Jakub Jelinek via Gcc-patches wrote:
> 
> > Are we handling the pragma at a wrong phase of preprocessing?
> 
> I think that converting it to a single preprocessing token (rather than 
> four separate preprocessing tokens), at a stage when stringizing might 
> still occur, does indicate it's being processed too soon, and it would be 
> better to do that only when it's known that the _Pragma preprocessing 
> token will actually occur in the results of preprocessing the source file.

So like this?  I.e. don't process _Pragma during expand_args where we can't
know what the macro will do with it?

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-11-22  Jakub Jelinek  
Tobias Burnus  

PR preprocessor/103165
libcpp/
* internal.h (struct lexer_state): Add ignore__Pragma field.
* macro.c (builtin_macro): Don't interpret _Pragma if
pfile->state.ignore__Pragma.
(expand_arg): Temporarily set pfile->state.ignore__Pragma to 1.
gcc/testsuite/
* c-c++-common/gomp/pragma-3.c: New test.
* c-c++-common/gomp/pragma-4.c: New test.
* c-c++-common/gomp/pragma-5.c: New test.

--- libcpp/internal.h.jj2021-11-18 12:33:18.409679558 +0100
+++ libcpp/internal.h   2021-11-20 11:00:58.04430 +0100
@@ -287,6 +287,9 @@ struct lexer_state
 
   /* Nonzero if the deferred pragma being handled allows macro expansion.  */
   unsigned char pragma_allow_expansion;
+
+  /* Nonzero if _Pragma should not be interpreted.  */
+  unsigned char ignore__Pragma;
 };
 
 /* Special nodes - identifiers with predefined significance.  */
--- libcpp/macro.c.jj   2021-11-18 12:33:18.462678802 +0100
+++ libcpp/macro.c  2021-11-20 11:02:03.249478159 +0100
@@ -750,8 +750,10 @@ builtin_macro (cpp_reader *pfile, cpp_ha
   if (node->value.builtin == BT_PRAGMA)
 {
   /* Don't interpret _Pragma within directives.  The standard is
- not clear on this, but to me this makes most sense.  */
-  if (pfile->state.in_directive)
+ not clear on this, but to me this makes most sense.
+ Similarly, don't interpret _Pragma inside expand_args, we might
+ need to stringize it later on.  */
+  if (pfile->state.in_directive || pfile->state.ignore__Pragma)
return 0;
 
   return _cpp_do__Pragma (pfile, loc);
@@ -2648,6 +2650,7 @@ expand_arg (cpp_reader *pfile, macro_arg
   size_t capacity;
   bool saved_warn_trad;
   bool track_macro_exp_p = CPP_OPTION (pfile, track_macro_expansion);
+  bool saved_ignore__Pragma;
 
   if (arg->count == 0
   || arg->expanded != NULL)
@@ -2670,6 +2673,9 @@ expand_arg (cpp_reader *pfile, macro_arg
 push_ptoken_context (pfile, NULL, NULL,
 arg->first, arg->count + 1);
 
+  saved_ignore__Pragma = pfile->state.ignore__Pragma;
+  pfile->state.ignore__Pragma = 1;
+
   for (;;)
 {
   const cpp_token *token;
@@ -2692,6 +2698,7 @@ expand_arg (cpp_reader *pfile, macro_arg
   _cpp_pop_context (pfile);
 
   CPP_WTRADITIONAL (pfile) = saved_warn_trad;
+  pfile->state.ignore__Pragma = saved_ignore__Pragma;
 }
 
 /* Returns the macro associated to the current context if we are in
--- gcc/testsuite/c-c++-common/gomp/pragma-3.c.jj   2021-11-20 
11:04:27.636429378 +0100
+++ gcc/testsuite/c-c++-common/gomp/pragma-3.c  2021-11-20 11:27:46.892589048 
+0100
@@ -0,0 +1,20 @@
+/* { dg-additional-options "-fdump-tree-original" }  */
+/* PR preprocessor/103165  */
+
+#define inner(...) #__VA_ARGS__ ; _Pragma("omp error severity(warning) message 
(\"Test\") at(compilation)")
+#define outer(...) inner(__VA_ARGS__)
+
+void
+f (void)
+{
+  const char *str = outer(inner(1,2));  /* { dg-warning "'pragma omp error' 
encountered: Test" } */
+}
+
+#if 0
+After preprocessing, the expected result are the following three lines:
+ const char *str = "\"1,2\" ; _Pragma(\"omp error severity(warning) 
message (\\\"Test\\\") at(compilation)\")" ;
+#pragma omp error severity(warning) message ("Test") at(compilation)
+ ;
+#endif
+
+/* { dg-final { scan-tree-dump "const char \\* str = \\(const char \\*\\) 
\"\"1,2\" ; _Pragma\\(\"omp error severity\\(warning\\) message 
\\(\"Test\"\\) at\\(compilation\\)\"\\)\";" 
"original" } }  */
--- gcc/testsuite/c-c++-common/gomp/pragma-4.c.jj   2021-11-20 
11:04:35.355319742 +0100
+++ gcc/testsuite/c-c++-common/gomp/pragma-4.c  2021-11-20 11:28:22.995078169 
+0100
@@ -0,0 +1,20 @@
+/* { dg-additional-options "-fdump-tree-original -save-temps" }  */
+/* PR preprocessor/103165  */
+
+#define inner(...) #__VA_ARGS__ ; _Pragma("omp error severity(warning) message 
(\"Test\") at(compilation)")
+#define outer(...) inner(__VA_ARGS__)
+
+void
+f (void)
+{
+  const char *str = outer(inner(1,2));  /* { dg-warning "'pragma omp error' 
encountered: Test" } */
+}
+
+#if 0
+After 

[committed] openmp: Handle OMP_MASKED in potential_constant_expression_1 [PR103349]

2021-11-22 Thread Jakub Jelinek via Gcc-patches
Hi!

When adding OMP_MASKED, I apparently forgot to handle it in
potential_constant_expression_1, which means we can ICE on it.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

2021-11-22  Jakub Jelinek  

PR c++/103349
* constexpr.c (potential_constant_expression_1): Punt on OMP_MASKED.

* g++.dg/gomp/masked-1.C: New test.

--- gcc/cp/constexpr.c.jj   2021-11-18 12:33:22.014628184 +0100
+++ gcc/cp/constexpr.c  2021-11-21 20:56:15.138499377 +0100
@@ -8686,6 +8686,7 @@ potential_constant_expression_1 (tree t,
 case OMP_SINGLE:
 case OMP_SECTION:
 case OMP_MASTER:
+case OMP_MASKED:
 case OMP_TASKGROUP:
 case OMP_TARGET_UPDATE:
 case OMP_TARGET_ENTER_DATA:
--- gcc/testsuite/g++.dg/gomp/masked-1.C.jj 2021-11-21 21:01:19.610215568 
+0100
+++ gcc/testsuite/g++.dg/gomp/masked-1.C2021-11-21 21:00:44.914703723 
+0100
@@ -0,0 +1,14 @@
+// PR c++/103349
+// { dg-do compile { target c++11 } }
+
+int v;
+
+void
+foo (int x, int y)
+{
+  [=] ()
+  {
+#pragma omp masked
+v = x + y;
+  } ();
+}

Jakub



Re: [RFC PATCH] gcc: Improve reproducability when building cc1/cc1plus

2021-11-22 Thread Richard Biener via Gcc-patches
On Sat, Nov 20, 2021 at 9:43 AM Jacob Kroon via Gcc-patches
 wrote:
>
> On 11/20/21 09:32, Jakub Jelinek wrote:
> > On Sat, Nov 20, 2021 at 12:24:21AM -0800, Andrew Pinski via Gcc-patches 
> > wrote:
> >> On Sat, Nov 20, 2021 at 12:18 AM Jacob Kroon via Gcc-patches
> >>  wrote:
> >>>
> >>> cc1/cc1plus both include a checksum of the object files, archives and
> >>> options used during linking. Unless the host binutils has been built
> >>> with --enable-deterministic-archives, the archives will have different
> >>> checksums each build due to changes in timestamps of the containing
> >>> object files, and thus the checksum that is embedded in cc1/cc1plus will
> >>> also change.
> >>>
> >>> Fix this by passing "D" to ar/ranlib when creating the archives.
> >>
> >> How portable is the D option? That does it work with Mac OS X's ar;
> >> what about AIX's ar; what about Solaris's ar, etc?
> >> GNU binutils is not the only ar which is supported for the host.
> >
> > It isn't portable and even if ar/ranlib do support that option, not all
> > users will want it, so forcing it upon them unconditionally is IMO not a
> > good idea.  If anything, configure needs to check if those options are
> > supported and there needs to be preferrably non-default gcc configure
> > option to request it.
> >
>
> Aha, I see. Then perhaps an easier and more portable solution for
> improving the reproducibility would be to make it optional whether or
> not to include the archives when calculating the checksums.
>
> Or perhaps by checksumming the containing object files instead of the
> archives themselves ?
>
> Would something like that be acceptable ?

At SUSE we are carrying patches to not include the checksum at all
but instead use the linker generated build-id as checksum for PCH.
I suppose when we add the configure option to disable PCH support
we could also elide checksum processing (or as in the patch simply
always use all-zeros).

Attached for reference.

Richard.

>
> Jacob
Use the binaries build-id as checksum for PCH purposes.

diff --git a/gcc/c-family/c-pch.c b/gcc/c-family/c-pch.c
index 9c0bd0b631d..de762517fc3 100644
--- a/gcc/c-family/c-pch.c
+++ b/gcc/c-family/c-pch.c
@@ -65,6 +65,66 @@ static FILE *pch_outfile;
 
 static const char *get_ident (void);
 
+#if _GNU_SOURCE
+#include 
+
+#define ALIGN(val, align)  (((val) + (align) - 1) & ~((align) - 1))
+
+static int
+get_build_id_1 (struct dl_phdr_info *info, size_t, void *data)
+{
+  for (unsigned i = 0; i < info->dlpi_phnum; ++i)
+{
+  if (info->dlpi_phdr[i].p_type != PT_NOTE)
+	continue;
+  ElfW(Nhdr) *nhdr
+	= (ElfW(Nhdr) *)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
+  ptrdiff_t size = info->dlpi_phdr[i].p_filesz;
+  ptrdiff_t align = info->dlpi_phdr[i].p_align;
+  if (align != 8)
+	align = 4;
+  while (size >= (ptrdiff_t)sizeof (ElfW(Nhdr)))
+	{
+	  if (nhdr->n_type == NT_GNU_BUILD_ID
+	  && nhdr->n_namesz == 4
+	  && strncmp ((char *)nhdr
+			  + sizeof (ElfW(Nhdr)),
+			  "GNU", 4) == 0
+	  && nhdr->n_descsz >= 16)
+	{
+	  memcpy (data, 
+		  (char *)nhdr
+		  + ALIGN (sizeof (ElfW(Nhdr))
+			   + nhdr->n_namesz, align), 16);
+	  return 1;
+	}
+	  size_t offset = (ALIGN (sizeof (ElfW(Nhdr))
+  + nhdr->n_namesz, align)
+			   + ALIGN(nhdr->n_descsz, align));
+	  nhdr = (ElfW(Nhdr) *)((char *)nhdr + offset);
+	  size -= offset;
+	}
+}
+
+  return 0;
+}
+
+static const unsigned char *
+get_build_id ()
+{
+  static unsigned char build_id[16];
+  if (!dl_iterate_phdr (get_build_id_1, build_id))
+return NULL;
+  return build_id;
+}
+#else
+static const unsigned char *
+get_build_id ()
+{
+  return NULL;
+}
+#endif
+
 /* Compute an appropriate 8-byte magic number for the PCH file, so that
utilities like file(1) can identify it, and so that GCC can quickly
ignore non-PCH files and PCH files that are of a completely different
@@ -120,8 +180,11 @@ pch_init (void)
   v.pch_init = _init;
   target_validity = targetm.get_pch_validity (_data_length);
 
+  const unsigned char *chksum = get_build_id ();
+  if (!chksum)
+chksum = executable_checksum;
   if (fwrite (partial_pch, IDENT_LENGTH, 1, f) != 1
-  || fwrite (executable_checksum, 16, 1, f) != 1
+  || fwrite (chksum, 16, 1, f) != 1
   || fwrite (, sizeof (v), 1, f) != 1
   || fwrite (target_validity, v.target_data_length, 1, f) != 1)
 fatal_error (input_location, "cannot write to %s: %m", pch_file);
@@ -232,7 +295,10 @@ c_common_valid_pch (cpp_reader *pfile, const char *name, int fd)
 	  cpp_warning (pfile, CPP_W_INVALID_PCH, "%s: not a PCH file", name);
   return 2;
 }
-  if (memcmp (ident + IDENT_LENGTH, executable_checksum, 16) != 0)
+  const unsigned char *chksum = get_build_id ();
+  if (!chksum)
+chksum = executable_checksum;
+  if (memcmp (ident + IDENT_LENGTH, chksum, 16) != 0)
 {
   cpp_warning (pfile, CPP_W_INVALID_PCH,
 		   "%s: created by a different GCC executable", name);

Re: [EXTERNAL] Re: [PATCH] PR tree-optimization/96779 Adding a missing pattern to match.pd

2021-11-22 Thread Richard Biener via Gcc-patches
On Fri, Nov 19, 2021 at 11:33 PM Navid Rahimi  wrote:
>
> Hi Richard,
>
> Thanks for the detailed comment. I am attaching a newer version of the patch 
> which does have required fixes included. Bellow you can see my response to 
> your feedbacks:
>
> > you need to check TYPE_OVERFLOW_WRAPS on TREE_TYPE (@0),
> > otherwise you check on boolean.
> Fixed it.
>
> > no need for :c on the result pattern.  Otherwise it looks OK, but how
> > did you check the patch?
> Fixed it. For checking the patch, I have script which builds and runs make 
> check for 1) trunk and 2) trunk+patch in a separate directory and diffs the 
> test results from each directory. My test script did had a subtle problem. 
> The bug was, because of a typo in the path I introduced few days ago, it was 
> diffing same trunk+patch test results against trunk+patch test results.

OK, please indicate that in the future, like with "Bootstrapped and
tested on x86_64-linux" or so.

> That was a good reminder to setup an account for myself here asap [1].
>
> 1) https://gcc.gnu.org/wiki/CompileFarm

The updated patch is OK.

Thanks,
Richard.

> Best wishes,
> Navid.
>
> 
> From: Richard Biener 
> Sent: Friday, November 19, 2021 03:43
> To: Navid Rahimi
> Cc: Navid Rahimi via Gcc-patches
> Subject: [EXTERNAL] Re: [PATCH] PR tree-optimization/96779 Adding a missing 
> pattern to match.pd
>
> [You don't often get email from richard.guent...@gmail.com. Learn why this is 
> important at http://aka.ms/LearnAboutSenderIdentification.]
>
> On Tue, Nov 16, 2021 at 11:51 PM Navid Rahimi via Gcc-patches
>  wrote:
> >
> > Hi GCC community,
> >
> > This patch will add the missed pattern described in bug 102232 [1] to the 
> > match.pd.
> >
> > Tree-optimization/96779: Adding new optimization to match.pd:
> >
> > * match.pd (-x == x) -> (x == 0): New optimization.
> > * gcc.dg/tree-ssa/pr96779.c: testcase for this optimization.
> > * gcc.dg/tree-ssa/pr96779-disabled.c: testcase for this 
> > optimization when -fwrapv passed.
> >
> > 1) 
> > https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgcc.gnu.org%2Fbugzilla%2Fshow_bug.cgi%3Fid%3D96779data=04%7C01%7Cnavidrahimi%40microsoft.com%7C11c3214ef8164af4d50008d9ab51d9bc%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C637729190792397989%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000sdata=mxYBk6rex%2Bq5UMot%2BWfJqXeTYEYuM16hrvLGyp4PGeI%3Dreserved=0
>
> +/* -x == x -> x == 0 */
> +(for cmp (eq ne)
> + (simplify
> +  (cmp:c @0 (negate @0))
> +   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +&& !TYPE_OVERFLOW_WRAPS (type))
>
> you need to check TYPE_OVERFLOW_WRAPS on TREE_TYPE (@0),
> otherwise you check on boolean.
>
> +(cmp:c @0 { build_zero_cst (TREE_TYPE(@0)); }
> +
>
> no need for :c on the result pattern.  Otherwise it looks OK, but how
> did you check the patch?
>
> Thanks,
> Richard.
>
>
> > Best wishes,
> > Navid.


Re: [PATCH 03/10] tree-object-size: Use tree instead of HOST_WIDE_INT

2021-11-22 Thread Richard Biener via Gcc-patches
On Fri, Nov 19, 2021 at 8:17 PM Jakub Jelinek via Gcc-patches
 wrote:
>
> On Sat, Nov 20, 2021 at 12:31:19AM +0530, Siddhesh Poyarekar wrote:
> > > Neither of these are equivalent to what it used to do before.
> > > If some target has e.g. pointers wider than size_t, then previously we 
> > > could
> > > compute bytes that doesn't fit into size_t and would return NULL which
> > > eventually would result in object_sizes_execute or 
> > > expand_builtin_object_size
> > > resolving it to the unknown value.  But fold_convert will perform modulo
> > > arithmetics on it, so say if size_t is 24-bit and bytes is 0x101, it
> > > will fold to (size_t) 1.  I think we should still punt if it doesn't fit.
> > > Or do we ensure that what it returns always has sizetype type?
> >
> > compute_builtin_object_size should always return sizetype.  I probably
> > haven't strictly ensured that but I could do that.  Would it be necessary to
> > check for fit in sizetype -> size_type_node conversion too?  I've been
> > assuming that they'd have the same precision.
>
> sizetype and size_type_node should be indeed always the same precision.

I don't think so.  vms.h has

/* Always a 32 bit type.  */
#undef SIZE_TYPE
#define SIZE_TYPE  "unsigned int"

...

#define SIZETYPE (flag_vms_pointer_size == VMS_POINTER_SIZE_NONE ? \
  "unsigned int" : "long long unsigned int")

interestingly that's the _only_ SIZETYPE override we have, so it does look like
we might want to try removing the 'SIZETYPE' target macro.  I'm not sure the
above does anything good - it looks more like a ptr_mode vs Pmode thing.

Richard.

> Jakub
>


Re: [V2/PATCH] Fix tree-optimization/102216: missed optimization causing Warray-bounds

2021-11-22 Thread Andrew Pinski via Gcc-patches
On Wed, Oct 27, 2021 at 3:42 AM Richard Biener via Gcc-patches
 wrote:
>
> On Wed, Oct 27, 2021 at 12:00 PM apinski--- via Gcc-patches
>  wrote:
> >
> > From: Andrew Pinski 
> >
> > The problem here is tree-ssa-forwprop.c likes to produce
> >   [(void *)_4 + 152B] which is the same as
> > _4 p+ 152 which the rest of GCC likes better.
> > This implements this transformation back to pointer plus to
> > improve better code generation later on.
>
> Why do you think so?  Can you pin-point the transform that now
> fixes the new testcase?

So we had originally:
  language_names_p_9 =   [(void *)_4 + 24B];
...
  _2 = _4 + 40;
  if (_2 != language_names_p_9)

Forwprop is able to figure out that the above if statement is now
always false as we have (_4 +p 40) != (_4 +p 24) which gets simplified
via a match-and-simplify pattern ().
Does that answer your question?

I will look into the other comments in a new patch.

Thanks,
Andrew

>
> Comments below
>
> > OK? Bootstrapped and tested on aarch64-linux-gnu.
> >
> > Changes from v1:
> > * v2: Add comments.
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/102216
> > * tree-ssa-forwprop.c (rewrite_assign_addr): New function.
> > (forward_propagate_addr_expr_1): Use rewrite_assign_addr
> > when rewriting into the addr_expr into an assignment.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR tree-optimization/102216
> > * g++.dg/tree-ssa/pr102216.C: New test.
> > ---
> >  gcc/testsuite/g++.dg/tree-ssa/pr102216.C | 22 +
> >  gcc/tree-ssa-forwprop.c  | 58 ++--
> >  2 files changed, 67 insertions(+), 13 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> >
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216.C 
> > b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > new file mode 100644
> > index 000..b903e4eb57d
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216.C
> > @@ -0,0 +1,22 @@
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +void link_error ();
> > +void g ()
> > +{
> > +  const char **language_names;
> > +
> > +  language_names = new const char *[6];
> > +
> > +  const char **language_names_p = language_names;
> > +
> > +  language_names_p++;
> > +  language_names_p++;
> > +  language_names_p++;
> > +
> > +  if ( (language_names_p) - (language_names+3) != 0)
> > +link_error();
> > +  delete[] language_names;
> > +}
> > +/* We should have removed the link_error on the gimple level as GCC should
> > +   be able to tell that language_names_p is the same as language_names+3.  
> > */
> > +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
> > +
> > diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> > index a830bab78ba..e4331c60525 100644
> > --- a/gcc/tree-ssa-forwprop.c
> > +++ b/gcc/tree-ssa-forwprop.c
> > @@ -637,6 +637,47 @@ forward_propagate_into_cond (gimple_stmt_iterator 
> > *gsi_p)
> >return 0;
> >  }
> >
> > +/* Rewrite the DEF_RHS as needed into the (plain) use statement.  */
> > +
> > +static void
> > +rewrite_assign_addr (gimple_stmt_iterator *use_stmt_gsi, tree def_rhs)
> > +{
> > +  tree def_rhs_base;
> > +  poly_int64 def_rhs_offset;
> > +
> > +  /* Get the base and offset.  */
> > +  if ((def_rhs_base = get_addr_base_and_unit_offset (TREE_OPERAND 
> > (def_rhs, 0),
> > +_rhs_offset)))
>
> So this will cause us to rewrite [p_1].a.b.c; to a pointer-plus,
> right?  Don't
> we want to preserve that for object-size stuff?  So maybe directly pattern
> match ADDR_EXPR > only?
>
> > +{
> > +  tree new_ptr;
> > +  poly_offset_int off = 0;
> > +
> > +  /* If the base was a MEM, then add the offset to the other
> > + offset and adjust the base. */
> > +  if (TREE_CODE (def_rhs_base) == MEM_REF)
> > +   {
> > + off += mem_ref_offset (def_rhs_base);
> > + new_ptr = TREE_OPERAND (def_rhs_base, 0);
> > +   }
> > +  else
> > +   new_ptr = build_fold_addr_expr (def_rhs_base);
> > +
> > +  /* If we have the new base is not an address express, then use a p+ 
> > expression
> > + as the new expression instead of [x, offset]. */
> > +  if (TREE_CODE (new_ptr) != ADDR_EXPR)
> > +   {
> > + tree offset = wide_int_to_tree (sizetype, off);
> > + def_rhs = build2 (POINTER_PLUS_EXPR, TREE_TYPE (def_rhs), 
> > new_ptr, offset);
>
> Ick.  You should be able to use gimple_assign_set_rhs_with_ops.
>
> > +   }
> > +}
> > +
> > +  /* Replace the rhs with the new expression.  */
> > +  def_rhs = unshare_expr (def_rhs);
>
> and definitely no need to unshare anything here?
>
> > +  gimple_assign_set_rhs_from_tree (use_stmt_gsi, def_rhs);
> > +  gimple *use_stmt = gsi_stmt (*use_stmt_gsi);
> > +  update_stmt (use_stmt);
> > +}
> > +
> >  /* We've just substituted an ADDR_EXPR into stmt.  Update all the
> > relevant data structures to match.  */

[PATCH] tree-optimization/103345: Improved load merging

2021-11-22 Thread Roger Sayle

This patch implements PR tree-optimization/103345 to merge adjacent
loads when combined with addition or bitwise xor.  The current code
in gimple-ssa-store-merging.c's find_bswap_or_nop alreay handles ior,
so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in
the same way at BIT_IOR_EXPR.  Many thanks to Andrew Pinski for
pointing out that this also resolves PR target/98953.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check with no new failures.  The new testcases should also
pass (but haven't been tested) on other endian targets.

Ok for mainline?


2021-11-22  Roger Sayle  

gcc/ChangeLog
PR tree-optimization/98953
PR tree-optimization/103345
* gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle
BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR.
(pass_optimize_bswap::execute): Likewise.

gcc/testsuite/ChangeLog
PR tree-optimization/98953
PR tree-optimization/103345
* gcc.dg/tree-ssa/pr98953.c: New test case.
* gcc.dg/tree-ssa/pr103345.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
index 4efa200..1740c9e 100644
--- a/gcc/gimple-ssa-store-merging.c
+++ b/gcc/gimple-ssa-store-merging.c
@@ -742,10 +742,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number 
*n, int limit)
   struct symbolic_number n1, n2;
   gimple *source_stmt, *source_stmt2;
 
-  if (code != BIT_IOR_EXPR)
-   return NULL;
-
-  if (TREE_CODE (rhs2) != SSA_NAME)
+  if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
return NULL;
 
   rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
@@ -753,6 +750,8 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number 
*n, int limit)
   switch (code)
{
case BIT_IOR_EXPR:
+   case BIT_XOR_EXPR:
+   case PLUS_EXPR:
  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, , limit - 1);
 
  if (!source_stmt1)
@@ -1495,6 +1494,8 @@ pass_optimize_bswap::execute (function *fun)
continue;
  /* Fall through.  */
case BIT_IOR_EXPR:
+   case BIT_XOR_EXPR:
+   case PLUS_EXPR:
  break;
case CONSTRUCTOR:
  {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
new file mode 100644
index 000..94388b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+
+uint32_t load_le_32_or(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) |
+ ((uint32_t)ptr[1] << 8) |
+ ((uint32_t)ptr[2] << 16) |
+ ((uint32_t)ptr[3] << 24);
+#else
+  return ((uint32_t)ptr[3]) |
+ ((uint32_t)ptr[2] << 8) |
+ ((uint32_t)ptr[1] << 16) |
+ ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_add(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) +
+ ((uint32_t)ptr[1] << 8) +
+ ((uint32_t)ptr[2] << 16) +
+ ((uint32_t)ptr[3] << 24);
+#else
+  return ((uint32_t)ptr[3]) +
+ ((uint32_t)ptr[2] << 8) +
+ ((uint32_t)ptr[1] << 16) +
+ ((uint32_t)ptr[0] << 24);
+#endif
+}
+
+uint32_t load_le_32_xor(const uint8_t *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return ((uint32_t)ptr[0]) ^
+ ((uint32_t)ptr[1] << 8) ^
+ ((uint32_t)ptr[2] << 16) ^
+ ((uint32_t)ptr[3] << 24);
+#else
+  return ((uint32_t)ptr[0]) ^
+ ((uint32_t)ptr[1] << 8) ^
+ ((uint32_t)ptr[2] << 16) ^
+ ((uint32_t)ptr[3] << 24);
+#endif
+}
+
+/* { dg-final { scan-tree-dump-times "32 bit load in target endianness found" 
3 "bswap" } } */
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
new file mode 100644
index 000..7687dc2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-bswap-details" } */
+
+int foo(unsigned char *ptr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+return ptr[0] + (ptr[1] << 8);
+#else
+return ptr[1] + (ptr[0] << 8);
+#endif
+}
+
+/* { dg-final { scan-tree-dump "16 bit load in target endianness found" 
"bswap" } } */
+


[PATCH v2 0/2] RISC-V: add gcc support for Scalar Cryptography v1.0.0-rc6

2021-11-22 Thread siyu
From: SiYu Wu 

This patch add gcc backend support for RISC-V Scalar Cryptography 
Extension (k-ext), including machine description, builtins defines and 
testcases for each k-ext's subset.

A note about Zbkx: The Zbkx should be implemented in bitmanip's Zbp, but 
since zbp is not included in the bitmanip spec v1.0, and crypto's v1.0 
release will earlier than bitmanip's next release, so for now we 
implementing it here.

Version logs:

v2: As Kito mentions, now this patch only includes the arch string related 
stuff, the builtins and md changes is not included, waiting for the builtin
and intrinsic added to the spec. Also removed the unnecessary patches and add
Changelogs.


SiYu Wu (2):
  RISC-V: Add option defines for Scalar Cryptography
  RISC-V: Add implied defines of Zk, Zkn and Zks

 gcc/common/config/riscv/riscv-common.c | 38 +-
 gcc/config/riscv/arch-canonicalize | 16 ++-
 gcc/config/riscv/riscv-opts.h  | 22 +++
 gcc/config/riscv/riscv.opt |  3 ++
 4 files changed, 77 insertions(+), 2 deletions(-)

-- 
2.25.1



[PATCH v2 1/2] RISC-V: Add option defines for Scalar Cryptography

2021-11-22 Thread siyu
From: SiYu Wu 

gcc/ChangeLog:

2021-11-21  SiYu Wu  

* common/config/riscv/riscv-common.c (riscv_ext_version_table):
Add zbk* and zk*.
* config/riscv/riscv-opts.h (MASK_ZBKB): New.
(MASK_ZBKC): Ditto.
(MASK_ZBKX): Ditto.
(MASK_ZKNE): Ditto.
(MASK_ZKND): Ditto.
(MASK_ZKNH): Ditto.
(MASK_ZKR): Ditto.
(MASK_ZKSED): Ditto.
(MASK_ZKSH): Ditto.
(MASK_ZKT): Ditto.
(TARGET_ZBKB): Ditto.
(TARGET_ZBKC): Ditto.
(TARGET_ZBKX): Ditto.
(TARGET_ZKNE): Ditto.
(TARGET_ZKND): Ditto.
(TARGET_ZKNH): Ditto.
(TARGET_ZKR): Ditto.
(TARGET_ZKSED): Ditto.
(TARGET_ZKSH): Ditto.
(TARGET_ZKT): Ditto.
* config/riscv/riscv.opt (riscv_zk_subext): New.
---
 gcc/common/config/riscv/riscv-common.c | 22 ++
 gcc/config/riscv/riscv-opts.h  | 22 ++
 gcc/config/riscv/riscv.opt |  3 +++
 3 files changed, 47 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index b8dd0aeac3e..14dc6057ecd 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -106,6 +106,17 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zbc", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbs", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zbkb",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zbkc",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zbkx",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zkne",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zknd",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zknh",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zkr",   ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zksed", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zksh",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zkt",   ISA_SPEC_CLASS_NONE, 1, 0},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
@@ -915,6 +926,17 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] 
=
   {"zbc",_options::x_riscv_zb_subext, MASK_ZBC},
   {"zbs",_options::x_riscv_zb_subext, MASK_ZBS},
 
+  {"zbkb",   _options::x_riscv_zk_subext, MASK_ZBKB},
+  {"zbkc",   _options::x_riscv_zk_subext, MASK_ZBKC},
+  {"zbkx",   _options::x_riscv_zk_subext, MASK_ZBKX},
+  {"zknd",   _options::x_riscv_zk_subext, MASK_ZKND},
+  {"zkne",   _options::x_riscv_zk_subext, MASK_ZKNE},
+  {"zknh",   _options::x_riscv_zk_subext, MASK_ZKNH},
+  {"zkr",_options::x_riscv_zk_subext, MASK_ZKR},
+  {"zksed",  _options::x_riscv_zk_subext, MASK_ZKSED},
+  {"zksh",   _options::x_riscv_zk_subext, MASK_ZKSH},
+  {"zkt",_options::x_riscv_zk_subext, MASK_ZKT},
+
   {NULL, NULL, 0}
 };
 
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 2efc4b80f1f..f65ff678811 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -83,4 +83,26 @@ enum stack_protector_guard {
 #define TARGET_ZBC((riscv_zb_subext & MASK_ZBC) != 0)
 #define TARGET_ZBS((riscv_zb_subext & MASK_ZBS) != 0)
 
+#define MASK_ZBKB (1 << 0)
+#define MASK_ZBKC (1 << 1)
+#define MASK_ZBKX (1 << 2)
+#define MASK_ZKNE (1 << 3)
+#define MASK_ZKND (1 << 4)
+#define MASK_ZKNH (1 << 5)
+#define MASK_ZKR  (1 << 6)
+#define MASK_ZKSED(1 << 7)
+#define MASK_ZKSH (1 << 8)
+#define MASK_ZKT  (1 << 9)
+
+#define TARGET_ZBKB   ((riscv_zk_subext & MASK_ZBKB) != 0)
+#define TARGET_ZBKC   ((riscv_zk_subext & MASK_ZBKC) != 0)
+#define TARGET_ZBKX   ((riscv_zk_subext & MASK_ZBKX) != 0)
+#define TARGET_ZKNE   ((riscv_zk_subext & MASK_ZKNE) != 0)
+#define TARGET_ZKND   ((riscv_zk_subext & MASK_ZKND) != 0)
+#define TARGET_ZKNH   ((riscv_zk_subext & MASK_ZKNH) != 0)
+#define TARGET_ZKR((riscv_zk_subext & MASK_ZKR) != 0)
+#define TARGET_ZKSED  ((riscv_zk_subext & MASK_ZKSED) != 0)
+#define TARGET_ZKSH   ((riscv_zk_subext & MASK_ZKSH) != 0)
+#define TARGET_ZKT((riscv_zk_subext & MASK_ZKT) != 0)
+
 #endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 15bf89e17c2..617000975bf 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -198,6 +198,9 @@ int riscv_zi_subext
 TargetVariable
 int riscv_zb_subext
 
+TargetVariable
+int riscv_zk_subext
+
 Enum
 Name(isa_spec_class) Type(enum riscv_isa_spec_class)
 Supported ISA specs (for use with the -misa-spec= option):
-- 
2.25.1



[PATCH v2 2/2] RISC-V: Add implied defines of Zk, Zkn and Zks

2021-11-22 Thread siyu
From: SiYu Wu 

gcc/ChangeLog:

2021-11-22  SiYu Wu  

* common/config/riscv/riscv-common.c (riscv_implied_info):
Add K-ext related entry.
(riscv_supported_std_ext): Add 'k'.
* config/riscv/arch-canonicalize (CANONICAL_ORDER): Add 'k'.
(IMPLIED_EXT): Add K-ext related entry.
---
 gcc/common/config/riscv/riscv-common.c | 16 +++-
 gcc/config/riscv/arch-canonicalize | 16 +++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index 14dc6057ecd..f352ff4ce7b 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -50,6 +50,20 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"d", "f"},
   {"f", "zicsr"},
   {"d", "zicsr"},
+  {"zk", "zkn"},
+  {"zk", "zkr"},
+  {"zk", "zkt"},
+  {"zkn", "zbkb"},
+  {"zkn", "zbkc"},
+  {"zkn", "zbkx"},
+  {"zkn", "zkne"},
+  {"zkn", "zknd"},
+  {"zkn", "zknh"},
+  {"zks", "zbkb"},
+  {"zks", "zbkc"},
+  {"zks", "zbkx"},
+  {"zks", "zksed"},
+  {"zks", "zksh"},
   {NULL, NULL}
 };
 
@@ -502,7 +516,7 @@ riscv_subset_list::lookup (const char *subset, int 
major_version,
 static const char *
 riscv_supported_std_ext (void)
 {
-  return "mafdqlcbjtpvn";
+  return "mafdqlcbjktpvn";
 }
 
 /* Parsing subset version.
diff --git a/gcc/config/riscv/arch-canonicalize 
b/gcc/config/riscv/arch-canonicalize
index c7df3c8a313..90dbd194801 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -28,7 +28,7 @@ import itertools
 from functools import reduce
 
 
-CANONICAL_ORDER = "imafdgqlcbjtpvn"
+CANONICAL_ORDER = "imafdgqlcbjktpvn"
 LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
 
 #
@@ -36,6 +36,20 @@ LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
 #
 IMPLIED_EXT = {
   "d" : ["f"],
+  "zk" : ["zkn"],
+  "zk" : ["zkr"],
+  "zk" : ["zkt"],
+  "zkn" : ["zbkb"],
+  "zkn" : ["zbkc"],
+  "zkn" : ["zbkx"],
+  "zkn" : ["zkne"],
+  "zkn" : ["zknd"],
+  "zkn" : ["zknh"],
+  "zks" : ["zbkb"],
+  "zks" : ["zbkc"],
+  "zks" : ["zbkx"],
+  "zks" : ["zksed"],
+  "zks" : ["zksh"],
 }
 
 def arch_canonicalize(arch):
-- 
2.25.1