RE: [PATCH] middle-end: fix de-optimizations with bitclear patterns on signed values

2021-11-11 Thread Tamar Christina via Gcc-patches


> -----Original Message-----
> From: Jakub Jelinek 
> Sent: Thursday, November 4, 2021 4:11 PM
> To: Tamar Christina 
> Cc: Jonathan Wakely ; Richard Biener
> ; gcc-patches@gcc.gnu.org; nd 
> Subject: Re: [PATCH] middle-end: fix de-optimizations with bitclear patterns
> on signed values
> 
> On Thu, Nov 04, 2021 at 12:19:34PM +, Tamar Christina wrote:
> > I'm not sure the precision matters since if the conversion resulted in
> > not enough precision such that It influences the compare it would have
> been optimized out.
> 
> You can't really rely on other optimizations being performed.  They will
> usually happen, but might not because such code only materialized short
> time ago without folding happening in between, or some debug counters or -
> fno-* disabling some passes, ...

Fair point, I have separated out the logic as you requested and added the debug 
fix.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-ssa-phiopt.c (spaceship_replacement): Handle new canonical
codegen.

--- inline copy of patch ---

diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 
0e339c46afa29fa97f90d9bc4394370cd9b4b396..3ad5b23885a37eec0beff229e2a96e86658b2d1a
 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -2038,11 +2038,36 @@ spaceship_replacement (basic_block cond_bb, basic_block 
middle_bb,
   gimple *orig_use_stmt = use_stmt;
   tree orig_use_lhs = NULL_TREE;
   int prec = TYPE_PRECISION (TREE_TYPE (phires));
-  if (is_gimple_assign (use_stmt)
-  && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
-  && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
-  && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
- == wi::shifted_mask (1, prec - 1, false, prec)))
+  bool is_cast = false;
+
+  /* Deal with the case when match.pd has rewritten the (res & ~1) == 0
+ into res <= 1 and has left a type-cast for signed types.  */
+  if (gimple_assign_cast_p (use_stmt))
+{
+  orig_use_lhs = gimple_assign_lhs (use_stmt);
+  /* match.pd would have only done this for a signed type,
+so the conversion must be to an unsigned one.  */
+  tree ty1 = TREE_TYPE (gimple_assign_rhs1 (use_stmt));
+  tree ty2 = TREE_TYPE (orig_use_lhs);
+
+  if (!TYPE_UNSIGNED (ty2) || !INTEGRAL_TYPE_P (ty2))
+   return false;
+  if (TYPE_PRECISION (ty1) != TYPE_PRECISION (ty2))
+   return false;
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (orig_use_lhs))
+   return false;
+  if (EDGE_COUNT (phi_bb->preds) != 4)
+   return false;
+  if (!single_imm_use (orig_use_lhs, &use_p, &use_stmt))
+   return false;
+
+  is_cast = true;
+}
+  else if (is_gimple_assign (use_stmt)
+  && gimple_assign_rhs_code (use_stmt) == BIT_AND_EXPR
+  && TREE_CODE (gimple_assign_rhs2 (use_stmt)) == INTEGER_CST
+  && (wi::to_wide (gimple_assign_rhs2 (use_stmt))
+  == wi::shifted_mask (1, prec - 1, false, prec)))
 {
   /* For partial_ordering result operator>= with unspec as second
 argument is (res & 1) == res, folded by match.pd into
@@ -2099,7 +2124,7 @@ spaceship_replacement (basic_block cond_bb, basic_block 
middle_bb,
   || !tree_fits_shwi_p (rhs)
   || !IN_RANGE (tree_to_shwi (rhs), -1, 1))
 return false;
-  if (orig_use_lhs)
+  if (orig_use_lhs && !is_cast)
 {
   if ((cmp != EQ_EXPR && cmp != NE_EXPR) || !integer_zerop (rhs))
return false;
@@ -2310,62 +2335,101 @@ spaceship_replacement (basic_block cond_bb, 
basic_block middle_bb,
 one_cmp = GT_EXPR;
 
   enum tree_code res_cmp;
-  switch (cmp)
+
+  if (is_cast)
 {
-case EQ_EXPR:
-  if (integer_zerop (rhs))
-   res_cmp = EQ_EXPR;
-  else if (integer_minus_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR;
-  else if (integer_onep (rhs))
-   res_cmp = one_cmp;
-  else
+  if (TREE_CODE (rhs) != INTEGER_CST)
return false;
-  break;
-case NE_EXPR:
-  if (integer_zerop (rhs))
-   res_cmp = NE_EXPR;
-  else if (integer_minus_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR;
-  else if (integer_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR;
-  else
-   return false;
-  break;
-case LT_EXPR:
-  if (integer_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR;
-  else if (integer_zerop (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR;
-  else
-   return false;
-  break;
-case LE_EXPR:
-  if (integer_zerop (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR;
-  else if (integer_minus_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR;
-  else
-   return false;
-  break;
-case GT_EXPR:
-  if (integer_minus_onep (rhs))
-   res_cmp = one_cmp == LT_EXPR ? LE_EXPR : 

Re: [PATCH] testsuite/102690 - XFAIL g++.dg/warn/Warray-bounds-16.C

2021-11-11 Thread Richard Biener via Gcc-patches
On Thu, 11 Nov 2021, Martin Sebor wrote:

> On 11/11/21 1:18 AM, Richard Biener wrote:
> > On Wed, 10 Nov 2021, Martin Sebor wrote:
> > 
> >> On 11/10/21 3:09 AM, Richard Biener via Gcc-patches wrote:
> >>> This XFAILs the bogus diagnostic test and rectifies the expectation
> >>> on the optimization.
> >>>
> >>> Tested on x86_64-unknown-linux-gnu, pushed.
> >>>
> >>> 2021-11-10  Richard Biener  
> >>>
> >>>   PR testsuite/102690
> >>>   * g++.dg/warn/Warray-bounds-16.C: XFAIL diagnostic part
> >>>   and optimization.
> >>> ---
> >>>gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 6 +++---
> >>>1 file changed, 3 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> >>> b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> >>> index 17b4d0d194e..89cbadb91c7 100644
> >>> --- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> >>> +++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
> >>> @@ -19,11 +19,11 @@ struct S
> >>>p = (int*) new unsigned char [sizeof (int) * m];
> >>>
> >>>for (int i = 0; i < m; i++)
> >>> -  new (p + i) int ();
> >>> +  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } } */
> >>>  }
> >>>};
> >>>
> >>>S a (0);
> >>>
> >>> -/* Verify the loop has been eliminated.
> >>> -   { dg-final { scan-tree-dump-not "goto" "optimized" } } */
> >>> +/* The loop cannot be eliminated since the global 'new' can change 'm'.
> >>> */
> >>
> >> I don't understand this comment.  Can you please explain how
> >> the global operator new (i.e., the one outside the loop below)
> >> can change the member of the class whose ctor calls the new?
> >>
> >> The member, or more precisely the enclosing object, doesn't
> >> yet exist at the time the global new is called because its
> >> ctor hasn't finished, so nothing outside the ctor can access
> >> it.  A pointer to the S under construction can be used (and
> >> could be accessed by a replacement new) but it cannot be
> >> dereferenced to access its members because the object it
> >> points to doesn't exist until after the ctor completes.
> > 
> > Yes, that's the C++ legalese - which is why I XFAILed that
> > part of the test rather than just removed it.  The middle-end
> > sees the object *this as existing and being global, thus
> > accessible and mutable by '::new' which when replaced by
> > the user could access and alter *this.  Like maybe for
> > 
> > S s;
> > 
> > void *operator new(..) { s.m = 0; }
> > 
> > main()
> > {
> >new () (1);
> > }
> > 
> > that may be invalid C++ but this detail of C++ is not
> > reflected in the GIMPLE IL.  Before the change that regressed
> > this if S::S() would call a global function foo() instead
> > of new to do the allocation the behavior would be as after
> > the change.  Isn't the call to new or foo part of the
> > construction and as such obviously allowed to access
> > and alter the in-construction object?
> 
> Here's my understanding.
> 
> The lifetime of an object ends when its storage is reused to
> create another object of a class type, and the lifetime of
> the other object begins when its initialization is complete.
> 
> In the window between the new ctor starting and completing
> the new object can be accessed in limited ways, but not
> the old object.  (If it were otherwise, imagine the new
> object's ctor throwing in the middle of constructing the new
> object.  What state would that leave the old object in?  Which
> members could still be accessed?)
> 
> The only way to access a subobject of the new object while
> it's still under construction is through a pointer to that
> subobject or through this (or a pointer derived from this).

OK, so that might be the detail that one could exploit eventually.

> It seems that it should be possible to capture the constraint
> in the middle end that no member of an object under construction
> can be accessed unless a pointer to it has escaped that's derived
> from the this pointer.

IIRC at some point we had this * restrict qualified in the CTOR
but that broke cases where that's clearly not correct (but I forgot
the details).  Note for the middle-end that would just point at
another missed optimization, namely that restrict does not work
to disambiguate against calls.

> But this seems like a sufficiently obscure case that an expert
> on the C++ object model should confirm it.
>
> Martin
> 
> > 
> >> I copy the test below:
> >>
> >> inline void* operator new (__SIZE_TYPE__, void * v)
> >> {
> >>return v;
> >> }
> >>
> >> struct S
> >> {
> >>int* p;
> >>int m;
> >>
> >>S (int i)
> >>{
> >>  m = i;
> >>  p = (int*) new unsigned char [sizeof (int) * m];
> >>
> >>  for (int i = 0; i < m; i++)
> >> new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } } */
> >>}
> >> };
> >>
> >> S a (0);
> >>
> >> Thanks
> >> Martin
> >>
> > 
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions 

[PATCH] c++: implicit dummy object in requires clause [PR103198]

2021-11-11 Thread Patrick Palka via Gcc-patches
In the testcase below satisfaction misbehaves for f and g ultimately
because find_template_parameters fails to notice that the constraint
'val.x' depends on the template parameters of the class template.
In contrast, satisfaction works just fine for h.

The problem seems to come down to a difference in how any_template_parm_r
handles 'this' vs a dummy object: we walk TREE_TYPE of the former but
not the latter, and this causes us to miss the tparm dependencies in
f/g's constraints since in their case the implicit object parameter
through which we access 'val' is a dummy object.  (For h, since we know
it's a non-static member function when parsing its trailing constraints,
the implicit object parameter is 'this' instead of a dummy object.)

This patch fixes this inconsistency by making any_template_parm_r also
walk into the TREE_TYPE of a dummy object, as is already done for
'this'.

Bootstrapped and regtested on x86_64-pc-linux-gnu, also tested on
cmcstl2 and range-v3, does this look OK for trunk and 11?

PR c++/103198

gcc/cp/ChangeLog:

* pt.c (any_template_parm_r): Walk the TREE_TYPE of a dummy
object.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-this1.C: New test.
---
 gcc/cp/pt.c |  5 
 gcc/testsuite/g++.dg/cpp2a/concepts-this1.C | 30 +
 2 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-this1.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 82bf7dc26f6..fa55857d783 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10766,6 +10766,11 @@ any_template_parm_r (tree t, void *data)
WALK_SUBTREE (TREE_TYPE (t));
   break;
 
+case CONVERT_EXPR:
+  if (is_dummy_object (t))
+   WALK_SUBTREE (TREE_TYPE (t));
+  break;
+
 default:
   break;
 }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
new file mode 100644
index 000..d717028201a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-this1.C
@@ -0,0 +1,30 @@
+// PR c++/103198
+// { dg-do compile { target c++20 } }
+
+template<class T>
+struct A {
+  T val;
+
+  template<class U>
+requires requires { val.x; }
+  void f(U);
+
+  static void g(int)
+requires requires { val.x; };
+
+  void h(int)
+requires requires { val.x; };
+};
+
+struct B { int x; };
+struct C { };
+
+int main() {
+  A<B>().f(0);
+  A<B>().g(0);
+  A<B>().h(0);
+
+  A<C>().f(0); // { dg-error "no match" }
+  A<C>().g(0); // { dg-error "no match" }
+  A<C>().h(0); // { dg-error "no match" }
+}
-- 
2.34.0.rc2.9.g4d53e91c6b



Re: [PATCH] testsuite/102690 - XFAIL g++.dg/warn/Warray-bounds-16.C

2021-11-11 Thread Martin Sebor via Gcc-patches

On 11/11/21 1:18 AM, Richard Biener wrote:

On Wed, 10 Nov 2021, Martin Sebor wrote:


On 11/10/21 3:09 AM, Richard Biener via Gcc-patches wrote:

This XFAILs the bogus diagnostic test and rectifies the expectation
on the optimization.

Tested on x86_64-unknown-linux-gnu, pushed.

2021-11-10  Richard Biener  

  PR testsuite/102690
  * g++.dg/warn/Warray-bounds-16.C: XFAIL diagnostic part
  and optimization.
---
   gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
index 17b4d0d194e..89cbadb91c7 100644
--- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
+++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
@@ -19,11 +19,11 @@ struct S
   p = (int*) new unsigned char [sizeof (int) * m];
   
   for (int i = 0; i < m; i++)

-  new (p + i) int ();
-  new (p + i) int ();
+  new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } } */
 }
   };
   
   S a (0);
   
-/* Verify the loop has been eliminated.

-   { dg-final { scan-tree-dump-not "goto" "optimized" } } */
+/* The loop cannot be eliminated since the global 'new' can change 'm'.  */


I don't understand this comment.  Can you please explain how
the global operator new (i.e., the one outside the loop below)
can change the member of the class whose ctor calls the new?

The member, or more precisely the enclosing object, doesn't
yet exist at the time the global new is called because its
ctor hasn't finished, so nothing outside the ctor can access
it.  A pointer to the S under construction can be used (and
could be accessed by a replacement new) but it cannot be
dereferenced to access its members because the object it
points to doesn't exist until after the ctor completes.


Yes, that's the C++ legalese - which is why I XFAILed that
part of the test rather than just removed it.  The middle-end
sees the object *this as existing and being global, thus
accessible and mutable by '::new' which when replaced by
the user could access and alter *this.  Like maybe for

S s;

void *operator new(..) { s.m = 0; }

main()
{
   new () (1);
}

that may be invalid C++ but this detail of C++ is not
reflected in the GIMPLE IL.  Before the change that regressed
this if S::S() would call a global function foo() instead
of new to do the allocation the behavior would be as after
the change.  Isn't the call to new or foo part of the
construction and as such obviously allowed to access
and alter the in-construction object?


Here's my understanding.

The lifetime of an object ends when its storage is reused to
create another object of a class type, and the lifetime of
the other object begins when its initialization is complete.

In the window between the new ctor starting and completing
the new object can be accessed in limited ways, but not
the old object.  (If it were otherwise, imagine the new
object's ctor throwing in the middle of constructing the new
object.  What state would that leave the old object in?  Which
members could still be accessed?)

The only way to access a subobject of the new object while
it's still under construction is through a pointer to that
subobject or through this (or a pointer derived from this).

It seems that it should be possible to capture the constraint
in the middle end that no member of an object under construction
can be accessed unless a pointer to it has escaped that's derived
from the this pointer.

But this seems like a sufficiently obscure case that an expert
on the C++ object model should confirm it.

Martin




I copy the test below:

inline void* operator new (__SIZE_TYPE__, void * v)
{
   return v;
}

struct S
{
   int* p;
   int m;

   S (int i)
   {
 m = i;
 p = (int*) new unsigned char [sizeof (int) * m];

 for (int i = 0; i < m; i++)
   new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } } */
   }
};

S a (0);

Thanks
Martin







[PATCH][GCC] aarch64: Add LS64 extension and intrinsics

2021-11-11 Thread Przemyslaw Wirkus via Gcc-patches
Hi,

This patch is adding support for LS64 (Armv8.7-A Load/Store 64 Byte extension)
which is part of Armv8.7-A architecture. Changes include missing plumbing for
TARGET_LS64, LS64 data structure and intrinsics defined in ACLE [0]. Machine
description of intrinsics is using new V8DI mode added in a separate patch.
__ARM_FEATURE_LS64 is defined if the Armv8.7-A LS64 instructions for atomic
64-byte access to device memory are supported.

New compiler internal type is added wrapping ACLE struct data512_t [0]:

typedef struct {
  uint64_t val[8];
} __arm_data512_t;

Please note that command line support for this feature was already added [1].

  [0] 
https://github.com/ARM-software/acle/blob/main/main/acle.rst#load-store-64-byte-intrinsics
  [1] commit e159c0aa10e50c292a534535c73f38d22b6129a8 (AArch64: Add command-line
  support for Armv8.7-a)

For below C code see example snippets of generated code:

#include <arm_acle.h>

void
func(const void * addr, data512_t *data) {
  *data = __arm_ld64b (addr);
}

func:
ld64b   x8, [x0]
stp x8, x9, [x1]
sub sp, sp, #64
stp x10, x11, [x1, 16]
stp x12, x13, [x1, 32]
stp x14, x15, [x1, 48]
add sp, sp, 64
ret
~~~

#include <arm_acle.h>

uint64_t
func(void *addr, data512_t value) {
return  __arm_st64bv (addr, value);
}

func:
ldp x8, x9, [x1]
ldp x10, x11, [x1, 16]
ldp x12, x13, [x1, 32]
ldp x14, x15, [x1, 48]
st64bv  x1, x8, [x0]
mov x0, x1
ret

~~~

uint64_t
ls64_store_v0(const data512_t *input, void *addr)
{
uint64_t status;
__asm__ volatile ("st64bv0 %0, %2, [%1]"
  : "=r" (status), "=r" (addr)
  : "r" (*input)
  : "memory");
return status;
}

ls64_store_v0:
ldp x8, x9, [x0]
ldp x10, x11, [x0, 16]
ldp x12, x13, [x0, 32]
ldp x14, x15, [x0, 48]
st64bv0 x0, x8, [x1]
ret

Regtested on aarch64-elf cross and no issues.

OK for master?

gcc/ChangeLog:

2021-11-11  Przemyslaw Wirkus  

* config/aarch64/aarch64-builtins.c (enum aarch64_builtins):
Define AARCH64_LS64_BUILTIN_LD64B, AARCH64_LS64_BUILTIN_ST64B,
AARCH64_LS64_BUILTIN_ST64BV, AARCH64_LS64_BUILTIN_ST64BV0.
(aarch64_init_ls64_builtin_decl): Helper function.
(aarch64_init_ls64_builtins): Helper function.
(aarch64_init_ls64_builtins_types): Helper function.
(aarch64_general_init_builtins): Init LS64 intrisics for
TARGET_LS64.
(aarch64_expand_builtin_ls64): LS64 intrinsics expander.
(aarch64_general_expand_builtin): Handle aarch64_expand_builtin_ls64.
(ls64_builtins_data): New helper struct.
(v8di_UP): New define.
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_LS64.
* config/aarch64/aarch64.h (AARCH64_ISA_LS64): New define.
(AARCH64_ISA_V8_7): New define.
(TARGET_LS64): New define.
* config/aarch64/aarch64.md: Add UNSPEC_LD64B, UNSPEC_ST64B,
UNSPEC_ST64BV and UNSPEC_ST64BV0.
(ld64b): New define_insn.
(st64b): New define_insn.
(st64bv): New define_insn.
(st64bv0): New define_insn.
* config/aarch64/arm_acle.h (target):
(data512_t): New type derived from __arm_data512_t.
(__arm_data512_t): New internal type.
(__arm_ld64b): New intrinsic.
(__arm_st64b): New intrinsic.
(__arm_st64bv): New intrinsic.
(__arm_st64bv0): New intrinsic.
* config/arm/types.md: Add new type ls64.

gcc/testsuite/ChangeLog:

2021-11-11  Przemyslaw Wirkus  

* gcc.target/aarch64/acle/ls64_asm.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b-2.c: New test.
* gcc.target/aarch64/acle/ls64_ld64b.c: New test.
* gcc.target/aarch64/acle/ls64_st64b.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0-2.c: New test.
* gcc.target/aarch64/acle/ls64_st64bv0.c: New test.
* gcc.target/aarch64/pragma_cpp_predefs_2.c: Add checks
for __ARM_FEATURE_LS64.


rb14982.patch
Description: rb14982.patch


[PATCH] rs6000: testsuite: Add rop_ok effective-target function

2021-11-11 Thread Peter Bergner via Gcc-patches
This patch adds a new effective-target function that tests whether
it is safe to emit the ROP-protect instructions and updates the
ROP test cases to use it.

Segher, as we discussed offline, this uses the double [] which you said
isn't needed in general regex's, but for some reason is needed in the gcc
testsuite regex.

Tested on powerpc64le*-linux with no regressions.  Ok for mainline?

Peter


gcc/testsuite/
* lib/target-supports.exp (check_effective_target_rop_ok): New function.
* gcc.target/powerpc/rop-1.c: Use it.
* gcc.target/powerpc/rop-2.c: Likewise.
* gcc.target/powerpc/rop-3.c: Likewise.
* gcc.target/powerpc/rop-4.c: Likewise.
* gcc.target/powerpc/rop-5.c: Likewise.

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 1c8b1ebb86e..0d9a3ba67ce 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6625,6 +6625,13 @@ proc check_effective_target_powerpc_elfv2 { } {
 }
 }
 
+# Return 1 if this is a PowerPC target supporting -mrop-protect
+
+proc check_effective_target_rop_ok { } {
+    return [expr { [check_effective_target_power10_ok]
+                   && [check_effective_target_powerpc_elfv2] }]
+}
+
 # The VxWorks SPARC simulator accepts only EM_SPARC executables and
 # chokes on EM_SPARC32PLUS or EM_SPARCV9 executables.  Return 1 if the
 # test environment appears to run executables on such a simulator.
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-1.c 
b/gcc/testsuite/gcc.target/powerpc/rop-1.c
index 8cedcb6668a..12893dec027 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
-/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target rop_ok } */
 
 /* Verify that ROP-protect instructions are inserted when a
call is present.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-2.c 
b/gcc/testsuite/gcc.target/powerpc/rop-2.c
index c556952aec1..5f1d7c39bfc 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mprivileged" } */
-/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target rop_ok } */
 
 /* Verify that privileged ROP-protect instructions are inserted when a
call is present.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-3.c 
b/gcc/testsuite/gcc.target/powerpc/rop-3.c
index 8d03792e3e5..d2ef3bf4bad 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-3.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target { power10_hw } } } */
-/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target rop_ok } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
 
 /* Verify that ROP-protect instructions execute correctly when a
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-4.c 
b/gcc/testsuite/gcc.target/powerpc/rop-4.c
index dcf47c63fb7..80faa9b3f7b 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-4.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
-/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target rop_ok } */
 
 /* Verify that no ROP-protect instructions are inserted when no
call is present.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
b/gcc/testsuite/gcc.target/powerpc/rop-5.c
index f2594df8a44..f7970f1fe25 100644
--- a/gcc/testsuite/gcc.target/powerpc/rop-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-require-effective-target rop_ok } */
 /* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
 
 /* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */


Re: rs6000: Fix up flag_shrink_wrap handling in presence of -mrop-protect [PR101324]

2021-11-11 Thread Peter Bergner via Gcc-patches
Sorry for taking so long to get back to this.

On 10/29/21 4:45 PM, Segher Boessenkool wrote:
> On Wed, Oct 27, 2021 at 10:17:39PM -0500, Peter Bergner wrote:
>> +/* Ensure hashst comes after mflr and hashchk comes after ld 0,16(1).  */
>> +/* { dg-final { scan-assembler "mflr 0.*hashst 0," } } */
>> +/* { dg-final { scan-assembler "ld 0,16\\\(1\\\).*hashchk 0," } } */
> 
> First: don't use double quotes, or you get double backslashes (or more)
> as well.  Use curlies instead:
> 
> /* { dg-final { scan-assembler {ld 0,16\(1\).*hashchk 0,} } } */
> 
> But, more importantly, "." by default matches anything, newlines as
> well.  You probably do not want that here, because your RE as written
> can match an "ld" in one function and a "hashchk" many functions later,
> many million lines later.
> 
> You can for example do
> /* { dg-final { scan-assembler {(?p)ld 0,.*\n.*\mhashchk 0,} } } */

I had to change your suggestion to the following, which works:

/* { dg-final { scan-assembler {(?p)\mmflr 0.*\n.*\n.*\mhashst 0,} } } */
/* { dg-final { scan-assembler {(?p)ld 0,.*\n.*\n.*\n.*\mhashchk 0,} } } */


Meaning from the current code generation, the hashst is 2 insns after
the mflr insn and the hashchk is 3 insns after the ld 0,16(1) insn.
But is this fragile?  Are we sure we won't schedule the hashst and
hashchk to some other location breaking the test case?



> (?p) is "partial newline-sensitive matching": it makes "." not match
> newlines.  This is often what you want.  This RE also makes sure that
> "hashchk" is the full mnemonic (not the tail of one), and that it is on
> the line after that "ld".

Well, this test case only has one function and is very small, so there
should only be one "mflr 0" and one "ld 0,16(1)".  Given the small size,
I assumed if the hashst and hashchk were after the mflr/ld 0,16(1), then
it's in the correct order and "fixed".  But maybe that's just as fragile
as assuming how many insns precede the hashst/hashchk?


If you prefer the updated change to your suggestion, let me know and I'll
commit it that way. 


> I'll note the test case uses the "new" rop_ok effective-target function which
> I submitted as a separate patch.

This test case uses the new rop_ok effective-target which I said I submitted
as a separate patch, but I can't seem to find the submission anywhere and it's
not even in the archive.  Did I forget to submit it?  Probably.  :-(
Let me do that now, as this relies on that patch.


Peter



Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-11-11 Thread Jonathan Wakely via Gcc-patches
On Thu, 11 Nov 2021 at 21:33, François Dumont  wrote:

> On 11/11/21 9:41 pm, Jonathan Wakely wrote:
>
>
>
> On Wed, 10 Nov 2021 at 11:55, Jonathan Wakely  wrote:
>
>>
>>
>> On Tue, 9 Nov 2021 at 16:25, Jonathan Wakely  wrote:
>>
>>>
>>>
>>> On Mon, 8 Nov 2021 at 21:36, François Dumont 
>>> wrote:
>>>
 Yet another version this time with only 1 guard implementation. The
 predicate to invalidate the safe iterators has been externalized.

 Ok to commit ?

>>>
>>> I like this version a lot - thanks for persisting with it.
>>>
>>>
>>
>> I'm seeing new failures with this:
>>
>> make check RUNTESTFLAGS="conformance.exp=23_containers/*/invalidation/*
>> --target_board=unix/-D_GLIBCXX_DEBUG/-std=gnu++98"
>>
>> FAIL: 23_containers/deque/debug/invalidation/1.cc (test for excess
>> errors)
>> FAIL: 23_containers/list/debug/invalidation/1.cc (test for excess errors)
>> FAIL: 23_containers/map/debug/invalidation/1.cc (test for excess errors)
>> FAIL: 23_containers/multimap/debug/invalidation/1.cc (test for excess
>> errors)
>> FAIL: 23_containers/multiset/debug/invalidation/1.cc (test for excess
>> errors)
>> FAIL: 23_containers/set/debug/invalidation/1.cc (test for excess errors)
>> FAIL: 23_containers/vector/debug/invalidation/1.cc (test for excess
>> errors)
>>
>
> It's caused by:
>
> --- a/libstdc++-v3/include/debug/safe_container.h
> +++ b/libstdc++-v3/include/debug/safe_container.h
> @@ -78,7 +78,6 @@ namespace __gnu_debug
>   { }
> #endif
>
> -public:
>   // Copy assignment invalidate all iterators.
>   _Safe_container&
>   operator=(const _Safe_container&) _GLIBCXX_NOEXCEPT
>
>
> For C++98 mode that gets called explicitly by the user-provided copy
> assignment operators in the derived class.
>
> I'm testing the attached fix.
>
> I am also testing a patch but yours looks nicer so go ahead.
>

I've pushed it to trunk now.



> I'll just complete it with some additional cleanup I did to suppress
> _Safe_container::_M_safe() and reduce usages of _M_base().
>

Sounds good, thanks.


[PATCH v2] implement -Winfinite-recursion [PR88232]

2021-11-11 Thread Martin Sebor via Gcc-patches

Attached is a v2 of the solution I posted earlier this week
with a few tweaks made after a more careful consideration of
the problem and possible false negatives and positives.

1) It avoids warning for [apparently infinitely recursive] calls
   in noreturn functions where the recursion may be prevented by
   a call to a noreturn function.
2) It avoids warning for calls where the recursion may be prevented
   by a call to a longjmp or siglongjmp.
3) It warns for recursive calls to built-ins in definitions of
   the corresponding library functions (e.g., for a call to
   __builtin_malloc in malloc).
4) It warns for calls to C++ functions even if they call other
   functions that might throw and so break out of the infinite
   recursion.  (E.g., operator new.)  This is the same as Clang.
5) It doesn't warn for calls to C++ functions with the throw
   expression.

Besides these changes to the warning itself, I've also improved
the code a bit by making the workhorse function a member of
the pass so recursive calls don't need to pass as many arguments
to itself.

Retested on x86_64-linux and by building Glibc and Binutils/GDB.

A possible enhancement is to warn for calls to calloc, malloc,
or realloc from within the definition of one of the other two
functions.  That might be a mistake made in code that tries
naively to replace the allocator with its own implementation.

On 11/9/21 9:28 PM, Martin Sebor wrote:

The attached patch adds support to the middle end for detecting
infinitely recursive calls.  The warning is controlled by the new
-Winfinite-recursion option.  The option name is the same as
Clang's.

I scheduled the warning pass to run after early inlining to
detect mutually recursive calls but before tail recursion which
turns some recursive calls into infinite loops and so makes
the two indistinguishable.

The warning detects a superset of problems detected by Clang
(based on its tests).  It detects the problem in PR88232
(the feature request) as well as the one in PR 87742,
an unrelated problem report that was root-caused to bug due
to infinite recursion.

This initial version doesn't attempt to deal with superimposed
symbols, so those might trigger false positives.  I'm not sure
that's something to worry about.

The tests are very light, but those for the exceptional cases
are exceedingly superficial, so it's possible those might harbor
some false positives and negatives.

Tested on x86_64-linux.

Martin



Implement -Winfinite-recursion [PR88232].

Resolves:
PR middle-end/88232 - Please implement -Winfinite-recursion

gcc/ChangeLog:

	PR middle-end/88232
	* Makefile.in (OBJS): Add gimple-warn-recursion.o.
	* common.opt: Add -Winfinite-recursion.
	* doc/invoke.texi (-Winfinite-recursion): Document.
	* passes.def (pass_warn_recursion): Schedule a new pass.
	* tree-pass.h (make_pass_warn_recursion): Declare.
	* gimple-warn-recursion.c: New file.

gcc/c-family/ChangeLog:

	PR middle-end/88232
	* c.opt: Add -Winfinite-recursion.

gcc/testsuite/ChangeLog:

	PR middle-end/88232
	* c-c++-common/attr-used-5.c: Suppress valid warning.
	* c-c++-common/attr-used-6.c: Same.
	* c-c++-common/attr-used-9.c: Same.
	* g++.dg/warn/Winfinite-recursion-2.C: New test.
	* g++.dg/warn/Winfinite-recursion-3.C: New test.
	* g++.dg/warn/Winfinite-recursion.C: New test.
	* gcc.dg/Winfinite-recursion-2.c: New test.
	* gcc.dg/Winfinite-recursion.c: New test.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 571e9c28e29..a4344d67f44 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1420,6 +1420,7 @@ OBJS = \
 	gimple-streamer-in.o \
 	gimple-streamer-out.o \
 	gimple-walk.o \
+	gimple-warn-recursion.o \
 	gimplify.o \
 	gimplify-me.o \
 	godump.o \
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 06457ac739e..7fb13f278e8 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -714,6 +714,10 @@ Wincompatible-pointer-types
 C ObjC Var(warn_incompatible_pointer_types) Init(1) Warning
 Warn when there is a conversion between pointers that have incompatible types.
 
+Winfinite-recursion
+C ObjC C++ LTO ObjC++ Var(warn_infinite_recursion) Warning LangEnabledBy(C ObjC C++ LTO ObjC++, Wall)
+Warn for infinitely recursive calls.
+
 Waddress-of-packed-member
 C ObjC C++ ObjC++ Var(warn_address_of_packed_member) Init(1) Warning
 Warn when the address of packed member of struct or union is taken.
diff --git a/gcc/common.opt b/gcc/common.opt
index de9b848eda5..a98545641fa 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -636,6 +636,10 @@ Wimplicit-fallthrough=
 Common Var(warn_implicit_fallthrough) RejectNegative Joined UInteger Warning IntegerRange(0, 5)
 Warn when a switch case falls through.
 
+Winfinite-recursion
+Var(warn_infinite_recursion) Warning
+Warn for infinitely recursive calls.
+
 Winline
 Common Var(warn_inline) Warning Optimization
 Warn when an inlined function cannot be inlined.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a7c4d24a762..bd4e2a78695 100644
--- a/gcc/doc/invoke.texi

Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-11-11 Thread François Dumont via Gcc-patches

On 11/11/21 9:41 pm, Jonathan Wakely wrote:



On Wed, 10 Nov 2021 at 11:55, Jonathan Wakely > wrote:




On Tue, 9 Nov 2021 at 16:25, Jonathan Wakely mailto:jwak...@redhat.com>> wrote:



On Mon, 8 Nov 2021 at 21:36, François Dumont
mailto:frs.dum...@gmail.com>> wrote:

Yet another version this time with only 1 guard
implementation. The predicate to invalidate the safe
iterators has been externalized.

Ok to commit ?


I like this version a lot - thanks for persisting with it.



I'm seeing new failures with this:

make check
RUNTESTFLAGS="conformance.exp=23_containers/*/invalidation/*
--target_board=unix/-D_GLIBCXX_DEBUG/-std=gnu++98"

FAIL: 23_containers/deque/debug/invalidation/1.cc (test for excess
errors)
FAIL: 23_containers/list/debug/invalidation/1.cc (test for excess
errors)
FAIL: 23_containers/map/debug/invalidation/1.cc (test for excess
errors)
FAIL: 23_containers/multimap/debug/invalidation/1.cc (test for
excess errors)
FAIL: 23_containers/multiset/debug/invalidation/1.cc (test for
excess errors)
FAIL: 23_containers/set/debug/invalidation/1.cc (test for excess
errors)
FAIL: 23_containers/vector/debug/invalidation/1.cc (test for
excess errors)


It's caused by:

--- a/libstdc++-v3/include/debug/safe_container.h
+++ b/libstdc++-v3/include/debug/safe_container.h
@@ -78,7 +78,6 @@namespace __gnu_debug
  { }
#endif

-    public:
  // Copy assignment invalidate all iterators.
  _Safe_container&
  operator=(const _Safe_container&) _GLIBCXX_NOEXCEPT


For C++98 mode that gets called explicitly by the user-provided copy 
assignment operators in the derived class.


I'm testing the attached fix.

I am also testing a patch but yours looks nicer so go ahead. I'll just 
complete it with some additional cleanup I did to suppress 
_Safe_container::_M_safe() and reduce usages of _M_base().


Thanks




Re: [PATCH 16/18] rs6000: Test case adjustments

2021-11-11 Thread Bill Schmidt via Gcc-patches
On 11/11/21 2:06 PM, Bill Schmidt wrote:
>
>>> --- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
>>> @@ -11,9 +11,9 @@
>>>  /* { dg-final { scan-assembler-times {\mvrlq\M} 2 } } */
>>>  /* { dg-final { scan-assembler-times {\mvrlqnm\M} 2 } } */
>>>  /* { dg-final { scan-assembler-times {\mvrlqmi\M} 2 } } */
>>> -/* { dg-final { scan-assembler-times {\mvcmpequq\M} 16 } } */
>>> -/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 16 } } */
>>> -/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 16 } } */
>>> +/* { dg-final { scan-assembler-times {\mvcmpequq\M} 24 } } */
>>> +/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 26 } } */
>>> +/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 26 } } */
>>>  /* { dg-final { scan-assembler-times {\mvmuloud\M} 1 } } */
>>>  /* { dg-final { scan-assembler-times {\mvmulesd\M} 1 } } */
>>>  /* { dg-final { scan-assembler-times {\mvmulosd\M} 1 } } */
>> And this?
> Again I'm a little sketchy on the details, but I believe this resulted
> from some of the vector compares having been previously omitted by
> accident from gimple expansion.  When I added them in for the new
> support, that gave us increased counts here because the code generation
> was improved.  I'll double-check this one as well to provide a more
> certain explanation.

Upon review [1], it was the other way around.  I removed some of the
builtins from early gimple expansion because if we expand those early,
we get poor code generation instead of the vector compare instructions
we want.  As a result we get more matches in this test case.

Thanks!
Bill

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576526.html

>
>>> --- a/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
>>> @@ -126,6 +126,7 @@ void foo (vector signed char *vscr,
>>>  /* { dg-final { scan-assembler-times "vsubcuw" 4 } } */
>>>  /* { dg-final { scan-assembler-times "vsubuwm" 4 } } */
>>>  /* { dg-final { scan-assembler-times "vbpermq" 2 } } */
>>> +/* { dg-final { scan-assembler-times "vbpermd" 0 } } */
>>>  /* { dg-final { scan-assembler-times "xxleqv" 4 } } */
>>>  /* { dg-final { scan-assembler-times "vgbbd" 1 } } */
>>>  /* { dg-final { scan-assembler-times "xxlnand" 4 } } */
>> This curious one could have been a separate (obvious) patch.  It is a
>> bit out-of-place here.
> Yeah, bit of a head-scratcher, this.  The test case probably went
> through a few revisions.  I'll test it once more and commit it
> separately.
>
>>> --- a/gcc/testsuite/gcc.target/powerpc/pragma_power8.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/pragma_power8.c
>>> @@ -19,6 +19,7 @@ test1 (vector int a, vector int b)
>>>  #pragma GCC target ("cpu=power7")
>>>  /* Force a re-read of altivec.h with new cpu target. */
>>>  #undef _ALTIVEC_H
>>> +#undef _RS6000_VECDEFINES_H
>>>  #include 
>> Wow ugly :-)  But nothing new here, heh.  Best not to look at testcase
>> internals too closely, in any case.
>>
>>> --- a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
>>> @@ -1,5 +1,6 @@
>>>  /* { dg-do run { target { powerpc*-*-* } } } */
>>> -/* { dg-options "-O2 -std=c99" } */
>>> +/* { dg-options "-O2 -std=c99 -mcpu=power9" } */
>>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>>>  
>>>  #ifdef DEBUG
>>>  #include 
>> This one is a bug fix as well (and obvious).
> Yeah. :-(  Will handle.
>>> --- a/gcc/testsuite/gcc.target/powerpc/vsu/vec-all-nez-7.c
>>> +++ b/gcc/testsuite/gcc.target/powerpc/vsu/vec-all-nez-7.c
>>> @@ -12,5 +12,5 @@ test_all_not_equal_and_not_zero (vector unsigned short 
>>> *arg1_p,
>>>vector unsigned short arg_2 = *arg2_p;
>>>  
>>>return __builtin_vec_vcmpnez_p (__CR6_LT, arg_1, arg_2);
>>> -  /* { dg-error "'__builtin_altivec_vcmpnezh_p' requires the 
>>> '-mcpu=power9' option" "" { target *-*-* } .-1 } */
>>> +  /* { dg-error "'__builtin_altivec_vcmpnezh_p' requires the 
>>> '-mpower9-vector' option" "" { target *-*-* } .-1 } */
>>>  }
>> Hrm.  People should not use the -mpower9-vector option (except implied
>> by -mcpu=power9, without vectors disabled).  How hard is it to give a
>> better error message here?
> Yeah, agreed, I think I can fix that easily enough.  There may be similar
> issues with -mpower8-vector as well that should be fixed.
>
> Thanks for the review!  I'll get back on this one soon.
>
> Bill
>
>> The obvious bugfixes independent of this series are of course okay for
>> trunk, as separate patches, now.  But some more work is needed
>> elsewhere.
>>
>>
>> Segher


[PATCH, committed] rs6000: Fix test_mffsl.c to require Power9 support

2021-11-11 Thread Bill Schmidt via Gcc-patches
Hi!  Per previous discussion, committed the following as obvious.

commit 8a8458ac6bbc4263dd2c1ee55979b29fc7195794
Author: Bill Schmidt 
Date:   Thu Nov 11 14:36:04 2021 -0600

rs6000: Fix test_mffsl.c to require Power9 support

2021-11-11  Bill Schmidt  

gcc/testsuite/
* gcc.target/powerpc/test_mffsl.c: Require Power9.

diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c 
b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
index 41377efba1a..28c2b91988e 100644
--- a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
+++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c
@@ -1,5 +1,6 @@
 /* { dg-do run { target { powerpc*-*-* } } } */
-/* { dg-options "-O2 -std=c99" } */
+/* { dg-options "-O2 -std=c99 -mcpu=power9" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
 
 #ifdef DEBUG
 #include 



Re: [PATH][_GLIBCXX_DEBUG] Fix unordered container merge

2021-11-11 Thread Jonathan Wakely via Gcc-patches
On Wed, 10 Nov 2021 at 11:55, Jonathan Wakely  wrote:

>
>
> On Tue, 9 Nov 2021 at 16:25, Jonathan Wakely  wrote:
>
>>
>>
>> On Mon, 8 Nov 2021 at 21:36, François Dumont 
>> wrote:
>>
>>> Yet another version this time with only 1 guard implementation. The
>>> predicate to invalidate the safe iterators has been externalized.
>>>
>>> Ok to commit ?
>>>
>>
>> I like this version a lot - thanks for persisting with it.
>>
>>
>
> I'm seeing new failures with this:
>
> make check RUNTESTFLAGS="conformance.exp=23_containers/*/invalidation/*
> --target_board=unix/-D_GLIBCXX_DEBUG/-std=gnu++98"
>
> FAIL: 23_containers/deque/debug/invalidation/1.cc (test for excess errors)
> FAIL: 23_containers/list/debug/invalidation/1.cc (test for excess errors)
> FAIL: 23_containers/map/debug/invalidation/1.cc (test for excess errors)
> FAIL: 23_containers/multimap/debug/invalidation/1.cc (test for excess
> errors)
> FAIL: 23_containers/multiset/debug/invalidation/1.cc (test for excess
> errors)
> FAIL: 23_containers/set/debug/invalidation/1.cc (test for excess errors)
> FAIL: 23_containers/vector/debug/invalidation/1.cc (test for excess errors)
>

It's caused by:

--- a/libstdc++-v3/include/debug/safe_container.h
+++ b/libstdc++-v3/include/debug/safe_container.h
@@ -78,7 +78,6 @@ namespace __gnu_debug
  { }
#endif

-public:
  // Copy assignment invalidate all iterators.
  _Safe_container&
  operator=(const _Safe_container&) _GLIBCXX_NOEXCEPT


For C++98 mode that gets called explicitly by the user-provided copy
assignment operators in the derived class.

I'm testing the attached fix.
commit 7075abd518364b8d9767079e044baba86145cc08
Author: Jonathan Wakely 
Date:   Thu Nov 11 20:23:48 2021

libstdc++: Fix debug containers for C++98 mode

Since r12-5072 made _Safe_container::operator=(const _Safe_container&)
protected, the debug containers no longer compile in C++98 mode. They
have user-provided copy assignment operators in C++98 mode, and they
assign each base class in turn. The 'this->_M_safe() = __x' expressions
fail, because calling a protected member function is only allowed via
`this`. They could be fixed by using this->_Safe::operator=(__x) but a
simpler solution is to just remove the user-provided assignment
operators and let the compiler define them (as in C++11 and later).

The only change needed for that to work is to define the _Safe_vector
copy assignment operator in C++98 mode, so that the implicit
__gnu_debug::vector::operator= definition will call it, instead of
calling _M_update_guaranteed_capacity() manually.

libstdc++-v3/ChangeLog:

* include/debug/deque (deque::operator=(const deque&)): Remove
definition.
* include/debug/list (list::operator=(const list&)): Likewise.
* include/debug/map.h (map::operator=(const map&)): Likewise.
* include/debug/multimap.h (multimap::operator=(const multimap&)):
Likewise.
* include/debug/multiset.h (multiset::operator=(const multiset&)):
Likewise.
* include/debug/set.h (set::operator=(const set&)): Likewise.
* include/debug/string (basic_string::operator=(const 
basic_string&)):
Likewise.
* include/debug/vector (vector::operator=(const vector&)):
Likewise.
(_Safe_vector::operator=(const _Safe_vector&)): Define for
C++98 as well.

diff --git a/libstdc++-v3/include/debug/deque b/libstdc++-v3/include/debug/deque
index 8e4811149d2..52778ba1617 100644
--- a/libstdc++-v3/include/debug/deque
+++ b/libstdc++-v3/include/debug/deque
@@ -156,15 +156,7 @@ namespace __debug
   deque(_Base_ref __x)
   : _Base(__x._M_ref) { }
 
-#if __cplusplus < 201103L
-  deque&
-  operator=(const deque& __x)
-  {
-   this->_M_safe() = __x;
-   _M_base() = __x;
-   return *this;
-  }
-#else
+#if __cplusplus >= 201103L
   deque&
   operator=(const deque&) = default;
 
diff --git a/libstdc++-v3/include/debug/list b/libstdc++-v3/include/debug/list
index de30edb19c2..f40ebc8521e 100644
--- a/libstdc++-v3/include/debug/list
+++ b/libstdc++-v3/include/debug/list
@@ -161,15 +161,7 @@ namespace __debug
   list(_Base_ref __x)
   : _Base(__x._M_ref) { }
 
-#if __cplusplus < 201103L
-  list&
-  operator=(const list& __x)
-  {
-   this->_M_safe() = __x;
-   _M_base() = __x;
-   return *this;
-  }
-#else
+#if __cplusplus >= 201103L
   list&
   operator=(const list&) = default;
 
diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h
index 9e142cf7023..3883c546871 100644
--- a/libstdc++-v3/include/debug/map.h
+++ b/libstdc++-v3/include/debug/map.h
@@ -152,15 +152,7 @@ namespace __debug
__gnu_debug::__base(__last),
__comp, __a) { }
 
-#if __cplusplus < 201103L
-  map&
-  operator=(const map& __x)
-  {

Go patch committed: Traverse func subexprs when creating func descriptors

2021-11-11 Thread Ian Lance Taylor via Gcc-patches
This patch to the Go frontend fixes the Create_func_descriptors pass
to traverse the subexpressions of the function in a Call_expression.
There are no subexpressions in the normal case of calling a function or a
method directly, but there are subexpressions in code like
F().M() when F returns an interface type.

Forgetting to traverse the function subexpressions was almost entirely
hidden by the fact that we also created the necessary thunks in
Bound_method_expression::do_flatten and
Interface_field_reference_expression::do_get_backend.  However, when
the thunks were created there, they did not go through the
order_evaluations pass.  This almost always worked, but failed in the
case in which the function being thunked returned multiple results, as
order_evaluations takes the necessary step of moving the
Call_expression into its own statement, and that would not happen when
order_evaluations was not called.  Avoid hiding errors like this by
changing those methods to only lookup the previously created thunk,
rather than creating it if it was not already created.

The test case for this is https://golang.org/cl/363156.

This fixes https://golang.org/issue/49512.

Bootstrapped and ran Go tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

patch.txt
2b7d9dc49cbda7a8e98cfe0c59a8057b5c30907c
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index e7ff6705563..05e47ec3fa9 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-128ea3dce9b8753167f33d0a96bd093a6cbd58b8
+3e9f4ee16683883ccfb8661d99318c74bb7a4bef
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index ddb1d91f3e5..79702821336 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -7981,7 +7981,7 @@ Bound_method_expression::do_check_types(Gogo*)
 Bound_method_expression::Method_value_thunks
   Bound_method_expression::method_value_thunks;
 
-// Find or create the thunk for METHOD.
+// Find or create the thunk for FN.
 
 Named_object*
 Bound_method_expression::create_thunk(Gogo* gogo, const Method* method,
@@ -8078,14 +8078,28 @@ Bound_method_expression::create_thunk(Gogo* gogo, const 
Method* method,
   gogo->add_statement(s);
   Block* b = gogo->finish_block(loc);
   gogo->add_block(b, loc);
+
+  // This is called after lowering but before determine_types.
   gogo->lower_block(new_no, b);
-  gogo->flatten_block(new_no, b);
+
   gogo->finish_function(loc);
 
   ins.first->second = new_no;
   return new_no;
 }
 
+// Look up a thunk for FN.
+
+Named_object*
+Bound_method_expression::lookup_thunk(Named_object* fn)
+{
+  Method_value_thunks::const_iterator p =
+Bound_method_expression::method_value_thunks.find(fn);
+  if (p == Bound_method_expression::method_value_thunks.end())
+return NULL;
+  return p->second;
+}
+
 // Return an expression to check *REF for nil while dereferencing
 // according to FIELD_INDEXES.  Update *REF to build up the field
 // reference.  This is a static function so that we don't have to
@@ -8129,10 +8143,11 @@ Bound_method_expression::do_flatten(Gogo* gogo, 
Named_object*,
 {
   Location loc = this->location();
 
-  Named_object* thunk = Bound_method_expression::create_thunk(gogo,
- this->method_,
- this->function_);
-  if (thunk->is_erroneous())
+  Named_object* thunk = Bound_method_expression::lookup_thunk(this->function_);
+
+  // The thunk should have been created during the
+  // create_function_descriptors pass.
+  if (thunk == NULL || thunk->is_erroneous())
 {
   go_assert(saw_errors());
   return Expression::make_error(loc);
@@ -14757,14 +14772,34 @@ 
Interface_field_reference_expression::create_thunk(Gogo* gogo,
   gogo->add_statement(s);
   Block* b = gogo->finish_block(loc);
   gogo->add_block(b, loc);
+
+  // This is called after lowering but before determine_types.
   gogo->lower_block(new_no, b);
-  gogo->flatten_block(new_no, b);
+
   gogo->finish_function(loc);
 
   ins.first->second->push_back(std::make_pair(name, new_no));
   return new_no;
 }
 
+// Lookup a thunk to call method NAME on TYPE.
+
+Named_object*
+Interface_field_reference_expression::lookup_thunk(Interface_type* type,
+  const std::string& name)
+{
+  Interface_method_thunks::const_iterator p =
+Interface_field_reference_expression::interface_method_thunks.find(type);
+  if (p == Interface_field_reference_expression::interface_method_thunks.end())
+return NULL;
+  for (Method_thunks::const_iterator pm = p->second->begin();
+   pm != p->second->end();
+   ++pm)
+if (pm->first == name)
+  return pm->second;
+  return NULL;
+}
+
 // Get the backend representation for a method value.
 
 Bexpression*
@@ -14778,9 

Re: [PATCH 16/18] rs6000: Test case adjustments

2021-11-11 Thread Bill Schmidt via Gcc-patches
Hi Segher,

[Sorry to be answering these out of order...]

On 11/5/21 5:37 PM, Segher Boessenkool wrote:
> On Wed, Sep 01, 2021 at 11:13:52AM -0500, Bill Schmidt wrote:
>>  * gcc.target/powerpc/bfp/scalar-extract-exp-2.c: Adjust.
> My favourite changelog entry!  But, adjust to what?  This is the first
> line :-)
>
> "Adjust expected error message"?

OK, I'll be a bit less succinct. :-)
>
> But you should fold this patch with some previous patch anyway, when
> committing (or you break bisecting).

Yes, I failed to mention that patches 15-17 need to go in together to avoid
bisection problems.

>
>> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-floatdouble.c
>> @@ -18,7 +18,7 @@ vector float test_fc ()
>>  vector double testd_00 (vector double x) { return vec_splat (x, 0b0); }
>>  vector double testd_01 (vector double x) { return vec_splat (x, 0b1); }
>>  vector double test_dc ()
>> -{ const vector double y = { 3.0, 5.0 }; return vec_splat (y, 0b00010); }
>> +{ const vector double y = { 3.0, 5.0 }; return vec_splat (y, 0b1); }
>>  
>>  /* If the source vector is a known constant, we will generate a load or 
>> possibly
>> XXSPLTIW.  */
>> @@ -28,5 +28,5 @@ vector double test_dc ()
>>  /* { dg-final { scan-assembler-times {\mvspltw\M|\mxxspltw\M} 3 } } */
>>  
>>  /* For double types, we will generate xxpermdi instructions.  */
>> -/* { dg-final { scan-assembler-times "xxpermdi" 3 } } */
>> +/* { dg-final { scan-assembler-times "xxpermdi" 2 } } */
> Why these changes?

Sorry, I should have done a better job of explaining these.  For vector
double, only one bit matters, so the bit mask 0b00010 is a nonsensical
thing to have in the test case.  Replacing that with 0b1 resulted
in one fewer xxpermdi required.  I'm going to review this one more time
to remind myself why, since I made this change a long time ago and it's
not fresh in my mind; it made sense then! :-)

>
>> --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c
>> @@ -9,23 +9,19 @@
>>  
>>  vector bool long long testb_00 (vector bool long long x) { return vec_splat 
>> (x, 0b0); }
>>  vector bool long long testb_01 (vector bool long long x) { return vec_splat 
>> (x, 0b1); }
>> -vector bool long long testb_02 (vector bool long long x) { return vec_splat 
>> (x, 0b00010); }
>>  
>>  vector signed long long tests_00 (vector signed long long x) { return 
>> vec_splat (x, 0b0); }
>>  vector signed long long tests_01 (vector signed long long x) { return 
>> vec_splat (x, 0b1); }
>> -vector signed long long tests_02 (vector signed long long x) { return 
>> vec_splat (x, 0b00010); }
>>  
>>  vector unsigned long long testu_00 (vector unsigned long long x) { return 
>> vec_splat (x, 0b0); }
>>  vector unsigned long long testu_01 (vector unsigned long long x) { return 
>> vec_splat (x, 0b1); }
>> -vector unsigned long long testu_02 (vector unsigned long long x) { return 
>> vec_splat (x, 0b00010); }
>>  
>>  /* Similar test as above, but the source vector is a known constant. */
>> -vector bool long long test_bll () { const vector bool long long y = {12, 
>> 23}; return vec_splat (y, 0b00010); }
>> -vector signed long long test_sll () { const vector signed long long y = 
>> {34, 45}; return vec_splat (y, 0b00010); }
>> -vector unsigned long long test_ull () { const vector unsigned long long y = 
>> {56, 67}; return vec_splat (y, 0b00010); }
>> +vector bool long long test_bll () { const vector bool long long y = {12, 
>> 23}; return vec_splat (y, 0b1); }
>> +vector signed long long test_sll () { const vector signed long long y = 
>> {34, 45}; return vec_splat (y, 0b1); }
>>  
>>  /* Assorted load instructions for the initialization with known constants. 
>> */
>> -/* { dg-final { scan-assembler-times {\mlvx\M|\mlxvd2x\M|\mlxv\M|\mplxv\M} 
>> 3 } } */
>> +/* { dg-final { scan-assembler-times 
>> {\mlvx\M|\mlxvd2x\M|\mlxv\M|\mplxv\M|\mxxspltib\M} 2 } } */
>>  
>>  /* xxpermdi for vec_splat of long long vectors.
>>   At the time of this writing, the number of xxpermdi instructions
> Ditto.

Same issue.  0b00010 makes no sense for vector long long.  I need to remind
myself about the change in counts here as well.

>
>> --- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
>> @@ -11,9 +11,9 @@
>>  /* { dg-final { scan-assembler-times {\mvrlq\M} 2 } } */
>>  /* { dg-final { scan-assembler-times {\mvrlqnm\M} 2 } } */
>>  /* { dg-final { scan-assembler-times {\mvrlqmi\M} 2 } } */
>> -/* { dg-final { scan-assembler-times {\mvcmpequq\M} 16 } } */
>> -/* { dg-final { scan-assembler-times {\mvcmpgtsq\M} 16 } } */
>> -/* { dg-final { scan-assembler-times {\mvcmpgtuq\M} 16 } } */
>> +/* { dg-final { scan-assembler-times {\mvcmpequq\M} 24 } } */
>> +/* { dg-final { scan-assembler-times 

[committed] libstdc++: Make pmr::memory_resource::allocate implicitly create objects

2021-11-11 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.


Calling the placement version of ::operator new "implicitly creates
objects in the returned region of storage" as per [intro.object]. This
allows the returned memory to be used as storage for implicit-lifetime
types (including arrays) without additional action by the caller. This
is required by the proposed resolution of LWG 3147.

libstdc++-v3/ChangeLog:

* include/std/memory_resource (memory_resource::allocate):
Implicitly create objects in the returned storage.
---
 libstdc++-v3/include/std/memory_resource | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/memory_resource 
b/libstdc++-v3/include/std/memory_resource
index 6bca0afa018..97d37f18512 100644
--- a/libstdc++-v3/include/std/memory_resource
+++ b/libstdc++-v3/include/std/memory_resource
@@ -33,6 +33,7 @@
 
 #if __cplusplus >= 201703L
 
+#include 
 #include   // vector
 #include  // size_t, max_align_t, byte
 #include // shared_mutex
@@ -103,7 +104,7 @@ namespace pmr
 void*
 allocate(size_t __bytes, size_t __alignment = _S_max_align)
 __attribute__((__returns_nonnull__,__alloc_size__(2),__alloc_align__(3)))
-{ return do_allocate(__bytes, __alignment); }
+{ return ::operator new(__bytes, do_allocate(__bytes, __alignment)); }
 
 void
 deallocate(void* __p, size_t __bytes, size_t __alignment = _S_max_align)
-- 
2.31.1



[committed] libstdc++: Remove public std::vector::data() member

2021-11-11 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, pushed to trunk.


This function only exists to avoid an error in the debug mode vector, so
doesn't need to be public.

libstdc++-v3/ChangeLog:

* include/bits/stl_bvector.h (vector::data()): Give
protected access, and delete for C++11 and later.
---
 libstdc++-v3/include/bits/stl_bvector.h | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_bvector.h 
b/libstdc++-v3/include/bits/stl_bvector.h
index 3778d5a770a..31d878427c2 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -996,14 +996,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   back() const
   { return *(end() - 1); }
 
-  // _GLIBCXX_RESOLVE_LIB_DEFECTS
-  // DR 464. Suggestion for new member functions in standard containers.
-  // N.B. DR 464 says nothing about vector but we need something
-  // here due to the way we are implementing DR 464 in the debug-mode
-  // vector class.
-  void
-  data() _GLIBCXX_NOEXCEPT { }
-
   void
   push_back(bool __x)
   {
@@ -1363,7 +1355,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
   iterator
   _M_erase(iterator __first, iterator __last);
-  };
+
+protected:
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // DR 464. Suggestion for new member functions in standard containers.
+  // N.B. DR 464 says nothing about vector but we need something
+  // here due to the using-declaration in __gnu_debug::vector.
+  // vector class.
+#if __cplusplus >= 201103L
+  void data() = delete;
+#else
+  void data() { }
+#endif
+};
 
 _GLIBCXX_END_NAMESPACE_CONTAINER
 
-- 
2.31.1



[PATCH 2/3] gimple-fold: Use ranges to simplify _chk calls

2021-11-11 Thread Siddhesh Poyarekar
Instead of comparing LEN and SIZE only if they are constants, use their
ranges to decide if LEN will always be lower than or same as SIZE.

This change ends up putting the stringop-overflow warning line number
against the strcpy implementation, so adjust the warning check to be
line number agnostic.

gcc/ChangeLog:

* gimple-fold.c (known_safe): New function.
(gimple_fold_builtin_strncat_chk,
gimple_fold_builtin_memory_chk, gimple_fold_builtin_stxcpy_chk,
gimple_fold_builtin_stxncpy_chk,
gimple_fold_builtin_snprintf_chk,
gimple_fold_builtin_sprintf_chk): Use it.

gcc/testsuite/ChangeLog:

* gcc.dg/Wobjsize-1.c: Make warning change line agnostic.
* gcc.dg/builtin-chk-fold.c: New test.

Signed-off-by: Siddhesh Poyarekar 
---
 gcc/gimple-fold.c   | 216 +---
 gcc/testsuite/gcc.dg/Wobjsize-1.c   |   5 +-
 gcc/testsuite/gcc.dg/fold-stringops-2.c |  49 ++
 3 files changed, 132 insertions(+), 138 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-2.c

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 92e15784803..bcfd5d97feb 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -2031,6 +2031,28 @@ get_maxval_strlen (tree arg, strlen_range_kind rkind, 
tree *nonstr = NULL)
   return lendata.decl ? NULL_TREE : lendata.maxlen;
 }
 
+/* Return true if LEN is known to be less than or equal to SIZE at compile time
+   and false otherwise.  EXTRABYTE is true if there needs to be room for an
+   extra NUL byte at the end.  */
+
+static bool
+known_safe (gimple *stmt, tree len, tree size, bool extrabyte = false)
+{
+  if (len == NULL_TREE)
+return false;
+
+  wide_int size_range[2];
+  wide_int len_range[2];
+  if (get_range (len, stmt, len_range) && get_range (size, stmt, size_range))
+{
+  if (extrabyte)
+   return wi::ltu_p (len_range[1], size_range[0]);
+  else
+   return wi::leu_p (len_range[1], size_range[0]);
+}
+
+  return false;
+}
 
 /* Fold function call to builtin strcpy with arguments DEST and SRC.
If LEN is not NULL, it represents the length of the string to be
@@ -2566,16 +2588,10 @@ gimple_fold_builtin_strncat_chk (gimple_stmt_iterator 
*gsi)
   return true;
 }
 
-  if (! tree_fits_uhwi_p (size))
-return false;
-
   if (! integer_all_onesp (size))
 {
   tree src_len = c_strlen (src, 1);
-  if (src_len
- && tree_fits_uhwi_p (src_len)
- && tree_fits_uhwi_p (len)
- && ! tree_int_cst_lt (len, src_len))
+  if (known_safe (stmt, src_len, len))
{
  /* If LEN >= strlen (SRC), optimize into __strcat_chk.  */
  fn = builtin_decl_explicit (BUILT_IN_STRCAT_CHK);
@@ -3024,39 +3040,24 @@ gimple_fold_builtin_memory_chk (gimple_stmt_iterator 
*gsi,
}
 }
 
-  if (! tree_fits_uhwi_p (size))
-return false;
-
   tree maxlen = get_maxval_strlen (len, SRK_INT_VALUE);
-  if (! integer_all_onesp (size))
+  if (! integer_all_onesp (size)
+  && !known_safe (stmt, len, size) && !known_safe (stmt, maxlen, size))
 {
-  if (! tree_fits_uhwi_p (len))
+  /* MAXLEN and LEN both cannot be proved to be less than SIZE, at
+least try to optimize (void) __mempcpy_chk () into
+(void) __memcpy_chk () */
+  if (fcode == BUILT_IN_MEMPCPY_CHK && ignore)
{
- /* If LEN is not constant, try MAXLEN too.
-For MAXLEN only allow optimizing into non-_ocs function
-if SIZE is >= MAXLEN, never convert to __ocs_fail ().  */
- if (maxlen == NULL_TREE || ! tree_fits_uhwi_p (maxlen))
-   {
- if (fcode == BUILT_IN_MEMPCPY_CHK && ignore)
-   {
- /* (void) __mempcpy_chk () can be optimized into
-(void) __memcpy_chk ().  */
- fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
- if (!fn)
-   return false;
+ fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
+ if (!fn)
+   return false;
 
- gimple *repl = gimple_build_call (fn, 4, dest, src, len, 
size);
- replace_call_with_call_and_fold (gsi, repl);
- return true;
-   }
- return false;
-   }
+ gimple *repl = gimple_build_call (fn, 4, dest, src, len, size);
+ replace_call_with_call_and_fold (gsi, repl);
+ return true;
}
-  else
-   maxlen = len;
-
-  if (tree_int_cst_lt (size, maxlen))
-   return false;
+  return false;
 }
 
   fn = NULL_TREE;
@@ -3139,61 +3140,48 @@ gimple_fold_builtin_stxcpy_chk (gimple_stmt_iterator 
*gsi,
   return true;
 }
 
-  if (! tree_fits_uhwi_p (size))
-return false;
-
   tree maxlen = get_maxval_strlen (src, SRK_STRLENMAX);
   if (! integer_all_onesp (size))
 {
   len = c_strlen (src, 1);
-  if (! len || ! tree_fits_uhwi_p (len))
+  if (!known_safe 

[PATCH 3/3] gimple-fold: Use ranges to simplify strncat and snprintf

2021-11-11 Thread Siddhesh Poyarekar
Remove the warnings for strncat since they are already handled (and even
the error messages look identical) in gimple-ssa-warn-access.  Instead,
use len range to determine if it is within bounds of source and
destination and simplify it to strcat if it's safe.

Likewise for snprintf, use ranges to determine if it can be transformed
to strcpy.

gcc/ChangeLog:

* gimple-fold.c (gimple_fold_builtin_strncat): Remove warnings
and use ranges to determine if it is safe to transform to
strcat.
(gimple_fold_builtin_snprintf): Likewise.

gcc/testsuite/ChangeLog:

* gcc.dg/fold-stringops-2.c: Define size_t.
(safe1): Adjust.
(safe4): New test.
* gcc.dg/fold-stringops-3.c: New test.

Signed-off-by: Siddhesh Poyarekar 
---
 gcc/gimple-fold.c   | 76 +
 gcc/testsuite/gcc.dg/fold-stringops-2.c | 16 +-
 gcc/testsuite/gcc.dg/fold-stringops-3.c | 18 ++
 3 files changed, 47 insertions(+), 63 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-3.c

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index bcfd5d97feb..3112b86c2f7 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -2485,72 +2485,29 @@ gimple_fold_builtin_strncat (gimple_stmt_iterator *gsi)
   tree dst = gimple_call_arg (stmt, 0);
   tree src = gimple_call_arg (stmt, 1);
   tree len = gimple_call_arg (stmt, 2);
-
-  const char *p = c_getstr (src);
+  tree src_len = c_strlen (src, 1);
 
   /* If the requested length is zero, or the src parameter string
  length is zero, return the dst parameter.  */
-  if (integer_zerop (len) || (p && *p == '\0'))
+  if (integer_zerop (len) || (src_len && integer_zerop (src_len)))
 {
   replace_call_with_value (gsi, dst);
   return true;
 }
 
-  if (TREE_CODE (len) != INTEGER_CST || !p)
-return false;
-
-  unsigned srclen = strlen (p);
-
-  int cmpsrc = compare_tree_int (len, srclen);
-
   /* Return early if the requested len is less than the string length.
  Warnings will be issued elsewhere later.  */
-  if (cmpsrc < 0)
+  if (!known_safe (stmt, src_len, len))
 return false;
 
   unsigned HOST_WIDE_INT dstsize;
 
-  bool nowarn = warning_suppressed_p (stmt, OPT_Wstringop_overflow_);
-
-  if (!nowarn && compute_builtin_object_size (dst, 1, ))
-{
-  int cmpdst = compare_tree_int (len, dstsize);
-
-  if (cmpdst >= 0)
-   {
- tree fndecl = gimple_call_fndecl (stmt);
-
- /* Strncat copies (at most) LEN bytes and always appends
-the terminating NUL so the specified bound should never
-be equal to (or greater than) the size of the destination.
-If it is, the copy could overflow.  */
- location_t loc = gimple_location (stmt);
- nowarn = warning_at (loc, OPT_Wstringop_overflow_,
-  cmpdst == 0
-  ? G_("%qD specified bound %E equals "
-   "destination size")
-  : G_("%qD specified bound %E exceeds "
-   "destination size %wu"),
-  fndecl, len, dstsize);
- if (nowarn)
-   suppress_warning (stmt, OPT_Wstringop_overflow_);
-   }
-}
-
-  if (!nowarn && cmpsrc == 0)
-{
-  tree fndecl = gimple_call_fndecl (stmt);
-  location_t loc = gimple_location (stmt);
-
-  /* To avoid possible overflow the specified bound should also
-not be equal to the length of the source, even when the size
-of the destination is unknown (it's not an uncommon mistake
-to specify as the bound to strncpy the length of the source).  */
-  if (warning_at (loc, OPT_Wstringop_overflow_,
- "%qD specified bound %E equals source length",
- fndecl, len))
-   suppress_warning (stmt, OPT_Wstringop_overflow_);
-}
+  /* Likewise, bail out from the transformation if we're unable to determine
+ the destination size.  Warnings will be issued elsewhere later.  */
+  if (!compute_builtin_object_size (dst, 1, )
+  || !known_safe (stmt, len, build_int_cstu (TREE_TYPE (len), dstsize),
+ true))
+return false;
 
   tree fn = builtin_decl_implicit (BUILT_IN_STRCAT);
 
@@ -3626,10 +3583,6 @@ gimple_fold_builtin_snprintf (gimple_stmt_iterator *gsi)
   if (gimple_call_num_args (stmt) == 4)
 orig = gimple_call_arg (stmt, 3);
 
-  if (!tree_fits_uhwi_p (destsize))
-return false;
-  unsigned HOST_WIDE_INT destlen = tree_to_uhwi (destsize);
-
   /* Check whether the format is a literal string constant.  */
   fmt_str = c_getstr (fmt);
   if (fmt_str == NULL)
@@ -3649,6 +3602,8 @@ gimple_fold_builtin_snprintf (gimple_stmt_iterator *gsi)
   if (orig)
return false;
 
+  tree len = build_int_cstu (TREE_TYPE (destsize), strlen (fmt_str));
+
   /* We could expand this as
 memcpy (str, fmt, cst - 1); 

[PATCH 1/3] gimple-fold: Transform stp*cpy_chk to str*cpy directly

2021-11-11 Thread Siddhesh Poyarekar
Avoid going through another folding cycle and use the ignore flag to
directly transform BUILT_IN_STPCPY_CHK to BUILT_IN_STRCPY when set,
likewise for BUILT_IN_STPNCPY_CHK to BUILT_IN_STPNCPY.

Dump the transformation in dump_file so that we can verify in tests that
the direct transformation actually happened.

gcc/ChangeLog:

* gimple-fold.c (gimple_fold_builtin_stxcpy_chk,
gimple_fold_builtin_stxncpy_chk): Use BUILT_IN_STRNCPY if return
value is not used.

gcc/testsuite/ChangeLog:

* gcc.dg/fold-stringops.c: New test.

Signed-off-by: Siddhesh Poyarekar 
---
 gcc/gimple-fold.c   | 50 +
 gcc/testsuite/gcc.dg/fold-stringops-1.c | 23 
 2 files changed, 57 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-1.c

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 6e25a7c05db..92e15784803 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -3088,6 +3088,19 @@ gimple_fold_builtin_memory_chk (gimple_stmt_iterator 
*gsi,
   return true;
 }
 
+static void
+dump_transformation (gimple *from, gimple *to)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+{
+  fprintf (dump_file, "transformed ");
+  print_generic_expr (dump_file, gimple_call_fn (from), dump_flags);
+  fprintf (dump_file, " to ");
+  print_generic_expr (dump_file, gimple_call_fn (to), dump_flags);
+  fprintf (dump_file, "\n");
+}
+}
+
 /* Fold a call to the __st[rp]cpy_chk builtin.
DEST, SRC, and SIZE are the arguments to the call.
IGNORE is true if return value can be ignored.  FCODE is the BUILT_IN_*
@@ -3184,12 +3197,13 @@ gimple_fold_builtin_stxcpy_chk (gimple_stmt_iterator 
*gsi,
 }
 
   /* If __builtin_st{r,p}cpy_chk is used, assume st{r,p}cpy is available.  */
-  fn = builtin_decl_explicit (fcode == BUILT_IN_STPCPY_CHK
+  fn = builtin_decl_explicit (fcode == BUILT_IN_STPCPY_CHK && !ignore
  ? BUILT_IN_STPCPY : BUILT_IN_STRCPY);
   if (!fn)
 return false;
 
   gimple *repl = gimple_build_call (fn, 2, dest, src);
+  dump_transformation (stmt, repl);
   replace_call_with_call_and_fold (gsi, repl);
   return true;
 }
@@ -3209,19 +3223,6 @@ gimple_fold_builtin_stxncpy_chk (gimple_stmt_iterator 
*gsi,
   bool ignore = gimple_call_lhs (stmt) == NULL_TREE;
   tree fn;
 
-  if (fcode == BUILT_IN_STPNCPY_CHK && ignore)
-{
-   /* If return value of __stpncpy_chk is ignored,
-  optimize into __strncpy_chk.  */
-   fn = builtin_decl_explicit (BUILT_IN_STRNCPY_CHK);
-   if (fn)
-{
-  gimple *repl = gimple_build_call (fn, 4, dest, src, len, size);
-  replace_call_with_call_and_fold (gsi, repl);
-  return true;
-}
-}
-
   if (! tree_fits_uhwi_p (size))
 return false;
 
@@ -3234,7 +3235,23 @@ gimple_fold_builtin_stxncpy_chk (gimple_stmt_iterator 
*gsi,
 For MAXLEN only allow optimizing into non-_ocs function
 if SIZE is >= MAXLEN, never convert to __ocs_fail ().  */
  if (maxlen == NULL_TREE || ! tree_fits_uhwi_p (maxlen))
-   return false;
+   {
+ if (fcode == BUILT_IN_STPNCPY_CHK && ignore)
+   {
+ /* If return value of __stpncpy_chk is ignored,
+optimize into __strncpy_chk.  */
+ fn = builtin_decl_explicit (BUILT_IN_STRNCPY_CHK);
+ if (fn)
+   {
+ gimple *repl = gimple_build_call (fn, 4, dest, src, len,
+   size);
+ replace_call_with_call_and_fold (gsi, repl);
+ return true;
+   }
+   }
+
+ return false;
+   }
}
   else
maxlen = len;
@@ -3244,12 +3261,13 @@ gimple_fold_builtin_stxncpy_chk (gimple_stmt_iterator 
*gsi,
 }
 
   /* If __builtin_st{r,p}ncpy_chk is used, assume st{r,p}ncpy is available.  */
-  fn = builtin_decl_explicit (fcode == BUILT_IN_STPNCPY_CHK
+  fn = builtin_decl_explicit (fcode == BUILT_IN_STPNCPY_CHK && !ignore
  ? BUILT_IN_STPNCPY : BUILT_IN_STRNCPY);
   if (!fn)
 return false;
 
   gimple *repl = gimple_build_call (fn, 3, dest, src, len);
+  dump_transformation (stmt, repl);
   replace_call_with_call_and_fold (gsi, repl);
   return true;
 }
diff --git a/gcc/testsuite/gcc.dg/fold-stringops-1.c 
b/gcc/testsuite/gcc.dg/fold-stringops-1.c
new file mode 100644
index 000..712dd369e7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-stringops-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-lower-details" } */
+
+char dst[2048];
+
+char *
+copy1 (const char *src, int cond)
+{
+  __builtin___stpncpy_chk (dst, src, 42, __builtin_object_size (dst, 0));
+
+  return dst;
+}
+
+char *
+copy2 (void)
+{
+  __builtin___stpcpy_chk (dst, "Hello world", __builtin_object_size (dst, 

[PATCH 0/3] gimple-fold improvements

2021-11-11 Thread Siddhesh Poyarekar
This patchset improves folding in cases where input lengths
and/or destination sizes may not be constant but are range bound.

Tested on x86_64 with a full bootstrap build and verified that there are
no regressions resulting from this patchset.

Also tested a build of bash, which results in 3 __memcpy_chk calls being
optimized away completely in addition to a couple of memmove and strcpy
chk variants being transformed into regular calls.

Siddhesh Poyarekar (3):
  gimple-fold: Transform stp*cpy_chk to str*cpy directly
  gimple-fold: Use ranges to simplify _chk calls
  gimple-fold: Use ranges to simplify strncat and snprintf

 gcc/gimple-fold.c   | 312 +---
 gcc/testsuite/gcc.dg/Wobjsize-1.c   |   5 +-
 gcc/testsuite/gcc.dg/fold-stringops-1.c |  23 ++
 gcc/testsuite/gcc.dg/fold-stringops-2.c |  63 +
 gcc/testsuite/gcc.dg/fold-stringops-3.c |  18 ++
 5 files changed, 220 insertions(+), 201 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-1.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-2.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-stringops-3.c

-- 
2.31.1



Re: [r12-5150 Regression] FAIL: gfortran.dg/vector_subscript_1.f90 -Os execution test on Linux/x86_64

2021-11-11 Thread Aldy Hernandez via Gcc-patches
This is expected. It was failing at all optimization levels except -Os. The
oversight has been fixed :-).

On Thu, Nov 11, 2021, 20:14 sunil.k.pandey  wrote:

> On Linux/x86_64,
>
> 3e5a19053310ad090b930fbecebceb28bd1b91a4 is the first bad commit
> commit 3e5a19053310ad090b930fbecebceb28bd1b91a4
> Author: Aldy Hernandez 
> Date:   Thu Nov 11 11:27:07 2021 +0100
>
> Resolve entry loop condition for the edge remaining in the loop.
>
> caused
>
> FAIL: gfortran.dg/vector_subscript_1.f90   -O1  execution test
> FAIL: gfortran.dg/vector_subscript_1.f90   -O2  execution test
> FAIL: gfortran.dg/vector_subscript_1.f90   -O3 -fomit-frame-pointer
> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
> FAIL: gfortran.dg/vector_subscript_1.f90   -O3 -g  execution test
> FAIL: gfortran.dg/vector_subscript_1.f90   -Os  execution test
>
> with GCC configured with
>
> ../../gcc/configure
> --prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-5150/usr
> --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld
> --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet
> --without-isl --enable-libmpx x86_64-linux --disable-bootstrap
>
> To reproduce:
>
> $ cd {build_dir}/gcc && make check
> RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90
> --target_board='unix{-m32}'"
> $ cd {build_dir}/gcc && make check
> RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90
> --target_board='unix{-m32\ -march=cascadelake}'"
> $ cd {build_dir}/gcc && make check
> RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90
> --target_board='unix{-m64}'"
> $ cd {build_dir}/gcc && make check
> RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90
> --target_board='unix{-m64\ -march=cascadelake}'"
>
> (Please do not reply to this email, for question about this report,
> contact me at skpgkp2 at gmail dot com)
>
>


Re: [PATCH] libbacktrace: fix UBSAN issues

2021-11-11 Thread Ian Lance Taylor via Gcc-patches
On Thu, Nov 11, 2021 at 7:39 AM Martin Liška  wrote:
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> Fix issues mentioned in the PR.
>
> PR libbacktrace/103167
>
> libbacktrace/ChangeLog:
>
> * elf.c (elf_uncompress_lzma_block): Cast to unsigned int.
> (elf_uncompress_lzma): Likewise.
> * xztest.c (test_samples): memcpy only if v > 0.
>
> Co-Authored-By: Andrew Pinski 
> ---
>   libbacktrace/elf.c| 8 
>   libbacktrace/xztest.c | 2 +-
>   2 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c
> index 79d56146fc6..e69ac41c88b 100644
> --- a/libbacktrace/elf.c
> +++ b/libbacktrace/elf.c
> @@ -3175,7 +3175,7 @@ elf_uncompress_lzma_block (const unsigned char 
> *compressed,
> stream_crc = (compressed[off]
> | (compressed[off + 1] << 8)
> | (compressed[off + 2] << 16)
> -   | (compressed[off + 3] << 24));
> +   | ((unsigned)(compressed[off + 3]) << 24));

Thanks, but this kind of thing looks strange and is therefore likely
to break again in the future.  I suggest instead

  stream_crc = ((uint32_t) compressed[off]
 | ((uint32_t) compressed[off + 1] << 8)
 | ((uint32_t) compressed[off + 2] << 16)
 | ((uint32_t) compressed[off + 3] << 24));

Same for the similar cases elsewhere.

Ian


[r12-5150 Regression] FAIL: gfortran.dg/vector_subscript_1.f90 -Os execution test on Linux/x86_64

2021-11-11 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

3e5a19053310ad090b930fbecebceb28bd1b91a4 is the first bad commit
commit 3e5a19053310ad090b930fbecebceb28bd1b91a4
Author: Aldy Hernandez 
Date:   Thu Nov 11 11:27:07 2021 +0100

Resolve entry loop condition for the edge remaining in the loop.

caused

FAIL: gfortran.dg/vector_subscript_1.f90   -O1  execution test
FAIL: gfortran.dg/vector_subscript_1.f90   -O2  execution test
FAIL: gfortran.dg/vector_subscript_1.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/vector_subscript_1.f90   -O3 -g  execution test
FAIL: gfortran.dg/vector_subscript_1.f90   -Os  execution test

with GCC configured with

../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r12-5150/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gfortran.dg/vector_subscript_1.f90 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at skpgkp2 at gmail dot com)


[PATCH] openmp: Honor OpenMP 5.1 num_teams lower bound

2021-11-11 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch implements what I've been talking about earlier,
honor that for explicit num_teams clause we create at least the
lower-bound (if not specified, upper-bound) teams in the league.
For host fallback, it still means we only have one thread doing all the
teams, sequentially one after another.
For PTX and GCN, I think the new teams-2.c test and maybe teams-4.c too
will or might fail.
For these offloads, I think it is ok to remove symbols no longer used
from libgomp.a.
If num_teams_lower is bigger than the provided num_blocks or num_workgroups,
we should arrange for gomp_num_teams_var to be num_teams_lower - 1,
stop using the %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num ()
and instead use for it some .shared var that GOMP_teams4 initializes to
%ctaid.x or __builtin_gcn_dim_pos (0) when first and for !first
increment that by num_blocks or num_workgroups each time and only
return false when we are above num_teams_lower.
Any help with actually implementing this for the 2 architectures highly
appreciated.

I have only lightly tested the patch so far, will bootstrap/regtest
it overnight without offloading.

2021-11-11  Jakub Jelinek  

gcc/
* omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove.
(BUILT_IN_GOMP_TEAMS4): New.
* builtin-types.def (BT_FN_VOID_UINT_UINT): Remove.
(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
* omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of
GOMP_teams, pass to it also num_teams lower-bound expression
or a dup of upper-bound if it is missing and a flag whether
it is the first call or not.
gcc/fortran/
* types.def (BT_FN_VOID_UINT_UINT): Remove.
(BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
libgomp/
* libgomp_g.h (GOMP_teams4): Declare.
* libgomp.map (GOMP_5.1): Export GOMP_teams4.
* target.c (GOMP_teams4): New function.
* config/nvptx/target.c (GOMP_teams): Remove.
(GOMP_teams4): New function.
* config/gcn/target.c (GOMP_teams): Remove.
(GOMP_teams4): New function.
* testsuite/libgomp.c/teams-4.c (main): Expect exactly 2
teams instead of <= 2.
* testsuite/libgomp.c-c++-common/teams-2.c: New test.

--- gcc/omp-builtins.def.jj 2021-08-20 11:36:30.961244658 +0200
+++ gcc/omp-builtins.def2021-11-11 17:53:44.092433139 +0100
@@ -442,8 +442,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_U
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA,
  "GOMP_target_enter_exit_data",
  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST)
-DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS, "GOMP_teams",
- BT_FN_VOID_UINT_UINT, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4",
+ BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg",
  BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER,
--- gcc/builtin-types.def.jj2021-02-04 18:15:05.253113955 +0100
+++ gcc/builtin-types.def   2021-11-11 17:54:12.693023370 +0100
@@ -489,7 +489,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
 BT_CONST_VOLATILE_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
-DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE)
@@ -680,6 +679,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULL
 BT_PTR_ULONGLONG)
 DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
 BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
+BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
 
 DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
 BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
--- gcc/omp-low.c.jj2021-11-11 14:35:37.633348092 +0100
+++ gcc/omp-low.c   2021-11-11 19:19:43.940085275 +0100
@@ -13902,14 +13902,24 @@ lower_omp_teams (gimple_stmt_iterator *g
 
   tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
OMP_CLAUSE_NUM_TEAMS);
+  tree num_teams_lower = NULL_TREE;
   if (num_teams == NULL_TREE)
 num_teams = build_int_cst (unsigned_type_node, 0);
   else
 {
+  num_teams_lower = OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (num_teams);
+  if (num_teams_lower)
+   {
+ num_teams_lower = fold_convert (unsigned_type_node, num_teams_lower);
+ gimplify_expr (_teams_lower, _body, NULL, is_gimple_val,
+fb_rvalue);
+   }
   num_teams = 

Re: [PATCH 0/2] fortran: Ignore unused arguments for scalarisation [PR97896]

2021-11-11 Thread Thomas Koenig via Gcc-patches



On 07.11.21 17:17, Mikael Morin via Fortran wrote:

Regression-tested on x86_64-linux-gnu.  Ok for master and 11 branch?


OK.

Just one remark: Since just reverting my old patch would introduce
a regression for that one revision, please squash the patches before
committing.

Thanks a lot for the patch!

Best regards

Thomas


Re: [PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Philipp Tomsich
Kito,

Thanks for the reality-check: the subreg-expressions are getting in the way.
I'll drop this from v2, as a permanent resolution for this will be a
bit more involved.

Philipp.

On Thu, 11 Nov 2021 at 17:42, Kito Cheng  wrote:
>
> Hi Philipp:
>
> This testcase got wrong result with this patch even w/o
> si3_sext pattern:
>
> #include 
>
> #define MAX(A, B) ((A) > (B) ? (A) : (B))
>
> long long __attribute__((noinline, noipa))
> foo6(long long a, long long b, int c)
> {
>   int xa = a;
>   int xb = b;
>   return MAX(MAX(xa, xb), c);
> }
> int main() {
>   long long a = 0x2ll;
>   long long b = 0x1l;
>   int c = 10;
>   long long d = foo6(a, b, c);
>   printf ("%lld %lld %d = %lld\n", a, b, c, d);
>   return 0;
> }
>
> On Fri, Nov 12, 2021 at 12:27 AM Kito Cheng  wrote:
> >
> > IIRC it's not work even without sign extend pattern since I did similar 
> > experimental before (not for RISC-V, but same concept), I guess I need more 
> > time to test that.
> >
> > Philipp Tomsich  於 2021年11月12日 週五 00:18 寫道:
> >>
> >> Kito,
> >>
> >> Unless I am missing something, the problem is not the relaxation to
> >> GPR, but rather the sign-extending pattern I had squashed into the
> >> same patch.
> >> If you disable "si3_sext", a sext.w will be have to be
> >> emitted after the 'max' and before the return (or before the SImode
> >> output is consumed as a DImode), pushing the REE opportunity to a
> >> subsequent consumer (e.g. an addw).
> >>
> >> This will generate
> >>foo6:
> >>   max a0,a0,a1
> >>   sext.w a0,a0
> >>   ret
> >> which (assuming that the inputs to max are properly sign-extended
> >> SImode values living in DImode registers) will be the same as
> >> performing the two sext.w before the max.
> >>
> >> Having a second set of eyes on this is appreciated — let me know if
> >> you agree and I'll revise, once I have collected feedback on the
> >> remaining patches of the series.
> >>
> >> Philipp.
> >>
> >>
> >> On Thu, 11 Nov 2021 at 17:00, Kito Cheng  wrote:
> >> >
> >> > Hi Philipp:
> >> >
> >> > We can't pretend we have SImode min/max instruction without that 
> >> > semantic.
> >> > Give this testcase, x86 and rv64gc print out 8589934592 8589934591 = 0,
> >> > but with this patch and compile with rv64gc_zba_zbb -O3, the output
> >> > become 8589934592 8589934591 = 8589934592
> >> >
> >> > -Testcase---
> >> > #include 
> >> > long long __attribute__((noinline, noipa))
> >> > foo6(long long a, long long b)
> >> > {
> >> >   int xa = a;
> >> >   int xb = b;
> >> >   return (xa > xb ? xa : xb);
> >> > }
> >> > int main() {
> >> >   long long a = 0x2ll;
> >> >   long long b = 0x1l;
> >> >   long long c = foo6(a, b);
> >> >   printf ("%lld %lld = %lld\n", a, b, c);
> >> >   return 0;
> >> > }
> >> > --
> >> > v64gc_zba_zbb -O3 w/o this patch:
> >> > foo6:
> >> > sext.w  a1,a1
> >> > sext.w  a0,a0
> >> > max a0,a0,a1
> >> > ret
> >> >
> >> > --
> >> > v64gc_zba_zbb -O3 w/ this patch:
> >> > foo6:
> >> > max a0,a0,a1
> >> > ret
> >> >
> >> > On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
> >> >  wrote:
> >> > >
> >> > > While min/minu/max/maxu instructions are provided for XLEN only, these
> >> > > can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
> >> > > always sign-extended, which ensures that the XLEN-wide instructions
> >> > > can be used for signed and unsigned comparisons on SImode yielding a
> >> > > correct ordering of value.
> >> > >
> >> > > This commit
> >> > >  - relaxes the minmax pattern to express for GPR (instead of X only),
> >> > >providing both a si3 and di3 expansion on RV64
> >> > >  - adds a sign-extending form for thee si3 pattern for RV64 to all REE
> >> > >to eliminate redundant extensions
> >> > >  - adds test-cases for both
> >> > >
> >> > > gcc/ChangeLog:
> >> > >
> >> > > * config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
> >> > >   DImode) on RV64.
> >> > > * config/riscv/bitmanip.md (si3_sext): Add
> >> > >   pattern for REE.
> >> > >
> >> > > gcc/testsuite/ChangeLog:
> >> > >
> >> > > * gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
> >> > >   operands checking that no redundant sign- or zero-extensions
> >> > >   are emitted.
> >> > >
> >> > > Signed-off-by: Philipp Tomsich 
> >> > > ---
> >> > >
> >> > >  gcc/config/riscv/bitmanip.md | 14 +++---
> >> > >  gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
> >> > >  2 files changed, 28 insertions(+), 6 deletions(-)
> >> > >
> >> > > diff --git a/gcc/config/riscv/bitmanip.md 
> >> > > b/gcc/config/riscv/bitmanip.md
> >> > > index 000deb48b16..2a28f78f5f6 100644
> >> > > --- a/gcc/config/riscv/bitmanip.md
> >> > > +++ b/gcc/config/riscv/bitmanip.md
> >> > > @@ -260,13 +260,21 @@ 

Re: [PATCH v3 0/5] fortran: Ignore unused arguments for scalarisation [PR97896]

2021-11-11 Thread Thomas Koenig via Gcc-patches



Hi Mikael,


Regression-tested on x86_64-linux-gnu.  Ok for master?


This looks quite good, and is (I think) a cleaner version than
what we had before.  OK.

Thanks a lot for the patch(es)!

Best regards

Thomas


Re: [PATCH v2] implement -Winfinite-recursion [PR88232]

2021-11-11 Thread Marek Polacek via Gcc-patches
On Thu, Nov 11, 2021 at 11:21:01AM -0700, Martin Sebor wrote:
> On 11/10/21 2:27 PM, Marek Polacek wrote:
> > On Tue, Nov 09, 2021 at 09:28:43PM -0700, Martin Sebor via Gcc-patches 
> > wrote:
> > > The attached patch adds support to the middle end for detecting
> > > infinitely recursive calls.  The warning is controlled by the new
> > > -Winfinite-recursion option.  The option name is the same as
> > > Clang's.
> > > 
> > > I scheduled the warning pass to run after early inlining to
> > > detect mutually recursive calls but before tail recursion which
> > > turns some recursive calls into infinite loops and so makes
> > > the two indistinguishable.
> > > 
> > > The warning detects a superset of problems detected by Clang
> > > (based on its tests).  It detects the problem in PR88232
> > > (the feature request) as well as the one in PR 87742,
> > > an unrelated problem report that was root-caused to bug due
> > > to infinite recursion.
> > 
> > Nice, I've long wanted this warning.  I've made this mistake a couple of
> > times:
> > 
> > struct S {
> >operator int() { return S{}; }
> > };
> > 
> > and the patch warns about it.
> 
> Thanks for looking at it!  Consider it an early Christmas present :)
> 
> Like all middle end warnings, it warns for inline functions only
> if their bodies are actually emitted.  To handle the others we'd
> need to also implement the warning in the front end.  Or, "fake"-
> emit them somehow as I think Jason was suggesting for
> the -Wuninitialized warnings.

Yea, we're probably back at https://gcc.gnu.org/PR21678
 
> I think Clang implements it fully in the front end so it might be
> doable at least for the simple subset that doesn't need to traverse
> the CFG.

I think so too, but it comes with the usual caveats about implementing
warnings too early/too late.
 
> > > --- a/gcc/common.opt
> > > +++ b/gcc/common.opt
> > > @@ -629,6 +629,10 @@ Wimplicit-fallthrough=
> > >   Common Var(warn_implicit_fallthrough) RejectNegative Joined UInteger 
> > > Warning IntegerRange(0, 5)
> > >   Warn when a switch case falls through.
> > > +Winfinite-recursion
> > > +Var(warn_infinite_recursion) Warning
> > > +Warn for infinitely recursive calls.
> > 
> > Why do you need this hunk?
> 
> So other languages besides the C family can control the warning.
> I didn't really think about it too much, but since it's a middle
> end warning it seems like they might need to (other than that,
> I just copied some other warning that's in both places too).
 
It could be marked as Common in common.opt and then you don't need
it in c.opt.  Except you do because you can't do EnabledBy(Wall) in
common.opt :(.

> > > +  edge e;
> > > +  edge_iterator ei;
> > > +  FOR_EACH_EDGE (e, ei, bb->succs)
> > > +{
> > > +  int eflags = e->flags;
> > > +  if (find_function_exit (fndecl, e->dest, eflags, exit_bb, calls, 
> > > visited))
> > 
> > Why not use e->flags directly?  find_function_exit can't change it AFAICS.
> 
> Only because it's shorter and doesn't break up if () statement
> on multiple lines.  I think it's easier to read that way.  But
> in v2 of the patch this is simpler and not necessary anymore.
> 
> > 
> > I find the shadowing of 'loc' unsightly.  While here, I think
> > 
> >if (warning_at (DECL_SOURCE_LOCATION (func->decl), 
> > OPT_Winfinite_recursion,
> >   "infinite recursion detected"))
> >  for (auto stmt: calls)
> >{
> > ...
> >}
> > 
> > would look neater (and avoids the shadowing), but that's just my opinion.
> 
> I didn't even notice it but sure.
> 
> After thinking about the exceptional handling and taking a closer
> look at what Clang warns for I decided that what I had done was
> overly conservative and adjusted things to diagnose more of
> the same C++ code as it does.  I'll post an update shortly,
> once it finished testing.

Aha, ok.

Marek



[PATCH v2] implement -Winfinite-recursion [PR88232]

2021-11-11 Thread Martin Sebor via Gcc-patches

On 11/10/21 2:27 PM, Marek Polacek wrote:

On Tue, Nov 09, 2021 at 09:28:43PM -0700, Martin Sebor via Gcc-patches wrote:

The attached patch adds support to the middle end for detecting
infinitely recursive calls.  The warning is controlled by the new
-Winfinite-recursion option.  The option name is the same as
Clang's.

I scheduled the warning pass to run after early inlining to
detect mutually recursive calls but before tail recursion which
turns some recursive calls into infinite loops and so makes
the two indistinguishable.

The warning detects a superset of problems detected by Clang
(based on its tests).  It detects the problem in PR88232
(the feature request) as well as the one in PR 87742,
an unrelated problem report that was root-caused to bug due
to infinite recursion.


Nice, I've long wanted this warning.  I've made this mistake a couple of
times:

struct S {
   operator int() { return S{}; }
};

and the patch warns about it.


Thanks for looking at it!  Consider it an early Christmas present :)

Like all middle end warnings, it warns for inline functions only
if their bodies are actually emitted.  To handle the others we'd
need to also implement the warning in the front end.  Or, "fake"-
emit them somehow as I think Jason was suggesting for
the -Wuninitialized warnings.

I think Clang implements it fully in the front end so it might be
doable at least for the simple subset that doesn't need to traverse
the CFG.

  

--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -629,6 +629,10 @@ Wimplicit-fallthrough=
  Common Var(warn_implicit_fallthrough) RejectNegative Joined UInteger Warning 
IntegerRange(0, 5)
  Warn when a switch case falls through.
  
+Winfinite-recursion

+Var(warn_infinite_recursion) Warning
+Warn for infinitely recursive calls.


Why do you need this hunk?


So other languages besides the C family can control the warning.
I didn't really think about it too much, but since it's a middle
end warning it seems like they might need to (other than that,
I just copied some other warning that's in both places too).




+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE (e, ei, bb->succs)
+{
+  int eflags = e->flags;
+  if (find_function_exit (fndecl, e->dest, eflags, exit_bb, calls, 
visited))


Why not use e->flags directly?  find_function_exit can't change it AFAICS.


Only because it's shorter and doesn't break up if () statement
on multiple lines.  I think it's easier to read that way.  But
in v2 of the patch this is simpler and not necessary anymore.



I find the shadowing of 'loc' unsightly.  While here, I think

   if (warning_at (DECL_SOURCE_LOCATION (func->decl), OPT_Winfinite_recursion,
  "infinite recursion detected"))
 for (auto stmt: calls)
   {
...
   }

would look neater (and avoids the shadowing), but that's just my opinion.


I didn't even notice it but sure.

After thinking about the exceptional handling and taking a closer
look at what Clang warns for I decided that what I had done was
overly conservative and adjusted things to diagnose more of
the same C++ code as it does.  I'll post an update shortly,
once it has finished testing.

Martin


Re: [PATCH] dwarf: Multi-register CFI address support.

2021-11-11 Thread Hafiz Abid Qadeer
On 09/11/2021 15:59, Jakub Jelinek wrote:
> On Sun, Jun 13, 2021 at 02:27:38PM +0100, Hafiz Abid Qadeer wrote:
>> *** with this patch (edited for brevity)***
>>
>>  0024  CIE
>>
>>   DW_CFA_def_cfa_expression: DW_OP_bregx SGPR49+0, DW_OP_const1u 0x20, 
>> DW_OP_shl, DW_OP_bregx SGPR48+0, DW_OP_plus
>>   DW_CFA_expression: reg16 DW_OP_bregx SGPR51+0, DW_OP_const1u 0x20, 
>> DW_OP_shl, DW_OP_bregx SGPR50+0, DW_OP_plus
>>
>> 0028 003c  FDE cie= pc=...01ac
>>   DW_CFA_advance_loc4: 96
>>   DW_CFA_offset: reg46 0
>>   DW_CFA_offset: reg47 4
>>   DW_CFA_offset: reg50 8
>>   DW_CFA_offset: reg51 12
>>   DW_CFA_offset: reg16 8
>>   DW_CFA_advance_loc4: 4
>>   DW_CFA_def_cfa_expression: DW_OP_bregx SGPR47+0, DW_OP_const1u 0x20, 
>> DW_OP_shl, DW_OP_bregx SGPR46+0, DW_OP_plus, DW_OP_lit16, DW_OP_minus
> 
> I guess as a temporary solution until DWARF6 comes with something more
> compact for cases like that it can be fine, but is there a DWARF issue
> filed for it?  

I have filed an issue today describing the problem. Although I am not sure what 
is the best way to
handle it as there is not much encoding space left in CFA defining instructions.

> Is AMDGCN a DWARF2_ADDR_SIZE == 8 target?
Yes

> 
>> +/* This represents a register, in DWARF_FRAME_REGNUM space, for use in CFA
>> +   definitions and expressions.
>> +   Most architectures only need a single register number, but some (amdgcn)
>> +   have pointers that span multiple registers.  DWARF permits arbitrary
>> +   register sets but existing use-cases only require contiguous register
>> +   sets, as represented here.  */
>> +struct GTY(()) cfa_reg {
>> +  unsigned int reg;
>> +  unsigned int span;
>> +  poly_uint16_pod span_width;  /* A.K.A. register mode size.  */
> 
> If this is only used for span > 1, wouldn't it be better to
> make it
>   unsigned int reg;
>   unsigned short span;
>   unsigned short span_width;
> and keep span_width 0 for the span == 1 cases and only set span_width
> to to_constant () if span > 1 is needed?  If at least for now
> the only target that needs this is AMDGCN and the only target that has
> NUM_POLY_INT_COEFFS != 1 is aarch64 (maybe eventually riscv?), then I don't
> see why we should represent it in poly_uint16...
> Of course we can change it later if a target which needs both
> NUM_POLY_INT_COEFFS > 1 and span > 1 registers with non-constant span_width,
> we can change it, but doing it just in case seems unnecessary
> complication...

Done in the attached patch. Is it ok?

Thanks,
-- 
Hafiz Abid Qadeer
Mentor, a Siemens Business
>From dd25eccd2458c6b6d38a922d9b2c9107c4c0ba2d Mon Sep 17 00:00:00 2001
From: Hafiz Abid Qadeer 
Date: Thu, 11 Nov 2021 13:43:04 +
Subject: [PATCH] dwarf: Multi-register CFI address support.

Add support for architectures such as AMD GCN, in which the pointer size is
larger than the register size.  This allows the CFI information to include
multi-register locations for the stack pointer, frame pointer, and return
address.

This patch was originally posted by Andrew Stubbs in
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552873.html

It has now been re-worked according to the review comments. It does not use
DW_OP_piece or DW_OP_LLVM_piece_end. Instead it uses
DW_OP_bregx/DW_OP_shl/DW_OP_bregx/DW_OP_plus to build the CFA from multiple
consecutive registers. Here is how .debug_frame looks before and after this
patch:

$ cat factorial.c
int factorial(int n) {
  if (n == 0) return 1;
  return n * factorial (n - 1);
}

$ amdgcn-amdhsa-gcc -g factorial.c -O0 -c -o fac.o
$ llvm-dwarfdump -debug-frame fac.o

*** without this patch (edited for brevity)***

 0014  CIE

  DW_CFA_def_cfa: reg48 +0
  DW_CFA_register: reg16 reg50

0018 002c  FDE cie= pc=...01ac
  DW_CFA_advance_loc4: 96
  DW_CFA_offset: reg46 0
  DW_CFA_offset: reg47 4
  DW_CFA_offset: reg50 8
  DW_CFA_offset: reg51 12
  DW_CFA_offset: reg16 8
  DW_CFA_advance_loc4: 4
  DW_CFA_def_cfa_sf: reg46 -16

*** with this patch (edited for brevity)***

 0024  CIE

  DW_CFA_def_cfa_expression: DW_OP_bregx SGPR49+0, DW_OP_const1u 0x20, DW_OP_shl, DW_OP_bregx SGPR48+0, DW_OP_plus
  DW_CFA_expression: reg16 DW_OP_bregx SGPR51+0, DW_OP_const1u 0x20, DW_OP_shl, DW_OP_bregx SGPR50+0, DW_OP_plus

0028 003c  FDE cie= pc=...01ac
  DW_CFA_advance_loc4: 96
  DW_CFA_offset: reg46 0
  DW_CFA_offset: reg47 4
  DW_CFA_offset: reg50 8
  DW_CFA_offset: reg51 12
  DW_CFA_offset: reg16 8
  DW_CFA_advance_loc4: 4
  DW_CFA_def_cfa_expression: DW_OP_bregx SGPR47+0, DW_OP_const1u 0x20, DW_OP_shl, DW_OP_bregx SGPR46+0, DW_OP_plus, DW_OP_lit16, DW_OP_minus

gcc/ChangeLog:

	* dwarf2cfi.c (dw_stack_pointer_regnum): Change type to struct cfa_reg.
	(dw_frame_pointer_regnum): Likewise.
	(new_cfi_row): Use set_by_dwreg.
	(get_cfa_from_loc_descr): Use set_by_dwreg.  Support register spans.
	handle DW_OP_bregx 

Re: [Patch] Fortran/openmp: Fix '!$omp end'

2021-11-11 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 11, 2021 at 06:11:23PM +0100, Tobias Burnus wrote:
> --- a/gcc/fortran/parse.c
> +++ b/gcc/fortran/parse.c
> @@ -915,15 +915,16 @@ decode_omp_directive (void)
>matcho ("error", gfc_match_omp_error, ST_OMP_ERROR);
>matcho ("end atomic", gfc_match_omp_eos_error, ST_OMP_END_ATOMIC);
>matcho ("end critical", gfc_match_omp_end_critical, 
> ST_OMP_END_CRITICAL);
> -  matchs ("end distribute parallel do simd", gfc_match_omp_eos_error,
> +  matchs ("end distribute parallel do simd", gfc_match_omp_end_nowait,
> ST_OMP_END_DISTRIBUTE_PARALLEL_DO_SIMD);
> -  matcho ("end distribute parallel do", gfc_match_omp_eos_error,
> +  matcho ("end distribute parallel do", gfc_match_omp_end_nowait,

I think the above two changes are incorrect.
At least looking at 5.1 which is clearer than 5.2, 5.1 [221:17-23] says
for C/C++ that while nowait is allowed on worksharing-loop, it is not
allowed on combined parallel worksharing-loop, and Fortran has that
restriction through the syntax (no [nowait] on !$omp end parallel do).

> @@ -936,9 +937,12 @@ decode_omp_directive (void)
> ST_OMP_END_MASTER_TASKLOOP);
>matcho ("end master", gfc_match_omp_eos_error, ST_OMP_END_MASTER);
>matchs ("end ordered", gfc_match_omp_eos_error, ST_OMP_END_ORDERED);
> -  matchs ("end parallel do simd", gfc_match_omp_eos_error,
> +  matchs ("end parallel do simd", gfc_match_omp_end_nowait,
> ST_OMP_END_PARALLEL_DO_SIMD);
> -  matcho ("end parallel do", gfc_match_omp_eos_error, 
> ST_OMP_END_PARALLEL_DO);
> +  matcho ("end parallel do", gfc_match_omp_end_nowait,
> +   ST_OMP_END_PARALLEL_DO);

Likewise for the above two.

> @@ -951,46 +955,53 @@ decode_omp_directive (void)
> ST_OMP_END_PARALLEL_MASTER_TASKLOOP);
>matcho ("end parallel master", gfc_match_omp_eos_error,
> ST_OMP_END_PARALLEL_MASTER);
> -  matcho ("end parallel sections", gfc_match_omp_eos_error,
> +  matcho ("end parallel sections", gfc_match_omp_end_nowait,
> ST_OMP_END_PARALLEL_SECTIONS);
> -  matcho ("end parallel workshare", gfc_match_omp_eos_error,
> +  matcho ("end parallel workshare", gfc_match_omp_end_nowait,
> ST_OMP_END_PARALLEL_WORKSHARE);

Ditto for the above two.

>matcho ("end parallel", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL);
>matcho ("end scope", gfc_match_omp_end_nowait, ST_OMP_END_SCOPE);
>matcho ("end sections", gfc_match_omp_end_nowait, ST_OMP_END_SECTIONS);
>matcho ("end single", gfc_match_omp_end_single, ST_OMP_END_SINGLE);
>matcho ("end target data", gfc_match_omp_eos_error, 
> ST_OMP_END_TARGET_DATA);
> -  matchs ("end target parallel do simd", gfc_match_omp_eos_error,
> +  matchs ("end target parallel do simd", gfc_match_omp_end_nowait,

The above seems like a bug in 5.1 standard, haven't checked 5.2.
!$omp end target parallel do simd nowait
should be IMO valid, but [241:16] mistakenly doesn't list it.

> ST_OMP_END_TARGET_PARALLEL_DO_SIMD);
> -  matcho ("end target parallel do", gfc_match_omp_eos_error,
> +  matcho ("end target parallel do", gfc_match_omp_end_nowait,

Similarly.

> ST_OMP_END_TARGET_PARALLEL_DO);
> -  matcho ("end target parallel", gfc_match_omp_eos_error,
> +  matcho ("end target parallel loop", gfc_match_omp_end_nowait,
> +   ST_OMP_END_TARGET_PARALLEL_LOOP);

Similarly.

> +  matcho ("end target parallel", gfc_match_omp_end_nowait,
> ST_OMP_END_TARGET_PARALLEL);

Similarly.

> -  matchs ("end target simd", gfc_match_omp_eos_error, 
> ST_OMP_END_TARGET_SIMD);
> +  matchs ("end target simd", gfc_match_omp_end_nowait, 
> ST_OMP_END_TARGET_SIMD);

Similarly.

>matchs ("end target teams distribute parallel do simd",
> -   gfc_match_omp_eos_error,
> +   gfc_match_omp_end_nowait,
> ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD);
> -  matcho ("end target teams distribute parallel do", 
> gfc_match_omp_eos_error,
> +  matcho ("end target teams distribute parallel do",
> +   gfc_match_omp_end_nowait,
> ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO);
> -  matchs ("end target teams distribute simd", gfc_match_omp_eos_error,
> +  matchs ("end target teams distribute simd", gfc_match_omp_end_nowait,
> ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_SIMD);
> -  matcho ("end target teams distribute", gfc_match_omp_eos_error,
> +  matcho ("end target teams distribute", gfc_match_omp_end_nowait,
> ST_OMP_END_TARGET_TEAMS_DISTRIBUTE);
> -  matcho ("end target teams", gfc_match_omp_eos_error, 
> ST_OMP_END_TARGET_TEAMS);
> -  matcho ("end target", gfc_match_omp_eos_error, ST_OMP_END_TARGET);
> +  matcho ("end target teams loop", gfc_match_omp_end_nowait,
> +   ST_OMP_END_TARGET_TEAMS_LOOP);
> +  matcho ("end target teams", 

Re: [PATCH] vect: Remove vec_outside/inside_cost fields

2021-11-11 Thread Jan Hubicka via Gcc-patches
> > Now afunc writes to __var_5_mma only indirectly so I think it is correct 
> > that
> > we optimize the conditional out.
> >
> > Easy fix would be to add -fno-ipa-modref, but perhaps someone with
> > better understanding of Fortran would help me to improve the testcase so
> > the calls to matmul_r4 remains reachable?
> 
> I think the two matmul_r4 cases were missed optimizations before so just
> changing the expected number of calls to zero is the correct fix here.  Indeed
> we can now statically determine the matrices are not large and so only
> keep the inline copy.

I have updated the matmul as follows.

gcc/testsuite/ChangeLog:

2021-11-11  Jan Hubicka  

* gfortran.dg/inline_matmul_17.f90: Fix template

diff --git a/gcc/testsuite/gfortran.dg/inline_matmul_17.f90 
b/gcc/testsuite/gfortran.dg/inline_matmul_17.f90
index d2ca8e2948a..cff4b6ce5e2 100644
--- a/gcc/testsuite/gfortran.dg/inline_matmul_17.f90
+++ b/gcc/testsuite/gfortran.dg/inline_matmul_17.f90
@@ -45,4 +45,4 @@ program main
   c = matmul(a, bfunc())
   if (any(c-d /= 0)) STOP 6
 end program main
-! { dg-final { scan-tree-dump-times "matmul_r4" 2 "optimized" } }
+! { dg-final { scan-tree-dump-not "matmul_r4" "optimized" } }


Re: [PATCH][V2] rs6000: Remove unnecessary option manipulation.

2021-11-11 Thread Segher Boessenkool
Hi!

On Thu, Nov 04, 2021 at 01:36:06PM +0100, Martin Liška wrote:
> Sending the patch in a separate thread.

You forgot to send the commit message though?

>   * config/rs6000/rs6000.c (rs6000_override_options_after_change):
>   Do not set flag_rename_registers, it's already enabled with
>   EnabledBy(funroll-loops).
>   Use EnabledBy for unroll_only_small_loops.
>   * config/rs6000/rs6000.opt: Use EnabledBy for
>   unroll_only_small_loops.

Please don't put newlines in random places.  It makes reading changelogs
much harder than needed.

> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -3472,13 +3472,8 @@ rs6000_override_options_after_change (void)
>/* Explicit -funroll-loops turns -munroll-only-small-loops off, and
>   turns -frename-registers on.  */
>if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
> -   || (OPTION_SET_P (flag_unroll_all_loops)
> -&& flag_unroll_all_loops))
> +   || (OPTION_SET_P (flag_unroll_all_loops) && flag_unroll_all_loops))
>  {
> -  if (!OPTION_SET_P (unroll_only_small_loops))
> - unroll_only_small_loops = 0;
> -  if (!OPTION_SET_P (flag_rename_registers))
> - flag_rename_registers = 1;
>if (!OPTION_SET_P (flag_cunroll_grow_size))
>   flag_cunroll_grow_size = 1;
>  }

So some explanation for these two changes would be good to have.

> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 9d7878f144a..faeb7423ca7 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -546,7 +546,7 @@ Target Undocumented Var(rs6000_optimize_swaps) Init(1) 
> Save
>  Analyze and remove doubleword swaps from VSX computations.
>  
>  munroll-only-small-loops
> -Target Undocumented Var(unroll_only_small_loops) Init(0) Save
> +Target Undocumented Var(unroll_only_small_loops) Init(0) Save 
> EnabledBy(funroll-loops)

You used format=flowed it seems?  Don't.  Patches are mangled with it :-(


Segher


[AArch64] Enable generation of FRINTNZ instructions

2021-11-11 Thread Andre Vieira (lists) via Gcc-patches

Hi,

This patch introduces two IFN's FTRUNC32 and FTRUNC64, the corresponding 
optabs and mappings. It also creates a backend pattern to implement them 
for aarch64 and a match.pd pattern to idiom recognize these.
These IFN's (and optabs) represent a truncation towards zero, as if 
performed by first casting it to a signed integer of 32 or 64 bits and 
then back to the same floating point type/mode.


The match.pd pattern choses to use these, when supported, regardless of 
trapping math, since these new patterns mimic the original behavior of 
truncating through an integer.


I didn't think any of the existing IFN's represented these. I know it's 
a bit late in stage 1, but I thought this might be OK given it's only 
used by a single target and should have very little impact on anything else.


Bootstrapped on aarch64-none-linux.

OK for trunk?

gcc/ChangeLog:

    * config/aarch64/aarch64.md (ftrunc2): New 
pattern.

    * config/aarch64/iterators.md (FRINTZ): New iterator.
    * doc/md.texi: New entry for ftrunc pattern name.
    * internal-fn.def (FTRUNC32): New IFN.
    (FTRUNC64): Likewise.
    * match.pd: Add to the existing TRUNC pattern match.
    * optabs.def (OPTAB_D): New entries for ftrunc.

gcc/testsuite/ChangeLog:

    * gcc.target/aarch64/merge_trunc1.c: Adapted to skip if frintNz 
instruction available.

    * lib/target-supports.exp: Added arm_v8_5a_frintnzx_ok target.
    * gcc.target/aarch64/frintnz.c: New test.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
4035e061706793849c68ae09bcb2e4b9580ab7b6..ad4e04d7c874da095513442e7d7f247791d8921d
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7345,6 +7345,16 @@ (define_insn "despeculate_simpleti"
(set_attr "speculation_barrier" "true")]
 )
 
+(define_insn "ftrunc2"
+  [(set (match_operand:VSFDF 0 "register_operand" "=w")
+(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+ FRINTNZ))]
+  "TARGET_FRINT && TARGET_FLOAT
+   && !(VECTOR_MODE_P (mode) && !TARGET_SIMD)"
+  "\\t%0, %1"
+  [(set_attr "type" "f_rint")]
+)
+
 (define_insn "aarch64_"
   [(set (match_operand:VSFDF 0 "register_operand" "=w")
(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..49510488a2a800689e95c399f2e6c967b566516d
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3067,6 +3067,8 @@ (define_int_iterator FCMLA [UNSPEC_FCMLA
 (define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
   UNSPEC_FRINT64Z UNSPEC_FRINT64X])
 
+(define_int_iterator FRINTNZ [UNSPEC_FRINT32Z UNSPEC_FRINT64Z])
+
 (define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB])
 
 (define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB])
@@ -3482,6 +3484,8 @@ (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") 
(UNSPEC_FMLSL "s")
 (define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X 
"frint32x")
  (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X 
"frint64x")])
 
+(define_int_attr frintnz_mode [(UNSPEC_FRINT32Z "si") (UNSPEC_FRINT64Z "di")])
+
 ;; The condition associated with an UNSPEC_COND_.
 (define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq")
 (UNSPEC_COND_CMPGE_WIDE "ge")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 
41f1850bf6e95005647ca97a495a97d7e184d137..7bd66818144e87e1dca2ef13bef1d6f21f239570
 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6175,6 +6175,13 @@ operands; otherwise, it may not.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{ftrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ftrunc@var{m}@var{n}2}
+Truncate operand 1 to a @var{n} mode signed integer, towards zero, and store
+the result in operand 0. Both operands have mode @var{m}, which is a scalar or
+vector floating-point mode.
+
+
 @cindex @code{round@var{m}2} instruction pattern
 @item @samp{round@var{m}2}
 Round operand 1 to the nearest integer, rounding away from zero in the
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 
bb13c6cce1bf55633760bc14980402f1f0ac1689..64263cbb83548b140f613cb4bf5ce6565373f96d
 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -269,6 +269,8 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC32, ECF_CONST, ftrunc32, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC64, ECF_CONST, ftrunc64, unary)
 
 /* Binary math functions.  */
 DEF_INTERNAL_FLT_FN (ATAN2, ECF_CONST, atan2, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 

Re: [PATCH] Remove loop crossing restriction from the backward threader.

2021-11-11 Thread Jeff Law via Gcc-patches




On 11/11/2021 10:24 AM, Aldy Hernandez wrote:

We have much more thorough restrictions, that are shared between both
threader implementations, in the registry.  I've been meaning to
remove the backward threader one, since its only purpose was reducing
the search space.  Previously there was a small time penalty for its
removal, but with the various patches in the past month, it looks like
the removal is a wash performance wise.

This catches 8 more jump threads in the backward threader in my suite.
Presumably, because we disallowed all loop crossing, whereas the
registry restrictions allow some crossing (if we exit the loop, etc).

OK pending tests on x86-64 Linux?

gcc/ChangeLog:

* tree-ssa-threadbackward.c
(back_threader_profitability::profitable_path_p): Remove loop
crossing restriction.

OK
jeff



[PATCH] Make ranger optional in path_range_query.

2021-11-11 Thread Aldy Hernandez via Gcc-patches
All users of path_range_query are currently allocating a gimple_ranger
only to pass it to the query object.  It's tidier to just do it from
path_range_query if no ranger was passed.

Will push pending tests on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-path.cc (path_range_query::path_range_query): New
ctor without a ranger.
(path_range_query::~path_range_query): Free ranger if necessary.
(path_range_query::range_on_path_entry): Adjust m_ranger for pointer.
(path_range_query::ssa_range_in_phi): Same.
(path_range_query::compute_ranges_in_block): Same.
(path_range_query::compute_imports): Same.
(path_range_query::compute_ranges): Same.
(path_range_query::range_of_stmt): Same.
(path_range_query::compute_outgoing_relations): Same.
* gimple-range-path.h (class path_range_query): New ctor.
* tree-ssa-loop-ch.c (ch_base::copy_headers): Remove gimple_ranger
as path_range_query allocates one.
* tree-ssa-threadbackward.c (class back_threader): Remove m_ranger.
(back_threader::~back_threader): Same.
---
 gcc/gimple-range-path.cc  | 43 +++
 gcc/gimple-range-path.h   |  9 ++--
 gcc/tree-ssa-loop-ch.c|  4 +---
 gcc/tree-ssa-threadbackward.c |  5 +---
 4 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 4843c133e62..b9aceaf2565 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -36,13 +36,24 @@ along with GCC; see the file COPYING3.  If not see
 // Internal construct to help facilitate debugging of solver.
 #define DEBUG_SOLVER (dump_file && (param_threader_debug == 
THREADER_DEBUG_ALL))
 
-path_range_query::path_range_query (gimple_ranger &ranger, bool resolve)
-  : m_ranger (ranger)
+path_range_query::path_range_query (gimple_ranger *ranger, bool resolve)
+  : m_cache (new ssa_global_cache),
+m_has_cache_entry (BITMAP_ALLOC (NULL)),
+m_ranger (ranger),
+m_resolve (resolve),
+m_alloced_ranger (false)
 {
-  m_cache = new ssa_global_cache;
-  m_has_cache_entry = BITMAP_ALLOC (NULL);
-  m_resolve = resolve;
-  m_oracle = new path_oracle (ranger.oracle ());
+  m_oracle = new path_oracle (ranger->oracle ());
+}
+
+path_range_query::path_range_query (bool resolve)
+  : m_cache (new ssa_global_cache),
+m_has_cache_entry (BITMAP_ALLOC (NULL)),
+m_ranger (new gimple_ranger),
+m_resolve (resolve),
+m_alloced_ranger (true)
+{
+  m_oracle = new path_oracle (m_ranger->oracle ());
 }
 
 path_range_query::~path_range_query ()
@@ -50,6 +61,8 @@ path_range_query::~path_range_query ()
   BITMAP_FREE (m_has_cache_entry);
   delete m_cache;
   delete m_oracle;
+  if (m_alloced_ranger)
+delete m_ranger;
 }
 
 // Mark cache entry for NAME as unused.
@@ -140,7 +153,7 @@ path_range_query::range_on_path_entry (irange &r, tree name)
   gimple *last = last_stmt (entry);
   if (last)
 {
-  if (m_ranger.range_of_expr (r, name, last))
+  if (m_ranger->range_of_expr (r, name, last))
return;
   gcc_unreachable ();
 }
@@ -156,7 +169,7 @@ path_range_query::range_on_path_entry (irange &r, tree name)
 {
   edge e = EDGE_PRED (entry, i);
   if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
- && m_ranger.range_on_edge (tmp, e, name))
+ && m_ranger->range_on_edge (tmp, e, name))
{
  r.union_ (tmp);
  changed = true;
@@ -244,7 +257,7 @@ path_range_query::ssa_range_in_phi (irange &r, gphi *phi)
 
   if (at_entry ())
 {
-  if (m_resolve && m_ranger.range_of_expr (r, name, phi))
+  if (m_resolve && m_ranger->range_of_expr (r, name, phi))
return;
 
   // Try fold just in case we can resolve simple things like PHI <5(99), 
6(88)>.
@@ -275,7 +288,7 @@ path_range_query::ssa_range_in_phi (irange &r, gphi *phi)
  range_on_path_entry (r, arg);
else
  r.set_varying (TREE_TYPE (name));
-   m_ranger.range_on_edge (tmp, e_in, arg);
+   m_ranger->range_on_edge (tmp, e_in, arg);
r.intersect (tmp);
return;
  }
@@ -370,7 +383,7 @@ path_range_query::compute_ranges_in_block (basic_block bb)
   EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
 {
   tree name = ssa_name (i);
-  gori_compute &g = m_ranger.gori ();
+  gori_compute &g = m_ranger->gori ();
   bitmap exports = g.exports (bb);
 
   if (bitmap_bit_p (exports, i))
@@ -452,7 +465,7 @@ void
 path_range_query::compute_imports (bitmap imports, basic_block exit)
 {
   // Start with the imports from the exit block...
-  bitmap r_imports = m_ranger.gori ().imports (exit);
+  bitmap r_imports = m_ranger->gori ().imports (exit);
   bitmap_copy (imports, r_imports);
 
   auto_vec worklist (bitmap_count_bits (imports));
@@ -539,7 +552,7 @@ path_range_query::compute_ranges (const vec<basic_block> &path,
 
   if (m_resolve)
{
-   

[PATCH] Remove loop crossing restriction from the backward threader.

2021-11-11 Thread Aldy Hernandez via Gcc-patches
We have much more thorough restrictions, that are shared between both
threader implementations, in the registry.  I've been meaning to
remove the backward threader one, since its only purpose was reducing
the search space.  Previously there was a small time penalty for its
removal, but with the various patches in the past month, it looks like
the removal is a wash performance wise.

This catches 8 more jump threads in the backward threader in my suite.
Presumably, because we disallowed all loop crossing, whereas the
registry restrictions allow some crossing (if we exit the loop, etc).

OK pending tests on x86-64 Linux?

gcc/ChangeLog:

* tree-ssa-threadbackward.c
(back_threader_profitability::profitable_path_p): Remove loop
crossing restriction.
---
 gcc/tree-ssa-threadbackward.c | 36 ++-
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index d067c470c38..61aee25d236 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -615,7 +615,6 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
   int n_insns = 0;
   gimple_stmt_iterator gsi;
   loop_p loop = m_path[0]->loop_father;
-  bool path_crosses_loops = false;
   bool threaded_through_latch = false;
   bool multiway_branch_in_path = false;
   bool threaded_multiway_branch = false;
@@ -634,30 +633,15 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
 
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " bb:%i", bb->index);
-  /* Remember, blocks in the path are stored in opposite order
-in the PATH array.  The last entry in the array represents
-the block with an outgoing edge that we will redirect to the
-jump threading path.  Thus we don't care about that block's
-loop father, nor how many statements are in that block because
-it will not be copied or whether or not it ends in a multiway
-branch.  */
+  /* Remember, blocks in the path are stored in opposite order in
+the PATH array.  The last entry in the array represents the
+block with an outgoing edge that we will redirect to the jump
+threading path.  Thus we don't care how many statements are
+in that block because it will not be copied or whether or not
+it ends in a multiway branch.  */
   if (j < m_path.length () - 1)
{
  int orig_n_insns = n_insns;
- if (bb->loop_father != loop)
-   {
- path_crosses_loops = true;
-
- // Dump rest of blocks.
- if (dump_file && (dump_flags & TDF_DETAILS))
-   for (j++; j < m_path.length (); j++)
- {
-   bb = m_path[j];
-   fprintf (dump_file, " bb:%i", bb->index);
- }
- break;
-   }
-
  /* PHIs in the path will create degenerate PHIS in the
 copied path which will then get propagated away, so
 looking at just the duplicate path the PHIs would
@@ -776,14 +760,6 @@ back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
*creates_irreducible_loop = true;
 }
 
-  if (path_crosses_loops)
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
-"the path crosses loops.\n");
-  return false;
-}
-
   /* Threading is profitable if the path duplicated is hot but also
  in a case we separate cold path from hot path and permit optimization
  of the hot path later.  Be on the agressive side here. In some testcases,
-- 
2.31.1



[committed] diagnostic: fix unused variable 'def_tabstop' [PR103129]

2021-11-11 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-5176-gabdff441a07f55d16e3d0e5ced3123c83d210a0a.

gcc/ChangeLog:
PR other/103129
* diagnostic-show-locus.c (def_policy): Use def_tabstop.

Signed-off-by: David Malcolm 
---
 gcc/diagnostic-show-locus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index d796085222a..67be213c5a2 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -693,7 +693,7 @@ static const int def_tabstop = 8;
 
 static cpp_char_column_policy def_policy ()
 {
-  return cpp_char_column_policy (8, cpp_wcwidth);
+  return cpp_char_column_policy (def_tabstop, cpp_wcwidth);
 }
 
 /* Create some expanded locations for testing layout_range.  The filename
-- 
2.26.3



[Patch] Fortran/openmp: Fix '!$omp end'

2021-11-11 Thread Tobias Burnus

Found this when looking at the num_teams patch – and when
converting clauses-1.c to clauses-1.f90.

OK?

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Fortran/openmp: Fix '!$omp end'

gcc/fortran/ChangeLog:

	* parse.c (decode_omp_directive): Fix permitting 'nowait' for some
	combined directives, add missing 'omp end ... loop'.
	(gfc_ascii_statement): Fix ST_OMP_END_TEAMS_LOOP result.
	* openmp.c (resolve_omp_clauses): Add missing combined loop constructs
	case values to the 'if(directive-name: ...)' check.
	* trans-openmp.c (gfc_split_omp_clauses): Put nowait on target if
	first leaf construct accepting it.
	(gfc_trans_omp_parallel_sections, gfc_trans_omp_parallel_workshare):
	Unset nowait for parallel if set.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/unexpected-end.f90: Update dg-error.
	* gfortran.dg/gomp/clauses-1.f90: New test.
	* gfortran.dg/gomp/nowait-2.f90: New test.
	* gfortran.dg/gomp/nowait-3.f90: New test.

 gcc/fortran/openmp.c  |   3 +
 gcc/fortran/parse.c   |  49 +-
 gcc/fortran/trans-openmp.c|   5 +
 gcc/testsuite/gfortran.dg/gomp/clauses-1.f90  | 667 ++
 gcc/testsuite/gfortran.dg/gomp/nowait-2.f90   | 240 
 gcc/testsuite/gfortran.dg/gomp/nowait-3.f90   | 151 +
 gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 |  12 +-
 7 files changed, 1102 insertions(+), 25 deletions(-)

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 7b2df0d0be3..2893ab2befb 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -6232,6 +6232,7 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 
 	case EXEC_OMP_PARALLEL:
 	case EXEC_OMP_PARALLEL_DO:
+	case EXEC_OMP_PARALLEL_LOOP:
 	case EXEC_OMP_PARALLEL_MASKED:
 	case EXEC_OMP_PARALLEL_MASTER:
 	case EXEC_OMP_PARALLEL_SECTIONS:
@@ -6285,6 +6286,7 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	case EXEC_OMP_TARGET:
 	case EXEC_OMP_TARGET_TEAMS:
 	case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE:
+	case EXEC_OMP_TARGET_TEAMS_LOOP:
 	  ok = ifc == OMP_IF_TARGET;
 	  break;
 
@@ -6312,6 +6314,7 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO:
 	case EXEC_OMP_TARGET_PARALLEL:
 	case EXEC_OMP_TARGET_PARALLEL_DO:
+	case EXEC_OMP_TARGET_PARALLEL_LOOP:
 	  ok = ifc == OMP_IF_TARGET || ifc == OMP_IF_PARALLEL;
 	  break;
 
diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c
index 12aa80ec45c..d4b985e75eb 100644
--- a/gcc/fortran/parse.c
+++ b/gcc/fortran/parse.c
@@ -915,15 +915,16 @@ decode_omp_directive (void)
   matcho ("error", gfc_match_omp_error, ST_OMP_ERROR);
   matcho ("end atomic", gfc_match_omp_eos_error, ST_OMP_END_ATOMIC);
   matcho ("end critical", gfc_match_omp_end_critical, ST_OMP_END_CRITICAL);
-  matchs ("end distribute parallel do simd", gfc_match_omp_eos_error,
+  matchs ("end distribute parallel do simd", gfc_match_omp_end_nowait,
 	  ST_OMP_END_DISTRIBUTE_PARALLEL_DO_SIMD);
-  matcho ("end distribute parallel do", gfc_match_omp_eos_error,
+  matcho ("end distribute parallel do", gfc_match_omp_end_nowait,
 	  ST_OMP_END_DISTRIBUTE_PARALLEL_DO);
   matchs ("end distribute simd", gfc_match_omp_eos_error,
 	  ST_OMP_END_DISTRIBUTE_SIMD);
   matcho ("end distribute", gfc_match_omp_eos_error, ST_OMP_END_DISTRIBUTE);
   matchs ("end do simd", gfc_match_omp_end_nowait, ST_OMP_END_DO_SIMD);
   matcho ("end do", gfc_match_omp_end_nowait, ST_OMP_END_DO);
+  matcho ("end loop", gfc_match_omp_eos_error, ST_OMP_END_LOOP);
   matchs ("end simd", gfc_match_omp_eos_error, ST_OMP_END_SIMD);
   matcho ("end masked taskloop simd", gfc_match_omp_eos_error,
 	  ST_OMP_END_MASKED_TASKLOOP_SIMD);
@@ -936,9 +937,12 @@ decode_omp_directive (void)
 	  ST_OMP_END_MASTER_TASKLOOP);
   matcho ("end master", gfc_match_omp_eos_error, ST_OMP_END_MASTER);
   matchs ("end ordered", gfc_match_omp_eos_error, ST_OMP_END_ORDERED);
-  matchs ("end parallel do simd", gfc_match_omp_eos_error,
+  matchs ("end parallel do simd", gfc_match_omp_end_nowait,
 	  ST_OMP_END_PARALLEL_DO_SIMD);
-  matcho ("end parallel do", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL_DO);
+  matcho ("end parallel do", gfc_match_omp_end_nowait,
+	  ST_OMP_END_PARALLEL_DO);
+  matcho ("end parallel loop", gfc_match_omp_eos_error,
+	  ST_OMP_END_PARALLEL_LOOP);
   matcho ("end parallel masked taskloop simd", gfc_match_omp_eos_error,
 	  ST_OMP_END_PARALLEL_MASKED_TASKLOOP_SIMD);
   matcho ("end parallel masked taskloop", gfc_match_omp_eos_error,
@@ 

Re: [PATCH] tree-optimization/103188 - avoid running ranger on not-up-to-date SSA

2021-11-11 Thread Aldy Hernandez via Gcc-patches
Like this.  It simplifies both loop-ch and the threader.

I'll push this pending tests unless someone objects.

On Thu, Nov 11, 2021 at 5:43 PM Aldy Hernandez  wrote:
>
> Thanks for doing this!
>
> >
> > +  gimple_ranger *ranger = new gimple_ranger;
> > +  path_range_query *query = new path_range_query (*ranger, 
> > /*resolve=*/true);
>
> Hmmm, it looks like both clients are now instantiating a gimple_ranger
> just so they can pass it down to the path_range_query.  Maybe we
> should  have another ctor with just:
>
> path_range_query (bool resolve);
>
> ...and have it allocate its own ranger.
>
> Does this seem like a useful improvement?  For that matter, resolve
> should default to true.  The option is only there so the backward
> threader can run in a "light" mode (early threading, etc).
>
> Aldy
From c446a8c3110b6629e6dc6897028312ed760440df Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Thu, 11 Nov 2021 18:06:50 +0100
Subject: [PATCH] Make ranger optional in path_range_query.

All users of path_range_query are currently allocating a gimple_ranger
only to pass it to the query object.  It's tidier to just do it from
path_range_query if no ranger was passed.

gcc/ChangeLog:

	* gimple-range-path.cc (path_range_query::path_range_query): New
	ctor without a ranger.
	(path_range_query::~path_range_query): Free ranger if necessary.
	(path_range_query::range_on_path_entry): Adjust m_ranger for pointer.
	(path_range_query::ssa_range_in_phi): Same.
	(path_range_query::compute_ranges_in_block): Same.
	(path_range_query::compute_imports): Same.
	(path_range_query::compute_ranges): Same.
	(path_range_query::range_of_stmt): Same.
	(path_range_query::compute_outgoing_relations): Same.
	* gimple-range-path.h (class path_range_query): New ctor.
	* tree-ssa-loop-ch.c (ch_base::copy_headers): Remove gimple_ranger
	as path_range_query allocates one.
	* tree-ssa-threadbackward.c (class back_threader): Remove m_ranger.
	(back_threader::~back_threader): Same.
---
 gcc/gimple-range-path.cc  | 43 +++
 gcc/gimple-range-path.h   |  9 ++--
 gcc/tree-ssa-loop-ch.c|  4 +---
 gcc/tree-ssa-threadbackward.c |  5 +---
 4 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 4843c133e62..b9aceaf2565 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -36,13 +36,24 @@ along with GCC; see the file COPYING3.  If not see
 // Internal construct to help facilitate debugging of solver.
 #define DEBUG_SOLVER (dump_file && (param_threader_debug == THREADER_DEBUG_ALL))
 
-path_range_query::path_range_query (gimple_ranger , bool resolve)
-  : m_ranger (ranger)
+path_range_query::path_range_query (gimple_ranger *ranger, bool resolve)
+  : m_cache (new ssa_global_cache),
+m_has_cache_entry (BITMAP_ALLOC (NULL)),
+m_ranger (ranger),
+m_resolve (resolve),
+m_alloced_ranger (false)
 {
-  m_cache = new ssa_global_cache;
-  m_has_cache_entry = BITMAP_ALLOC (NULL);
-  m_resolve = resolve;
-  m_oracle = new path_oracle (ranger.oracle ());
+  m_oracle = new path_oracle (ranger->oracle ());
+}
+
+path_range_query::path_range_query (bool resolve)
+  : m_cache (new ssa_global_cache),
+m_has_cache_entry (BITMAP_ALLOC (NULL)),
+m_ranger (new gimple_ranger),
+m_resolve (resolve),
+m_alloced_ranger (true)
+{
+  m_oracle = new path_oracle (m_ranger->oracle ());
 }
 
 path_range_query::~path_range_query ()
@@ -50,6 +61,8 @@ path_range_query::~path_range_query ()
   BITMAP_FREE (m_has_cache_entry);
   delete m_cache;
   delete m_oracle;
+  if (m_alloced_ranger)
+delete m_ranger;
 }
 
 // Mark cache entry for NAME as unused.
@@ -140,7 +153,7 @@ path_range_query::range_on_path_entry (irange , tree name)
   gimple *last = last_stmt (entry);
   if (last)
 {
-  if (m_ranger.range_of_expr (r, name, last))
+  if (m_ranger->range_of_expr (r, name, last))
 	return;
   gcc_unreachable ();
 }
@@ -156,7 +169,7 @@ path_range_query::range_on_path_entry (irange , tree name)
 {
   edge e = EDGE_PRED (entry, i);
   if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
-	  && m_ranger.range_on_edge (tmp, e, name))
+	  && m_ranger->range_on_edge (tmp, e, name))
 	{
 	  r.union_ (tmp);
 	  changed = true;
@@ -244,7 +257,7 @@ path_range_query::ssa_range_in_phi (irange , gphi *phi)
 
   if (at_entry ())
 {
-  if (m_resolve && m_ranger.range_of_expr (r, name, phi))
+  if (m_resolve && m_ranger->range_of_expr (r, name, phi))
 	return;
 
   // Try fold just in case we can resolve simple things like PHI <5(99), 6(88)>.
@@ -275,7 +288,7 @@ path_range_query::ssa_range_in_phi (irange , gphi *phi)
 		  range_on_path_entry (r, arg);
 		else
 		  r.set_varying (TREE_TYPE (name));
-		m_ranger.range_on_edge (tmp, e_in, arg);
+		m_ranger->range_on_edge (tmp, e_in, arg);
 		r.intersect (tmp);
 		return;
 	  }
@@ -370,7 +383,7 @@ path_range_query::compute_ranges_in_block (basic_block 

Re: [PATCH] tree-optimization/103188 - avoid running ranger on not-up-to-date SSA

2021-11-11 Thread Richard Biener via Gcc-patches
On November 11, 2021 5:43:48 PM GMT+01:00, Aldy Hernandez  
wrote:
>Thanks for doing this!
>
>>
>> +  gimple_ranger *ranger = new gimple_ranger;
>> +  path_range_query *query = new path_range_query (*ranger, 
>> /*resolve=*/true);
>
>Hmmm, it looks like both clients are now instantiating a gimple_ranger
>just so they can pass it down to the path_range_query.  Maybe we
>should  have another ctor with just:
>
>path_range_query (bool resolve);
>
>...and have it allocate its own ranger.
>
>Does this seem like a useful improvement?  For that matter, resolve
>should default to true.  The option is only there so the backward
>threader can run in a "light" mode (early threading, etc).

I've just copied from the two duplicate instances of this, so I don't know 
nothing here ;) 

Richard. 
>
>Aldy
>



Re: [PATCH] rs6000: Fix a handful of 32-bit built-in function problems in the new support

2021-11-11 Thread Bill Schmidt via Gcc-patches
Hi!

On 11/11/21 7:11 AM, Segher Boessenkool wrote:
> On Wed, Nov 10, 2021 at 03:28:18PM -0600, Bill Schmidt wrote:
>> On 11/10/21 2:33 AM, Segher Boessenkool wrote:
>>> On Tue, Nov 09, 2021 at 03:46:54PM -0600, Bill Schmidt wrote:
* config/rs6000/rs6000-builtin-new.def (CMPB): Flag as no32bit.
(BPERMD): Flag as 32bit.
> So, change this to something like "flag this as needing special handling
> on 32 bit" or something?

Sure.
>
 -  void __builtin_set_texasr (unsigned long long);
 +  void __builtin_set_texasr (unsigned long);
  SET_TEXASR nothing {htm,htmspr}
  
 -  void __builtin_set_texasru (unsigned long long);
 +  void __builtin_set_texasru (unsigned long);
  SET_TEXASRU nothing {htm,htmspr}
  
 -  void __builtin_set_tfhar (unsigned long long);
 +  void __builtin_set_tfhar (unsigned long);
  SET_TFHAR nothing {htm,htmspr}
  
 -  void __builtin_set_tfiar (unsigned long long);
 +  void __builtin_set_tfiar (unsigned long);
  SET_TFIAR nothing {htm,htmspr}
>>> This does not seem to be what the exiting code does, either?  Try with
>>> -m32 -mpowerpc64 (it extends to 64 bit there, so the builtin does not
>>> have long int as parameter, it has long long int).
>> This uses a tfiar_t, which is a typedef for uintptr_t, so long int is 
>> appropriate.
>> This is necessary to make the HTM tests pass on 32-bit powerpc64.
> void f(long x) { __builtin_set_texasr(x); }
>
> built with -m32 -mpowerpc64 gives (in the expand dump):
>
> void f (long int x)
> {
>   long long unsigned int _1;
>
> ;;   basic block 2, loop depth 0
> ;;pred:   ENTRY
>   _1 = (long long unsigned int) x_2(D);
>   __builtin_set_texasr (_1); [tail call]
>   return;
> ;;succ:   EXIT
>
> }
>
> The builtins have a "long long" argument in the existing code, in this
> configuration.  And this is not the same as "long" here.

Hm, strange.  I'll have to go back and revisit this.  Something subtle going on.

Thanks,
Bill

>
 --- a/gcc/testsuite/gcc.target/powerpc/cmpb-3.c
 +++ b/gcc/testsuite/gcc.target/powerpc/cmpb-3.c
 @@ -8,7 +8,7 @@ void abort ();
  long long int
  do_compare (long long int a, long long int b)
  {
 -  return __builtin_cmpb (a, b);   /* { dg-error "'__builtin_cmpb' is not 
 supported in this compiler configuration" } */
 +  return __builtin_cmpb (a, b);   /* { dg-error "'__builtin_p6_cmpb' is 
 not supported in 32-bit mode" } */
  }
>>> The original spelling is the correct one?
>> This is something I have on my to-do list for the future, to see whether I
>> can improve it.  The overloaded function __builtin_cmpb gets translated to
>> the underlying non-overloaded builtin __builtin_p6_cmpb, and that's the only
>> name that's still around by the time we get to the error processing.  I want
>> to see whether I can add some infrastructure to recover the overloaded
>> function name in such cases.  Is it okay to defer this for now?
> It is fine to defer it.  It is not fine to change the testcase like
> this.  The user did not write __builtin_p6_cmpb (which is not even
> documented btw), so the compiler should not talk about that.  It is
> fine to leave the test failing for now.
>
>
> Segher


Re: [PATCH] tree-optimization/103188 - avoid running ranger on not-up-to-date SSA

2021-11-11 Thread Aldy Hernandez via Gcc-patches
Thanks for doing this!

>
> +  gimple_ranger *ranger = new gimple_ranger;
> +  path_range_query *query = new path_range_query (*ranger, /*resolve=*/true);

Hmmm, it looks like both clients are now instantiating a gimple_ranger
just so they can pass it down to the path_range_query.  Maybe we
should  have another ctor with just:

path_range_query (bool resolve);

...and have it allocate its own ranger.

Does this seem like a useful improvement?  For that matter, resolve
should default to true.  The option is only there so the backward
threader can run in a "light" mode (early threading, etc).

Aldy



Re: [PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Kito Cheng via Gcc-patches
Hi Philipp:

This testcase gets a wrong result with this patch even w/o the
si3_sext pattern:

#include 

#define MAX(A, B) ((A) > (B) ? (A) : (B))

long long __attribute__((noinline, noipa))
foo6(long long a, long long b, int c)
{
  int xa = a;
  int xb = b;
  return MAX(MAX(xa, xb), c);
}
int main() {
  long long a = 0x2ll;
  long long b = 0x1l;
  int c = 10;
  long long d = foo6(a, b, c);
  printf ("%lld %lld %d = %lld\n", a, b, c, d);
  return 0;
}

On Fri, Nov 12, 2021 at 12:27 AM Kito Cheng  wrote:
>
> IIRC it does not work even without the sign extend pattern, since I did a similar 
> experiment before (not for RISC-V, but same concept). I guess I need more 
> time to test that.
>
> Philipp Tomsich  於 2021年11月12日 週五 00:18 寫道:
>>
>> Kito,
>>
>> Unless I am missing something, the problem is not the relaxation to
>> GPR, but rather the sign-extending pattern I had squashed into the
>> same patch.
>> If you disable "si3_sext", a sext.w will have to be
>> emitted after the 'max' and before the return (or before the SImode
>> output is consumed as a DImode), pushing the REE opportunity to a
>> subsequent consumer (e.g. an addw).
>>
>> This will generate
>>foo6:
>>   max a0,a0,a1
>>   sext.w a0,a0
>>   ret
>> which (assuming that the inputs to max are properly sign-extended
>> SImode values living in DImode registers) will be the same as
>> performing the two sext.w before the max.
>>
>> Having a second set of eyes on this is appreciated — let me know if
>> you agree and I'll revise, once I have collected feedback on the
>> remaining patches of the series.
>>
>> Philipp.
>>
>>
>> On Thu, 11 Nov 2021 at 17:00, Kito Cheng  wrote:
>> >
>> > Hi Philipp:
>> >
>> > We can't pretend we have SImode min/max instruction without that semantic.
>> > Give this testcase, x86 and rv64gc print out 8589934592 8589934591 = 0,
>> > but with this patch and compile with rv64gc_zba_zbb -O3, the output
>> > become 8589934592 8589934591 = 8589934592
>> >
>> > -Testcase---
>> > #include 
>> > long long __attribute__((noinline, noipa))
>> > foo6(long long a, long long b)
>> > {
>> >   int xa = a;
>> >   int xb = b;
>> >   return (xa > xb ? xa : xb);
>> > }
>> > int main() {
>> >   long long a = 0x2ll;
>> >   long long b = 0x1l;
>> >   long long c = foo6(a, b);
>> >   printf ("%lld %lld = %lld\n", a, b, c);
>> >   return 0;
>> > }
>> > --
>> > v64gc_zba_zbb -O3 w/o this patch:
>> > foo6:
>> > sext.w  a1,a1
>> > sext.w  a0,a0
>> > max a0,a0,a1
>> > ret
>> >
>> > --
>> > v64gc_zba_zbb -O3 w/ this patch:
>> > foo6:
>> > max a0,a0,a1
>> > ret
>> >
>> > On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
>> >  wrote:
>> > >
>> > > While min/minu/max/maxu instructions are provided for XLEN only, these
>> > > can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
>> > > always sign-extended, which ensures that the XLEN-wide instructions
>> > > can be used for signed and unsigned comparisons on SImode yielding a
>> > > correct ordering of value.
>> > >
>> > > This commit
>> > >  - relaxes the minmax pattern to express for GPR (instead of X only),
>> > >providing both a si3 and di3 expansion on RV64
>> > >  - adds a sign-extending form for the si3 pattern for RV64 to allow REE
>> > >to eliminate redundant extensions
>> > >  - adds test-cases for both
>> > >
>> > > gcc/ChangeLog:
>> > >
>> > > * config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
>> > >   DImode) on RV64.
>> > > * config/riscv/bitmanip.md (si3_sext): Add
>> > >   pattern for REE.
>> > >
>> > > gcc/testsuite/ChangeLog:
>> > >
>> > > * gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
>> > >   operands checking that no redundant sign- or zero-extensions
>> > >   are emitted.
>> > >
>> > > Signed-off-by: Philipp Tomsich 
>> > > ---
>> > >
>> > >  gcc/config/riscv/bitmanip.md | 14 +++---
>> > >  gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
>> > >  2 files changed, 28 insertions(+), 6 deletions(-)
>> > >
>> > > diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
>> > > index 000deb48b16..2a28f78f5f6 100644
>> > > --- a/gcc/config/riscv/bitmanip.md
>> > > +++ b/gcc/config/riscv/bitmanip.md
>> > > @@ -260,13 +260,21 @@ (define_insn "bswap2"
>> > >[(set_attr "type" "bitmanip")])
>> > >
>> > >  (define_insn "3"
>> > > -  [(set (match_operand:X 0 "register_operand" "=r")
>> > > -(bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
>> > > -  (match_operand:X 2 "register_operand" "r")))]
>> > > +  [(set (match_operand:GPR 0 "register_operand" "=r")
>> > > +(bitmanip_minmax:GPR (match_operand:GPR 1 "register_operand" 
>> > > "r")
>> > > +(match_operand:GPR 2 

Re: [PATCH][V2] rs6000: Remove unnecessary option manipulation.

2021-11-11 Thread Martin Liška



PING^1

On 11/4/21 13:36, Martin Liška wrote:

Sending the patch in a separate thread.

Ready for master?

Cheers,
Martin

gcc/ChangeLog:

 * config/rs6000/rs6000.c (rs6000_override_options_after_change):
 Do not set flag_rename_registers, it's already enabled with
 EnabledBy(funroll-loops).
 Use EnabledBy for unroll_only_small_loops.
 * config/rs6000/rs6000.opt: Use EnabledBy for
 unroll_only_small_loops.
---
  gcc/config/rs6000/rs6000.c   | 7 +--
  gcc/config/rs6000/rs6000.opt | 2 +-
  2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 01a95591a5d..b9dddcd0aa1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3472,13 +3472,8 @@ rs6000_override_options_after_change (void)
    /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
   turns -frename-registers on.  */
    if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
-   || (OPTION_SET_P (flag_unroll_all_loops)
-   && flag_unroll_all_loops))
+   || (OPTION_SET_P (flag_unroll_all_loops) && flag_unroll_all_loops))
  {
-  if (!OPTION_SET_P (unroll_only_small_loops))
-    unroll_only_small_loops = 0;
-  if (!OPTION_SET_P (flag_rename_registers))
-    flag_rename_registers = 1;
    if (!OPTION_SET_P (flag_cunroll_grow_size))
  flag_cunroll_grow_size = 1;
  }
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 9d7878f144a..faeb7423ca7 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -546,7 +546,7 @@ Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
  Analyze and remove doubleword swaps from VSX computations.

  munroll-only-small-loops
-Target Undocumented Var(unroll_only_small_loops) Init(0) Save
+Target Undocumented Var(unroll_only_small_loops) Init(0) Save 
EnabledBy(funroll-loops)
  ; Use conservative small loop unrolling.

  mpower9-misc




Re: [PATCH] fixincludes: don't assume getcwd() can handle NULL argument

2021-11-11 Thread Jeff Law via Gcc-patches




On 11/11/2021 6:04 AM, Eric Gallager via Gcc-patches wrote:

On Tue, Nov 9, 2021 at 8:50 AM Xi Ruoyao via Gcc-patches
 wrote:

POSIX says:

 On some implementations, if buf is a null pointer, getcwd() may obtain
 size bytes of memory using malloc(). In this case, the pointer returned
 by getcwd() may be used as the argument in a subsequent call to free().
 Invoking getcwd() with buf as a null pointer is not recommended in
 conforming applications.

This produces an error building GCC with --enable-werror-always:

 ../../../fixincludes/fixincl.c: In function ‘process’:
 ../../../fixincludes/fixincl.c:1356:7: error: argument 1 is null but
 the corresponding size argument 2 value is 4096 [-Werror=nonnull]

And, at least we've been leaking memory even if getcwd() supports this
non-standard extension.

fixincludes/ChangeLog:

 * fixincl.c (process): Allocate and deallocate the buffer for
   getcwd() explicitly.
---
  fixincludes/fixincl.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fixincludes/fixincl.c b/fixincludes/fixincl.c
index 6dba2f6e830..b4b1e38ede7 100644
--- a/fixincludes/fixincl.c
+++ b/fixincludes/fixincl.c
@@ -1353,9 +1353,11 @@ process (void)
if (access (pz_curr_file, R_OK) != 0)
  {
int erno = errno;
+  char *buf = xmalloc (MAXPATHLEN);
fprintf (stderr, "Cannot access %s from %s\n\terror %d (%s)\n",
-   pz_curr_file, getcwd ((char *) NULL, MAXPATHLEN),
+   pz_curr_file, getcwd (buf, MAXPATHLEN),
 erno, xstrerror (erno));
+  free (buf);
return;
  }

--
2.33.1

This seems to contradict bug 21823:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21823
I think the suggestion in that BZ is fundamentally broken in that it 
depends on behavior extensions that can not be relied upon. Providing a 
backup value of MAXPATHLEN for systems that don't provide it is a better 
choice.


I'm less concerned about the leak and much more concerned about 
depending on the posix extension.


Jeff


[PATCH] Implement -fprofile-prefix-map.

2021-11-11 Thread Martin Liška

It's functionality that is analogous to -ffile-prefix-map, this time
for gcov purpose.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

PR gcov-profile/96092

gcc/ChangeLog:

* common.opt: New option.
* coverage.c (coverage_begin_function): Emit filename with
remap_profile_filename.
* doc/invoke.texi: Document the new option.
* file-prefix-map.c (add_profile_prefix_map): New.
(remap_profile_filename): Likewise.
* file-prefix-map.h (add_profile_prefix_map): Likewise.
(remap_profile_filename): Likewise.
* lto-opts.c (lto_write_options): Handle
OPT_fprofile_prefix_map_.
* opts-global.c (handle_common_deferred_options): Likewise.
* opts.c (common_handle_option): Likewise.
(gen_command_line_string): Likewise.
* profile.c (output_location): Emit filename with
remap_profile_filename.
---
 gcc/common.opt|  4 
 gcc/coverage.c|  3 ++-
 gcc/doc/invoke.texi   | 14 --
 gcc/file-prefix-map.c | 17 +
 gcc/file-prefix-map.h |  2 ++
 gcc/lto-opts.c|  1 +
 gcc/opts-global.c |  4 
 gcc/opts.c|  2 ++
 gcc/profile.c |  4 
 9 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index de9b848eda5..0b59b46b875 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2323,6 +2323,10 @@ fprofile-prefix-path=
 Common Joined RejectNegative Var(profile_prefix_path)
 Remove prefix from absolute path before mangling name for -fprofile-generate= 
and -fprofile-use=.
 
+fprofile-prefix-map=

+Common Joined RejectNegative Var(common_deferred_options) Defer
+-fprofile-prefix-map==   Map one directory name to another in GCOV 
coverage result.
+
 fprofile-generate
 Common
 Enable common options for generating profile info for profile feedback 
directed optimizations.
diff --git a/gcc/coverage.c b/gcc/coverage.c
index 4daa3f9fc30..7f8b532cb52 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "profile.h"
 #include "diagnostic.h"
 #include "varasm.h"
+#include "file-prefix-map.h"
 
 #include "gcov-io.c"
 
@@ -646,7 +647,7 @@ coverage_begin_function (unsigned lineno_checksum, unsigned cfg_checksum)

   gcov_write_unsigned (DECL_ARTIFICIAL (current_function_decl)
   && !DECL_FUNCTION_VERSIONED (current_function_decl)
   && !DECL_LAMBDA_FUNCTION_P (current_function_decl));
-  gcov_write_filename (startloc.file);
+  gcov_write_filename (remap_profile_filename (startloc.file));
   gcov_write_unsigned (startloc.line);
   gcov_write_unsigned (startloc.column);
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 2aba4c70b44..5fb6a8bfffe 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -606,7 +606,8 @@ Objective-C and Objective-C++ Dialects}.
 -fvtv-counts  -fvtv-debug @gol
 -finstrument-functions @gol
 -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
--finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{}}
+-finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{}} @gol
+-fprofile-prefix-map=@var{old}=@var{new}
 
 @item Preprocessor Options

 @xref{Preprocessor Options,,Options Controlling the Preprocessor}.
@@ -2173,7 +2174,8 @@ files resided in directory @file{@var{new}} instead.  
Specifying this
 option is equivalent to specifying all the individual
 @option{-f*-prefix-map} options.  This can be used to make reproducible
 builds that are location independent.  See also
-@option{-fmacro-prefix-map} and @option{-fdebug-prefix-map}.
+@option{-fmacro-prefix-map}, @option{-fdebug-prefix-map} and
+@option{-fprofile-prefix-map}.
 
 @item -fplugin=@var{name}.so

 @opindex fplugin
@@ -15155,6 +15157,14 @@ In such setups 
@option{-fprofile-prefix-path=}@var{path} with @var{path}
 pointing to the base directory of the build can be used to strip the irrelevant
 part of the path and keep all file names relative to the main build directory.
 
+@item -fprofile-prefix-map=@var{old}=@var{new}

+@opindex fprofile-prefix-map
+When compiling files residing in directory @file{@var{old}}, record
+profiling information (with @option{--coverage})
+describing them as if the files resided in
+directory @file{@var{new}} instead.
+See also @option{-ffile-prefix-map}.
+
 @item -fprofile-update=@var{method}
 @opindex fprofile-update
 
diff --git a/gcc/file-prefix-map.c b/gcc/file-prefix-map.c

index ad242e5b9c5..290b4b2da33 100644
--- a/gcc/file-prefix-map.c
+++ b/gcc/file-prefix-map.c
@@ -92,6 +92,7 @@ remap_filename (file_prefix_map *maps, const char *filename)
 /* Linked lists of file_prefix_map structures.  */
 static file_prefix_map *macro_prefix_maps; /* -fmacro-prefix-map  */
 static file_prefix_map *debug_prefix_maps; /* -fdebug-prefix-map  */
+static 

Enable pure/const discovery in modref

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
this patch enables the pure/const discovery in modref, so we can now handle
some extra cases, for example:

struct a {int a,b,c;};
__attribute__ ((noinline))
int init (struct a *a)
{
  a->a=1;
  a->b=2;
  a->c=3;
}
int const_fn () 
{
  struct a a;
  init ();
  return a.a + a.b + a.c;
}

Here pure/const stops on the fact that const_fn calls non-const init, while
modref knows that the memory it initializes is local to const_fn.

I ended up reordering passes so early modref is done after early pure-const
mostly to avoid the need to change the testsuite, which greps for const functions
being detected in pure-const.  Still some testsuite compensation is needed.

Bootstrapped/regtested x86_64-linux. Will commit it shortly.

gcc/ChangeLog:

2021-11-11  Jan Hubicka  

* ipa-modref.c (analyze_function): Do pure/const discovery, return
true on success.
(pass_modref::execute): If pure/const is discovered fixup cfg.
(ignore_edge): Do not ignore pure/const edges.
(modref_propagate_in_scc): Do pure/const discovery, return true if
cdtor was promoted pure/const.
(pass_ipa_modref::execute): If needed remove unreachable functions.
* ipa-pure-const.c (warn_function_noreturn): Fix whitespace.
(warn_function_cold): Likewise.
(skip_function_for_local_pure_const): Move earlier.
(ipa_make_function_const): Break out from ...
(ipa_make_function_pure): Break out from ...
(propagate_pure_const): ... here.
(pass_local_pure_const::execute): Use it.
* ipa-utils.h (ipa_make_function_const): Declare.
(ipa_make_function_pure): Declare.
* passes.def: Move early modref after pure-const.

gcc/testsuite/ChangeLog:

2021-11-11  Jan Hubicka  

* c-c++-common/tm/inline-asm.c: Disable pure-const.
* g++.dg/ipa/modref-1.C: Update template.
* gcc.dg/tree-ssa/modref-11.c: Disable pure-const.
* gcc.dg/tree-ssa/modref-14.c: New test.
* gcc.dg/tree-ssa/modref-8.c: Do not optimize sibling calls.
* gfortran.dg/do_subscript_3.f90: Add -O0.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 45b391a565e..72006251f29 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -2603,11 +2603,13 @@ analyze_parms (modref_summary *summary, 
modref_summary_lto *summary_lto,
 }
 
 /* Analyze function F.  IPA indicates whether we're running in local mode
-   (false) or the IPA mode (true).  */
+   (false) or the IPA mode (true).
+   Return true if fixup cfg is needed after the pass.  */
 
-static void
+static bool
 analyze_function (function *f, bool ipa)
 {
+  bool fixup_cfg = false;
   if (dump_file)
 fprintf (dump_file, "modref analyzing '%s' (ipa=%i)%s%s\n",
 function_name (f), ipa,
@@ -2617,7 +2619,7 @@ analyze_function (function *f, bool ipa)
   /* Don't analyze this function if it's compiled with -fno-strict-aliasing.  
*/
   if (!flag_ipa_modref
   || lookup_attribute ("noipa", DECL_ATTRIBUTES (current_function_decl)))
-return;
+return false;
 
   /* Compute no-LTO summaries when local optimization is going to happen.  */
   bool nolto = (!ipa || ((!flag_lto || flag_fat_lto_objects) && !in_lto_p)
@@ -2774,12 +2776,32 @@ analyze_function (function *f, bool ipa)
  if (!summary->useful_p (ecf_flags, false))
{
  remove_summary (lto, nolto, ipa);
- return;
+ return false;
}
}
  first = false;
}
 }
+  if (summary && !summary->global_memory_written_p () && !summary->side_effects
+  && !finite_function_p ())
+summary->side_effects = true;
+  if (summary_lto && !summary_lto->side_effects && !finite_function_p ())
+summary_lto->side_effects = true;
+
+  if (!ipa && flag_ipa_pure_const)
+{
+  if (!summary->stores->every_base && !summary->stores->bases)
+   {
+ if (!summary->loads->every_base && !summary->loads->bases)
+   fixup_cfg = ipa_make_function_const
+  (cgraph_node::get (current_function_decl),
+   summary->side_effects, true);
+ else
+   fixup_cfg = ipa_make_function_pure
+  (cgraph_node::get (current_function_decl),
+   summary->side_effects, true);
+   }
+}
   if (summary && !summary->useful_p (ecf_flags))
 {
   if (!ipa)
@@ -2793,11 +2815,6 @@ analyze_function (function *f, bool ipa)
   summaries_lto->remove (fnode);
   summary_lto = NULL;
 }
-  if (summary && !summary->global_memory_written_p () && !summary->side_effects
-  && !finite_function_p ())
-summary->side_effects = true;
-  if (summary_lto && !summary_lto->side_effects && !finite_function_p ())
-summary_lto->side_effects = true;
 
   if (ipa && !summary && !summary_lto)
 remove_modref_edge_summaries (fnode);
@@ -2907,6 +2924,7 @@ analyze_function (function *f, bool ipa)
}
}
 }
+  return 

[PATCH] fixincludes: fix portability issues with getcwd() [PR21823, PR80047]

2021-11-11 Thread Xi Ruoyao via Gcc-patches
[Revised to handle PR 21823.]

POSIX says:

On some implementations, if buf is a null pointer, getcwd() may obtain
size bytes of memory using malloc(). In this case, the pointer returned
by getcwd() may be used as the argument in a subsequent call to free().
Invoking getcwd() with buf as a null pointer is not recommended in
conforming applications.

This produces an error building GCC with --enable-werror-always:

../../../fixincludes/fixincl.c: In function ‘process’:
../../../fixincludes/fixincl.c:1356:7: error: argument 1 is null but
the corresponding size argument 2 value is 4096 [-Werror=nonnull]

And, at least we've been leaking memory even if getcwd() supports this
non-standard extension.

And, MAXPATHLEN may be unavailable on certain platforms.  PATH_MAX is
POSIX, but getcwd() may produce a path with length larger than it.  So it's
suggested by POSIX [1] to call getcwd() with progressively larger buffers
until it does not give an [ERANGE] error.

[1]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/getcwd.html

fixincludes/ChangeLog:

PR other/21823
PR bootstrap/80047
* fixincl.c (process): Allocate and deallocate the buffer for
  getcwd() progressively.
---
 fixincludes/fixincl.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fixincludes/fixincl.c b/fixincludes/fixincl.c
index 6dba2f6e830..1580c67efec 100644
--- a/fixincludes/fixincl.c
+++ b/fixincludes/fixincl.c
@@ -1353,9 +1353,18 @@ process (void)
   if (access (pz_curr_file, R_OK) != 0)
 {
   int erno = errno;
+  char *buf = NULL;
+  const char *cwd = NULL;
+  for (size_t size = 256; !cwd; size += size)
+   {
+ buf = xrealloc (buf, size);
+ cwd = getcwd (buf, size);
+ if (!cwd && errno != ERANGE)
+   cwd = "the working directory";
+   }
   fprintf (stderr, "Cannot access %s from %s\n\terror %d (%s)\n",
-   pz_curr_file, getcwd ((char *) NULL, MAXPATHLEN),
-   erno, xstrerror (erno));
+  pz_curr_file, cwd, erno, xstrerror (erno));
+  free (buf);
   return;
 }
 
-- 
2.33.1




Re: [PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Kito Cheng via Gcc-patches
IIRC it does not work even without the sign extend pattern, since I did a
similar experiment before (not for RISC-V, but same concept); I guess I need
more time to test that.

Philipp Tomsich  於 2021年11月12日 週五 00:18 寫道:

> Kito,
>
> Unless I am missing something, the problem is not the relaxation to
> GPR, but rather the sign-extending pattern I had squashed into the
> same patch.
> If you disable "si3_sext", a sext.w will have to be
> emitted after the 'max' and before the return (or before the SImode
> output is consumed as a DImode), pushing the REE opportunity to a
> subsequent consumer (e.g. an addw).
>
> This will generate
>foo6:
>   max a0,a0,a1
>   sext.w a0,a0
>   ret
> which (assuming that the inputs to max are properly sign-extended
> SImode values living in DImode registers) will be the same as
> performing the two sext.w before the max.
>
> Having a second set of eyes on this is appreciated — let me know if
> you agree and I'll revise, once I have collected feedback on the
> remaining patches of the series.
>
> Philipp.
>
>
> On Thu, 11 Nov 2021 at 17:00, Kito Cheng  wrote:
> >
> > Hi Philipp:
> >
> > We can't pretend we have SImode min/max instruction without that
> semantic.
> > Give this testcase, x86 and rv64gc print out 8589934592 8589934591 = 0,
> > but with this patch and compile with rv64gc_zba_zbb -O3, the output
> > become 8589934592 8589934591 = 8589934592
> >
> > -Testcase---
> > #include 
> > long long __attribute__((noinline, noipa))
> > foo6(long long a, long long b)
> > {
> >   int xa = a;
> >   int xb = b;
> >   return (xa > xb ? xa : xb);
> > }
> > int main() {
> >   long long a = 0x2ll;
> >   long long b = 0x1l;
> >   long long c = foo6(a, b);
> >   printf ("%lld %lld = %lld\n", a, b, c);
> >   return 0;
> > }
> > --
> > v64gc_zba_zbb -O3 w/o this patch:
> > foo6:
> > sext.w  a1,a1
> > sext.w  a0,a0
> > max a0,a0,a1
> > ret
> >
> > --
> > v64gc_zba_zbb -O3 w/ this patch:
> > foo6:
> > max a0,a0,a1
> > ret
> >
> > On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
> >  wrote:
> > >
> > > While min/minu/max/maxu instructions are provided for XLEN only, these
> > > can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
> > > always sign-extended, which ensures that the XLEN-wide instructions
> > > can be used for signed and unsigned comparisons on SImode yielding a
> > > correct ordering of value.
> > >
> > > This commit
> > >  - relaxes the minmax pattern to express for GPR (instead of X only),
> > >providing both a si3 and di3 expansion on RV64
> > >  - adds a sign-extending form for the si3 pattern for RV64 to allow REE
> > >to eliminate redundant extensions
> > >  - adds test-cases for both
> > >
> > > gcc/ChangeLog:
> > >
> > > * config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
> > >   DImode) on RV64.
> > > * config/riscv/bitmanip.md (si3_sext): Add
> > >   pattern for REE.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
> > >   operands checking that no redundant sign- or zero-extensions
> > >   are emitted.
> > >
> > > Signed-off-by: Philipp Tomsich 
> > > ---
> > >
> > >  gcc/config/riscv/bitmanip.md | 14 +++---
> > >  gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
> > >  2 files changed, 28 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/gcc/config/riscv/bitmanip.md
> b/gcc/config/riscv/bitmanip.md
> > > index 000deb48b16..2a28f78f5f6 100644
> > > --- a/gcc/config/riscv/bitmanip.md
> > > +++ b/gcc/config/riscv/bitmanip.md
> > > @@ -260,13 +260,21 @@ (define_insn "bswap2"
> > >[(set_attr "type" "bitmanip")])
> > >
> > >  (define_insn "3"
> > > -  [(set (match_operand:X 0 "register_operand" "=r")
> > > -(bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
> > > -  (match_operand:X 2 "register_operand"
> "r")))]
> > > +  [(set (match_operand:GPR 0 "register_operand" "=r")
> > > +(bitmanip_minmax:GPR (match_operand:GPR 1 "register_operand"
> "r")
> > > +(match_operand:GPR 2 "register_operand"
> "r")))]
> > >"TARGET_ZBB"
> > >"\t%0,%1,%2"
> > >[(set_attr "type" "bitmanip")])
> > >
> > > +(define_insn "si3_sext"
> > > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > > +(sign_extend:DI (bitmanip_minmax:SI (match_operand:SI 1
> "register_operand" "r")
> > > +(match_operand:SI 2 "register_operand"
> "r"]
> > > +  "TARGET_64BIT && TARGET_ZBB"
> > > +  "\t%0,%1,%2"
> > > +  [(set_attr "type" "bitmanip")])
> > > +
> > >  ;; orc.b (or-combine) is added as an unspec for the benefit of the
> support
> > >  ;; for optimized string functions (such as strcmp).
> > >  

Re: [PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Philipp Tomsich
Kito,

Unless I am missing something, the problem is not the relaxation to
GPR, but rather the sign-extending pattern I had squashed into the
same patch.
If you disable "si3_sext", a sext.w will have to be
emitted after the 'max' and before the return (or before the SImode
output is consumed as a DImode), pushing the REE opportunity to a
subsequent consumer (e.g. an addw).

This will generate
   foo6:
  max a0,a0,a1
  sext.w a0,a0
  ret
which (assuming that the inputs to max are properly sign-extended
SImode values living in DImode registers) will be the same as
performing the two sext.w before the max.

Having a second set of eyes on this is appreciated — let me know if
you agree and I'll revise, once I have collected feedback on the
remaining patches of the series.

Philipp.


On Thu, 11 Nov 2021 at 17:00, Kito Cheng  wrote:
>
> Hi Philipp:
>
> We can't pretend we have SImode min/max instruction without that semantic.
> Give this testcase, x86 and rv64gc print out 8589934592 8589934591 = 0,
> but with this patch and compile with rv64gc_zba_zbb -O3, the output
> become 8589934592 8589934591 = 8589934592
>
> -Testcase---
> #include 
> long long __attribute__((noinline, noipa))
> foo6(long long a, long long b)
> {
>   int xa = a;
>   int xb = b;
>   return (xa > xb ? xa : xb);
> }
> int main() {
>   long long a = 0x2ll;
>   long long b = 0x1l;
>   long long c = foo6(a, b);
>   printf ("%lld %lld = %lld\n", a, b, c);
>   return 0;
> }
> --
> v64gc_zba_zbb -O3 w/o this patch:
> foo6:
> sext.w  a1,a1
> sext.w  a0,a0
> max a0,a0,a1
> ret
>
> --
> v64gc_zba_zbb -O3 w/ this patch:
> foo6:
> max a0,a0,a1
> ret
>
> On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
>  wrote:
> >
> > While min/minu/max/maxu instructions are provided for XLEN only, these
> > can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
> > always sign-extended, which ensures that the XLEN-wide instructions
> > can be used for signed and unsigned comparisons on SImode yielding a
> > correct ordering of value.
> >
> > This commit
> >  - relaxes the minmax pattern to express for GPR (instead of X only),
> >providing both a si3 and di3 expansion on RV64
>  - adds a sign-extending form for the si3 pattern for RV64 to allow REE
> >to eliminate redundant extensions
> >  - adds test-cases for both
> >
> > gcc/ChangeLog:
> >
> > * config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
> >   DImode) on RV64.
> > * config/riscv/bitmanip.md (si3_sext): Add
> >   pattern for REE.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
> >   operands checking that no redundant sign- or zero-extensions
> >   are emitted.
> >
> > Signed-off-by: Philipp Tomsich 
> > ---
> >
> >  gcc/config/riscv/bitmanip.md | 14 +++---
> >  gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
> >  2 files changed, 28 insertions(+), 6 deletions(-)
> >
> > diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
> > index 000deb48b16..2a28f78f5f6 100644
> > --- a/gcc/config/riscv/bitmanip.md
> > +++ b/gcc/config/riscv/bitmanip.md
> > @@ -260,13 +260,21 @@ (define_insn "bswap2"
> >[(set_attr "type" "bitmanip")])
> >
> >  (define_insn "3"
> > -  [(set (match_operand:X 0 "register_operand" "=r")
> > -(bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
> > -  (match_operand:X 2 "register_operand" "r")))]
> > +  [(set (match_operand:GPR 0 "register_operand" "=r")
> > +(bitmanip_minmax:GPR (match_operand:GPR 1 "register_operand" "r")
> > +(match_operand:GPR 2 "register_operand" "r")))]
> >"TARGET_ZBB"
> >"\t%0,%1,%2"
> >[(set_attr "type" "bitmanip")])
> >
> > +(define_insn "si3_sext"
> > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > +(sign_extend:DI (bitmanip_minmax:SI (match_operand:SI 1 
> > "register_operand" "r")
> > +(match_operand:SI 2 "register_operand" "r"]
> > +  "TARGET_64BIT && TARGET_ZBB"
> > +  "\t%0,%1,%2"
> > +  [(set_attr "type" "bitmanip")])
> > +
> >  ;; orc.b (or-combine) is added as an unspec for the benefit of the support
> >  ;; for optimized string functions (such as strcmp).
> >  (define_insn "orcb2"
> > diff --git a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c 
> > b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> > index f44c398ea08..7169e873551 100644
> > --- a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> > +++ b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> > @@ -1,5 +1,5 @@
> >  /* { dg-do compile } */
> > -/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */
> > +/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64 -O2" } */
> >
> >  long
> >  

Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-11-11 Thread Andre Vieira (lists) via Gcc-patches

Hi,

This is the rebased and reworked version of the unroll patch.  I wasn't 
entirely sure whether I should compare the costs of the unrolled 
loop_vinfo with the original loop_vinfo it was unrolled of. I did now, 
but I wasn't too sure whether it was a good idea to... Any thoughts on 
this?


Regards,

Andre


gcc/ChangeLog:

    * tree-vect-loop.c (vect_estimate_min_profitable_iters): Add 
suggested_unroll_factor parameter.

    (vect_analyze_loop_costing): Likewise.
    (vect_determine_partial_vectors_and_peeling): Don't mask an 
unrolled loop.

    (vect_analyze_loop_2): Support unrolling of loops.
    (vect_can_unroll): New function.
    (vect_try_unrolling): New function.
    (vect_analyze_loop_1): Add suggested_unroll_factor parameter 
and use it.
    (vect_analyze_loop): Call vect_try_unrolling when unrolling 
suggested.

    (vectorizable_reduction): Don't single_defuse_cycle when unrolling.
    * tree-vectorizer.h (_loop_vec_info::_loop_vec_info):  Add 
suggested_unroll_factor member.

        (vector_costs::vector_costs): Add m_suggested_unroll_factor member.
    (vector_costs::suggested_unroll_factor): New getter.
    (finish_cost): Add suggested_unroll_factor out parameter and 
set it.
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 
a28bb6321d76b8222bc8cfdade151ca9b4dca406..cfce7de0430c852d37f1a93e2d6a2f630694f613
 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -153,7 +153,8 @@ along with GCC; see the file COPYING3.  If not see
http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 */
 
-static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
+static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
+   unsigned *);
 static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
   bool *, bool *);
 
@@ -828,6 +829,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
 skip_main_loop_edge (nullptr),
 skip_this_loop_edge (nullptr),
 reusable_accumulators (),
+suggested_unroll_factor (1),
 max_vectorization_factor (0),
 mask_skip_niters (NULL_TREE),
 rgroup_compare_type (NULL_TREE),
@@ -1811,7 +1813,8 @@ vect_known_niters_smaller_than_vf (loop_vec_info 
loop_vinfo)
definitely no, or -1 if it's worth retrying.  */
 
 static int
-vect_analyze_loop_costing (loop_vec_info loop_vinfo)
+vect_analyze_loop_costing (loop_vec_info loop_vinfo,
+  unsigned *suggested_unroll_factor)
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
@@ -1845,7 +1848,8 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
 
   int min_profitable_iters, min_profitable_estimate;
   vect_estimate_min_profitable_iters (loop_vinfo, _profitable_iters,
- _profitable_estimate);
+ _profitable_estimate,
+ suggested_unroll_factor);
 
   if (min_profitable_iters < 0)
 {
@@ -2129,10 +2133,16 @@ vect_determine_partial_vectors_and_peeling 
(loop_vec_info loop_vinfo,
 vectors to the epilogue, with the main loop continuing to operate
 on full vectors.
 
+If we are unrolling we also do not want to use partial vectors. This
+is to avoid the overhead of generating multiple masks and also to
+avoid having to execute entire iterations of FALSE masked instructions
+when dealing with one or less full iterations.
+
 ??? We could then end up failing to use partial vectors if we
 decide to peel iterations into a prologue, and if the main loop
 then ends up processing fewer than VF iterations.  */
-  if (param_vect_partial_vector_usage == 1
+  if ((param_vect_partial_vector_usage == 1
+  || loop_vinfo->suggested_unroll_factor > 1)
  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
  && !vect_known_niters_smaller_than_vf (loop_vinfo))
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
@@ -2199,12 +2209,12 @@ vect_determine_partial_vectors_and_peeling 
(loop_vec_info loop_vinfo,
for it.  The different analyses will record information in the
loop_vec_info struct.  */
 static opt_result
-vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool )
+vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool ,
+unsigned *suggested_unroll_factor, poly_uint64 min_vf = 2)
 {
   opt_result ok = opt_result::success ();
   int res;
   unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
-  poly_uint64 min_vf = 2;
   loop_vec_info orig_loop_vinfo = NULL;
 
   /* If we are dealing with an epilogue then orig_loop_vinfo points to the
@@ -2359,6 +2369,26 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
)
  set of rgroups.  */
   gcc_assert 

Re: [PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Kito Cheng via Gcc-patches
Hi Philipp:

We can't pretend we have SImode min/max instruction without that semantic.
Give this testcase, x86 and rv64gc print out 8589934592 8589934591 = 0,
but with this patch and compile with rv64gc_zba_zbb -O3, the output
become 8589934592 8589934591 = 8589934592

-Testcase---
#include 
long long __attribute__((noinline, noipa))
foo6(long long a, long long b)
{
  int xa = a;
  int xb = b;
  return (xa > xb ? xa : xb);
}
int main() {
  long long a = 0x2ll;
  long long b = 0x1l;
  long long c = foo6(a, b);
  printf ("%lld %lld = %lld\n", a, b, c);
  return 0;
}
--
v64gc_zba_zbb -O3 w/o this patch:
foo6:
sext.w  a1,a1
sext.w  a0,a0
max a0,a0,a1
ret

--
v64gc_zba_zbb -O3 w/ this patch:
foo6:
max a0,a0,a1
ret

On Thu, Nov 11, 2021 at 10:10 PM Philipp Tomsich
 wrote:
>
> While min/minu/max/maxu instructions are provided for XLEN only, these
> can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
> always sign-extended, which ensures that the XLEN-wide instructions
> can be used for signed and unsigned comparisons on SImode yielding a
> correct ordering of value.
>
> This commit
>  - relaxes the minmax pattern to express for GPR (instead of X only),
>providing both a si3 and di3 expansion on RV64
>  - adds a sign-extending form for the si3 pattern for RV64 to allow REE
>to eliminate redundant extensions
>  - adds test-cases for both
>
> gcc/ChangeLog:
>
> * config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
>   DImode) on RV64.
> * config/riscv/bitmanip.md (si3_sext): Add
>   pattern for REE.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
>   operands checking that no redundant sign- or zero-extensions
>   are emitted.
>
> Signed-off-by: Philipp Tomsich 
> ---
>
>  gcc/config/riscv/bitmanip.md | 14 +++---
>  gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
>  2 files changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
> index 000deb48b16..2a28f78f5f6 100644
> --- a/gcc/config/riscv/bitmanip.md
> +++ b/gcc/config/riscv/bitmanip.md
> @@ -260,13 +260,21 @@ (define_insn "bswap2"
>[(set_attr "type" "bitmanip")])
>
>  (define_insn "3"
> -  [(set (match_operand:X 0 "register_operand" "=r")
> -(bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
> -  (match_operand:X 2 "register_operand" "r")))]
> +  [(set (match_operand:GPR 0 "register_operand" "=r")
> +(bitmanip_minmax:GPR (match_operand:GPR 1 "register_operand" "r")
> +(match_operand:GPR 2 "register_operand" "r")))]
>"TARGET_ZBB"
>"\t%0,%1,%2"
>[(set_attr "type" "bitmanip")])
>
> +(define_insn "si3_sext"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +(sign_extend:DI (bitmanip_minmax:SI (match_operand:SI 1 
> "register_operand" "r")
> +(match_operand:SI 2 "register_operand" "r"]
> +  "TARGET_64BIT && TARGET_ZBB"
> +  "\t%0,%1,%2"
> +  [(set_attr "type" "bitmanip")])
> +
>  ;; orc.b (or-combine) is added as an unspec for the benefit of the support
>  ;; for optimized string functions (such as strcmp).
>  (define_insn "orcb2"
> diff --git a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c 
> b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> index f44c398ea08..7169e873551 100644
> --- a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> +++ b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */
> +/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64 -O2" } */
>
>  long
>  foo1 (long i, long j)
> @@ -25,7 +25,21 @@ foo4 (unsigned long i, unsigned long j)
>return i > j ? i : j;
>  }
>
> +unsigned int
> +foo5(unsigned int a, unsigned int b)
> +{
> +  return a > b ? a : b;
> +}
> +
> +int
> +foo6(int a, int b)
> +{
> +  return a > b ? a : b;
> +}
> +
>  /* { dg-final { scan-assembler-times "min" 3 } } */
> -/* { dg-final { scan-assembler-times "max" 3 } } */
> +/* { dg-final { scan-assembler-times "max" 4 } } */
>  /* { dg-final { scan-assembler-times "minu" 1 } } */
> -/* { dg-final { scan-assembler-times "maxu" 1 } } */
> +/* { dg-final { scan-assembler-times "maxu" 3 } } */
> +/* { dg-final { scan-assembler-not "zext.w" } } */
> +/* { dg-final { scan-assembler-not "sext.w" } } */
> --
> 2.32.0
>


[PATCH] libbacktrace: fix UBSAN issues

2021-11-11 Thread Martin Liška

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

Fix issues mentioned in the PR.

PR libbacktrace/103167

libbacktrace/ChangeLog:

* elf.c (elf_uncompress_lzma_block): Cast to unsigned int.
(elf_uncompress_lzma): Likewise.
* xztest.c (test_samples): memcpy only if v > 0.

Co-Authored-By: Andrew Pinski 
---
 libbacktrace/elf.c| 8 
 libbacktrace/xztest.c | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/libbacktrace/elf.c b/libbacktrace/elf.c
index 79d56146fc6..e69ac41c88b 100644
--- a/libbacktrace/elf.c
+++ b/libbacktrace/elf.c
@@ -3175,7 +3175,7 @@ elf_uncompress_lzma_block (const unsigned char 
*compressed,
   stream_crc = (compressed[off]
| (compressed[off + 1] << 8)
| (compressed[off + 2] << 16)
-   | (compressed[off + 3] << 24));
+   | ((unsigned)(compressed[off + 3]) << 24));
   if (unlikely (computed_crc != stream_crc))
 {
   elf_uncompress_failed ();
@@ -3788,7 +3788,7 @@ elf_uncompress_lzma (struct backtrace_state *state,
   stream_crc = (compressed[8]
| (compressed[9] << 8)
| (compressed[10] << 16)
-   | (compressed[11] << 24));
+   | ((unsigned)(compressed[11]) << 24));
   if (unlikely (computed_crc != stream_crc))
 {
   elf_uncompress_failed ();
@@ -3832,7 +3832,7 @@ elf_uncompress_lzma (struct backtrace_state *state,
   stream_crc = (compressed[offset - 4]
| (compressed[offset - 3] << 8)
| (compressed[offset - 2] << 16)
-   | (compressed[offset - 1] << 24));
+   | ((unsigned)(compressed[offset - 1]) << 24));
   if (unlikely (computed_crc != stream_crc))
 {
   elf_uncompress_failed ();
@@ -3891,7 +3891,7 @@ elf_uncompress_lzma (struct backtrace_state *state,
   stream_crc = (compressed[offset]
| (compressed[offset + 1] << 8)
| (compressed[offset + 2] << 16)
-   | (compressed[offset + 3] << 24));
+   | ((unsigned)(compressed[offset + 3]) << 24));
   if (unlikely (computed_crc != stream_crc))
 {
   elf_uncompress_failed ();
diff --git a/libbacktrace/xztest.c b/libbacktrace/xztest.c
index b2533cb1804..6c60ff50159 100644
--- a/libbacktrace/xztest.c
+++ b/libbacktrace/xztest.c
@@ -172,7 +172,7 @@ test_samples (struct backtrace_state *state)
   tests[i].name, uncompressed_len, v);
  ++failures;
}
- else if (memcmp (tests[i].uncompressed, uncompressed, v) != 0)
+ else if (v > 0 && memcmp (tests[i].uncompressed, uncompressed, v) != 
0)
{
  size_t j;
 
--

2.33.1



Re: [PATCH v2] libgcc: fix backtrace fallback on PowerPC Big-endian. [PR103004]

2021-11-11 Thread Segher Boessenkool
Hi!

On Thu, Nov 11, 2021 at 11:40:10AM -0300, Raphael Moreira Zinsly wrote:
> Changes since v1:
>   - Removed -Wmissing-prototypes fix.
>   - Fixed formatting of Changelog and patch.
> 
> --->8---
> 
> At the end of the backtrace stream _Unwind_Find_FDE() may not be able
> to find the frame unwind info and will later call the backtrace fallback
> instead of finishing. This occurs when using an old libc on ppc64 due to
> dl_iterate_phdr() not being able to set the fde in the last trace.
> When this occurs the cfa of the trace will be behind of context's cfa.
> Also, libgo’s probestackmaps() calls the backtrace with a null pointer
> and can get to the backchain fallback with the same problem. In this case
> we are only interested in finding a stack map; we don't need, nor can we
> do, a backchain.
> _Unwind_ForcedUnwind_Phase2() can hit the same issue as it uses
> uw_frame_state_for(), so we need to treat _URC_NORMAL_STOP.
> 
> libgcc/ChangeLog:
> 
>  * config/rs6000/linux-unwind.h (ppc_backchain_fallback): Check if 
> it's
>called with a null argument or at the end of the backtrace and return.
>  * unwind.inc (_Unwind_ForcedUnwind_Phase2): Treat _URC_NORMAL_STOP.

Committed, thanks!  Please note the changelog formatting fixes I had
to do, for later patches :-)


Segher


Re: [Patch] Fortran/openmp: Add support for 2 argument num_teams clause

2021-11-11 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 11, 2021 at 04:04:04PM +0100, Tobias Burnus wrote:
> Just the Fortran FE work + Fortranized version for the C tests.
> 
> Tobias
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955

> Fortran/openmp: Add support for 2 argument num_teams clause
> 
> Fortran part to commit r12-5146-g48d7327f2aaf65
> 
> gcc/fortran/ChangeLog:
> 
>   * gfortran.h (struct gfc_omp_clauses): Rename num_teams to
>   num_teams_upper, add num_teams_lower.
>   * dump-parse-tree.c (show_omp_clauses): Update to handle
>   lower-bound num_teams clause.
>   * frontend-passes.c (gfc_code_walker): Likewise
>   * openmp.c (gfc_free_omp_clauses, gfc_match_omp_clauses,
>   resolve_omp_clauses): Likewise.
>   * trans-openmp.c (gfc_trans_omp_clauses, gfc_split_omp_clauses,
>   gfc_trans_omp_target): Likewise.
> 
> libgomp/ChangeLog:
> 
>   * testsuite/libgomp.fortran/teams-1.f90: New test.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gfortran.dg/gomp/num-teams-1.f90: New test.
>   * gfortran.dg/gomp/num-teams-2.f90: New test.

Ok, thanks.

Slightly worried about the combined target teams case where C/C++
ensure the expressions used in there are evaluated before target
like:
  TARGET_EXPR ;
  TARGET_EXPR ;
  #pragma omp target firstprivate(D.2123) firstprivate(D.2122)
{
  {
#pragma omp teams num_teams(TARGET_EXPR :TARGET_EXPR 
)
but what I see in gfc_trans_omp_target seems to instead move the
clause to target, but I admit I haven't tried to eyeball a dump.

Jakub



[vect-patterns] Refactor widen_plus/widen_minus as internal_fns

2021-11-11 Thread Joel Hutton via Gcc-patches
Hi all,

This refactor allows widening vect patterns (such as widen_plus/widen_minus) to 
be represented as
either internal_fns or tree_codes and replaces the current 
widen_plus/widen_minus with internal_fn versions. This refactor is split into 3 
patches.

Boostrapped and regression tested on aarch64.

Ok for stage 3?




0001-vect-patterns-Refactor-to-allow-internal_fn-s.patch
Description: 0001-vect-patterns-Refactor-to-allow-internal_fn-s.patch


0002-vect-patterns-Refactor-widen_plus-as-internal_fn.patch
Description: 0002-vect-patterns-Refactor-widen_plus-as-internal_fn.patch


0003-Remove-widen_plus-minus_expr-tree-codes.patch
Description: 0003-Remove-widen_plus-minus_expr-tree-codes.patch


[PATCH] tree-optimization/103190 - fix assert in reassoc stmt placement with asm

2021-11-11 Thread Richard Biener via Gcc-patches
This makes sure to only assert we don't run into a asm goto when
inserting a stmt in reassoc, matching the condition in
can_reassociate_p.  We can handle EH edges from an asm just like
EH edges from any other stmt.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-11  Richard Biener  

PR tree-optimization/103190
* tree-ssa-reassoc.c (insert_stmt_after): Only assert on asm goto.
---
 gcc/tree-ssa-reassoc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 6a555e7c553..65316223047 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -1515,7 +1515,8 @@ insert_stmt_after (gimple *stmt, gimple *insert_point)
   gsi_insert_after (, stmt, GSI_NEW_STMT);
   return;
 }
-  else if (gimple_code (insert_point) == GIMPLE_ASM)
+  else if (gimple_code (insert_point) == GIMPLE_ASM
+  && gimple_asm_nlabels (as_a  (insert_point)) != 0)
 /* We have no idea where to insert - it depends on where the
uses will be placed.  */
 gcc_unreachable ();
-- 
2.31.1


[Patch] Fortran/openmp: Add support for 2 argument num_teams clause

2021-11-11 Thread Tobias Burnus

Just the Fortran FE work + Fortranized version for the C tests.

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Fortran/openmp: Add support for 2 argument num_teams clause

Fortran part to commit r12-5146-g48d7327f2aaf65

gcc/fortran/ChangeLog:

	* gfortran.h (struct gfc_omp_clauses): Rename num_teams to
	num_teams_upper, add num_teams_lower.
	* dump-parse-tree.c (show_omp_clauses): Update to handle
	lower-bound num_teams clause.
	* frontend-passes.c (gfc_code_walker): Likewise
	* openmp.c (gfc_free_omp_clauses, gfc_match_omp_clauses,
	resolve_omp_clauses): Likewise.
	* trans-openmp.c (gfc_trans_omp_clauses, gfc_split_omp_clauses,
	gfc_trans_omp_target): Likewise.

libgomp/ChangeLog:

	* testsuite/libgomp.fortran/teams-1.f90: New test.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/num-teams-1.f90: New test.
	* gfortran.dg/gomp/num-teams-2.f90: New test.

 gcc/fortran/dump-parse-tree.c  |  9 -
 gcc/fortran/frontend-passes.c  |  3 +-
 gcc/fortran/gfortran.h |  3 +-
 gcc/fortran/openmp.c   | 32 +---
 gcc/fortran/trans-openmp.c | 35 -
 gcc/testsuite/gfortran.dg/gomp/num-teams-1.f90 | 53 ++
 gcc/testsuite/gfortran.dg/gomp/num-teams-2.f90 | 37 ++
 libgomp/testsuite/libgomp.fortran/teams-1.f90  | 22 +++
 8 files changed, 175 insertions(+), 19 deletions(-)

diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c
index 14a307856fc..04660d5074a 100644
--- a/gcc/fortran/dump-parse-tree.c
+++ b/gcc/fortran/dump-parse-tree.c
@@ -1741,10 +1741,15 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses)
 	}
   fprintf (dumpfile, " BIND(%s)", type);
 }
-  if (omp_clauses->num_teams)
+  if (omp_clauses->num_teams_upper)
 {
   fputs (" NUM_TEAMS(", dumpfile);
-  show_expr (omp_clauses->num_teams);
+  if (omp_clauses->num_teams_lower)
+	{
+	  show_expr (omp_clauses->num_teams_lower);
+	  fputc (':', dumpfile);
+	}
+  show_expr (omp_clauses->num_teams_upper);
   fputc (')', dumpfile);
 }
   if (omp_clauses->device)
diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c
index 145bff50f3e..f5ba7cecd54 100644
--- a/gcc/fortran/frontend-passes.c
+++ b/gcc/fortran/frontend-passes.c
@@ -5634,7 +5634,8 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t codefn, walk_expr_fn_t exprfn,
 		  WALK_SUBEXPR (co->ext.omp_clauses->chunk_size);
 		  WALK_SUBEXPR (co->ext.omp_clauses->safelen_expr);
 		  WALK_SUBEXPR (co->ext.omp_clauses->simdlen_expr);
-		  WALK_SUBEXPR (co->ext.omp_clauses->num_teams);
+		  WALK_SUBEXPR (co->ext.omp_clauses->num_teams_lower);
+		  WALK_SUBEXPR (co->ext.omp_clauses->num_teams_upper);
 		  WALK_SUBEXPR (co->ext.omp_clauses->device);
 		  WALK_SUBEXPR (co->ext.omp_clauses->thread_limit);
 		  WALK_SUBEXPR (co->ext.omp_clauses->dist_chunk_size);
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 9378b4b8a24..1ad2f0df702 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1502,7 +1502,8 @@ typedef struct gfc_omp_clauses
   struct gfc_expr *chunk_size;
   struct gfc_expr *safelen_expr;
   struct gfc_expr *simdlen_expr;
-  struct gfc_expr *num_teams;
+  struct gfc_expr *num_teams_lower;
+  struct gfc_expr *num_teams_upper;
   struct gfc_expr *device;
   struct gfc_expr *thread_limit;
   struct gfc_expr *grainsize;
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index dcf22ac2c2f..7b2df0d0be3 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -85,7 +85,8 @@ gfc_free_omp_clauses (gfc_omp_clauses *c)
   gfc_free_expr (c->chunk_size);
   gfc_free_expr (c->safelen_expr);
   gfc_free_expr (c->simdlen_expr);
-  gfc_free_expr (c->num_teams);
+  gfc_free_expr (c->num_teams_lower);
+  gfc_free_expr (c->num_teams_upper);
   gfc_free_expr (c->device);
   gfc_free_expr (c->thread_limit);
   gfc_free_expr (c->dist_chunk_size);
@@ -2420,11 +2421,22 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask,
 	  continue;
 	}
 	  if ((mask & OMP_CLAUSE_NUM_TEAMS)
-	  && (m = gfc_match_dupl_check (!c->num_teams, "num_teams", true,
-	>num_teams)) != MATCH_NO)
+	  && (m = gfc_match_dupl_check (!c->num_teams_upper, "num_teams",
+	true)) != MATCH_NO)
 	{
 	  if (m == MATCH_ERROR)
 		goto error;
+	  if (gfc_match ("%e ", >num_teams_upper) != MATCH_YES)
+		goto error;
+	  if (gfc_peek_ascii_char () == ':')
+		{
+		  c->num_teams_lower = c->num_teams_upper;
+		  c->num_teams_upper = NULL;
+		  if (gfc_match (": %e ", >num_teams_upper) != MATCH_YES)
+		goto error;
+		}
+	  if (gfc_match (") ") != MATCH_YES)
+		goto error;
 	  continue;
 	}
 	  if 

Fix some side cases of side effects analysis

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
I wrote script comparing modref pure/const discovery with ipa-pure-const
and found mistakes on both ends.  I fixed ipa-pure-const in previous two
patches.

This plugs the case where modref was too optimistic in handling looping
pure consts which were previously missed due to early exits on ECF_CONST
| ECF_PURE.  Those early exits are a bit annoying and I think as a cleanup
I may just drop some of them as premature optimizations coming from the time
when modref was very simplistic in what it propagates.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

gcc/ChangeLog:

2021-11-11  Jan Hubicka  

* ipa-modref.c (modref_summary::useful_p): Check also for side-effects
with looping const/pure.
(modref_summary_lto::useful_p): Likewise.
(merge_call_side_effects): Merge side effects before early exit
for pure/const.
(process_fnspec): Also handle pure functions.
(analyze_call): Do not early exit on looping pure const.
(propagate_unknown_call): Also handle nontrivial SCC as side-effect.
(modref_propagate_in_scc):

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index f8b7b900527..45b391a565e 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -331,11 +331,11 @@ modref_summary::useful_p (int ecf_flags, bool check_flags)
   && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false))
 return true;
   if (ecf_flags & (ECF_CONST | ECF_NOVOPS))
-return false;
+return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   if (loads && !loads->every_base)
 return true;
   if (ecf_flags & ECF_PURE)
-return false;
+return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   return stores && !stores->every_base;
 }
 
@@ -416,11 +416,11 @@ modref_summary_lto::useful_p (int ecf_flags, bool 
check_flags)
   && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false))
 return true;
   if (ecf_flags & (ECF_CONST | ECF_NOVOPS))
-return false;
+return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   if (loads && !loads->every_base)
 return true;
   if (ecf_flags & ECF_PURE)
-return false;
+return (!side_effects && (ecf_flags & ECF_LOOPING_CONST_OR_PURE));
   return stores && !stores->every_base;
 }
 
@@ -925,6 +925,18 @@ merge_call_side_effects (modref_summary *cur_summary,
   auto_vec  parm_map;
   modref_parm_map chain_map;
   bool changed = false;
+  int flags = gimple_call_flags (stmt);
+
+  if (!cur_summary->side_effects && callee_summary->side_effects)
+{
+  if (dump_file)
+   fprintf (dump_file, " - merging side effects.\n");
+  cur_summary->side_effects = true;
+  changed = true;
+}
+
+  if (flags & (ECF_CONST | ECF_NOVOPS))
+return changed;
 
   /* We can not safely optimize based on summary of callee if it does
  not always bind to current def: it is possible that memory load
@@ -988,12 +1000,6 @@ merge_call_side_effects (modref_summary *cur_summary,
  changed = true;
}
 }
-  if (!cur_summary->side_effects
-  && callee_summary->side_effects)
-{
-  cur_summary->side_effects = true;
-  changed = true;
-}
   return changed;
 }
 
@@ -1091,7 +1097,7 @@ process_fnspec (modref_summary *cur_summary,
   attr_fnspec fnspec = gimple_call_fnspec (call);
   int flags = gimple_call_flags (call);
 
-  if (!(flags & (ECF_CONST | ECF_NOVOPS))
+  if (!(flags & (ECF_CONST | ECF_NOVOPS | ECF_PURE))
   || (flags & ECF_LOOPING_CONST_OR_PURE)
   || (cfun->can_throw_non_call_exceptions
  && stmt_could_throw_p (cfun, call)))
@@ -1101,6 +1107,8 @@ process_fnspec (modref_summary *cur_summary,
   if (cur_summary_lto)
cur_summary_lto->side_effects = true;
 }
+  if (flags & (ECF_CONST | ECF_NOVOPS))
+return true;
   if (!fnspec.known_p ())
 {
   if (dump_file && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
@@ -1203,7 +1211,8 @@ analyze_call (modref_summary *cur_summary, 
modref_summary_lto *cur_summary_lto,
   /* Check flags on the function call.  In certain cases, analysis can be
  simplified.  */
   int flags = gimple_call_flags (stmt);
-  if (flags & (ECF_CONST | ECF_NOVOPS))
+  if ((flags & (ECF_CONST | ECF_NOVOPS))
+  && !(flags & ECF_LOOPING_CONST_OR_PURE))
 {
   if (dump_file)
fprintf (dump_file,
@@ -3963,7 +3972,8 @@ static bool
 propagate_unknown_call (cgraph_node *node,
cgraph_edge *e, int ecf_flags,
modref_summary *cur_summary,
-   modref_summary_lto *cur_summary_lto)
+   modref_summary_lto *cur_summary_lto,
+   bool nontrivial_scc)
 {
   bool changed = false;
   class fnspec_summary *fnspec_sum = fnspec_summaries->get (e);
@@ -3973,12 +3983,12 @@ propagate_unknown_call (cgraph_node *node,
   if (e->callee
   && builtin_safe_for_const_function_p (, e->callee->decl))
 {
-  if (cur_summary && 

[committed] Testsuite: Various fixes for nios2.

2021-11-11 Thread Sandra Loosemore
I've pushed the attached patch to clean up some test failures I've seen 
on nios2-elf.  This target defaults to -fno-delete-null-pointer-checks 
so any optimization tests that depend on assumptions that valid pointers 
are non-zero have to be marked explicitly.  The others ought to be 
obvious, except perhaps struct-by-value-1.c which was giving a link 
error about overflowing the small data region without -G0.


My last set of test results were pretty messy but I think almost all of 
the problems are not nios2-specific (e.g., PR103166, PR103163).  I think 
it is better to wait until we're into stage 3 and the churn settles down 
some before I make another pass to triage remaining nios2-specific 
problems, but I might as well check in what I have now instead of 
sitting on it.


-Sandra
commit eb43f1a95d1d7a0f88a8107d860e5343507554dd
Author: Sandra Loosemore 
Date:   Thu Nov 11 06:31:02 2021 -0800

Testsuite:  Various fixes for nios2.

2021-11-11  Sandra Loosemore  

	gcc/testsuite/
	* g++.dg/warn/Wmismatched-new-delete-5.C: Add
	-fdelete-null-pointer-checks.
	* gcc.dg/attr-returns-nonnull.c: Likewise.
	* gcc.dg/debug/btf/btf-datasec-1.c: Add -G0 option for nios2.
	* gcc.dg/ifcvt-4.c: Skip on nios2.
	* gcc.dg/struct-by-value-1.c: Add -G0 option for nios2.

diff --git a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-5.C b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-5.C
index 92c75df..bac2b68 100644
--- a/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-5.C
+++ b/gcc/testsuite/g++.dg/warn/Wmismatched-new-delete-5.C
@@ -1,7 +1,7 @@
 /* PR c++/100876 - -Wmismatched-new-delete should either look through
or ignore placement new
{ dg-do compile }
-   { dg-options "-O2 -Wall" } */
+   { dg-options "-O2 -Wall -fdelete-null-pointer-checks" } */
 
 extern "C" {
   void* malloc (__SIZE_TYPE__);
diff --git a/gcc/testsuite/gcc.dg/attr-returns-nonnull.c b/gcc/testsuite/gcc.dg/attr-returns-nonnull.c
index 22ee30a..e4e20b8 100644
--- a/gcc/testsuite/gcc.dg/attr-returns-nonnull.c
+++ b/gcc/testsuite/gcc.dg/attr-returns-nonnull.c
@@ -1,7 +1,7 @@
 /* Verify that attribute returns_nonnull on global and local function
declarations is merged.
{ dg-do compile }
-   { dg-options "-Wall -fdump-tree-optimized" } */
+   { dg-options "-Wall -fdump-tree-optimized -fdelete-null-pointer-checks" } */
 
 void foo (void);
 
diff --git a/gcc/testsuite/gcc.dg/debug/btf/btf-datasec-1.c b/gcc/testsuite/gcc.dg/debug/btf/btf-datasec-1.c
index f809d93..dbb236b 100644
--- a/gcc/testsuite/gcc.dg/debug/btf/btf-datasec-1.c
+++ b/gcc/testsuite/gcc.dg/debug/btf/btf-datasec-1.c
@@ -12,6 +12,7 @@
 /* { dg-do compile )  */
 /* { dg-options "-O0 -gbtf -dA" } */
 /* { dg-options "-O0 -gbtf -dA -msdata=none" { target { { powerpc*-*-* } && ilp32 } } } */
+/* { dg-options "-O0 -gbtf -dA -G0" { target { nios2-*-* } } } */
 
 /* Check for two DATASEC entries with vlen 3, and one with vlen 1.  */
 /* { dg-final { scan-assembler-times "0xf03\[\t \]+\[^\n\]*btt_info" 2 } } */
diff --git a/gcc/testsuite/gcc.dg/ifcvt-4.c b/gcc/testsuite/gcc.dg/ifcvt-4.c
index e74e449..0525102 100644
--- a/gcc/testsuite/gcc.dg/ifcvt-4.c
+++ b/gcc/testsuite/gcc.dg/ifcvt-4.c
@@ -2,7 +2,7 @@
 /* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
 /* { dg-additional-options "-march=z196" { target { s390x-*-* } } } */
 /* { dg-additional-options "-mtune-ctrl=^one_if_conv_insn" { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* avr-*-* hppa*64*-*-* s390-*-* visium-*-*" riscv*-*-* msp430-*-* } }  */
+/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* avr-*-* hppa*64*-*-* s390-*-* visium-*-*" riscv*-*-* msp430-*-* nios2-*-*} }  */
 /* { dg-skip-if "" { "s390x-*-*" } { "-m31" } }  */
 
 typedef int word __attribute__((mode(word)));
diff --git a/gcc/testsuite/gcc.dg/struct-by-value-1.c b/gcc/testsuite/gcc.dg/struct-by-value-1.c
index addf253..ae7adb5 100644
--- a/gcc/testsuite/gcc.dg/struct-by-value-1.c
+++ b/gcc/testsuite/gcc.dg/struct-by-value-1.c
@@ -1,6 +1,7 @@
 /* Test structure passing by value.  */
 /* { dg-do run } */
 /* { dg-options "-O2" } */
+/* { dg-options "-O2 -G0" { target { nios2-*-* } } } */
 
 #define T(N)	\
 struct S##N { unsigned char i[N]; };		\


Re: Fix recursion discovery in ipa-pure-const

2021-11-11 Thread Jan Hubicka via Gcc-patches
> On Thu, Nov 11, 2021 at 2:41 PM Jan Hubicka via Gcc-patches
>  wrote:
> >
> > Hi,
> > We mark self-recursive functions as looping for fear of endless recursion.
> > This is done correctly for local pure/const and for non-trivial SCCs in
> > callgraph, but for trivial SCCs we miss the flag.
> >
> > I think it is bad decision since infinite recursion will run out of stack,
> 
> Note it might not always in case we can eliminate the tail-recursion or avoid
> stack use by the recursion by other means.  So I think it is conservatively
> correct.

I don't know.  If a function is pure and has infinite recursion in it, it
means that it can only run forever without side effects if it gets lucky
and we tail-recurse it.  There are no other means to keep the stack use from
growing.

First, I think code relying on tail-recursion optimization to not run out
of stack is not strictly valid in C/C++ or the other languages we care about.
Also, in C++ there is the forced-progression rule which makes even the
tail-optimized code invalid.

I think in high level code such recursive accessors used for no good
reason are not that infrequent.  Also we had this bug in tree probably
forever since LOOPING_PURE_CONST was added and no one complained ;)

Relaxing this rule breaks some testcases, but odd ones - they are
infinitely self-recursive builtin implementations where we then both
prove function as noreturn & later optimize builtin to constant
so the assembly matching does not see expected thing.

Honza


[PATCH v2] libgcc: fix backtrace fallback on PowerPC Big-endian. [PR103004]

2021-11-11 Thread Raphael Moreira Zinsly via Gcc-patches
Changes since v1:
- Removed -Wmissing-prototypes fix.
- Fixed formatting of Changelog and patch.

--->8---

At the end of the backtrace stream _Unwind_Find_FDE() may not be able
to find the frame unwind info and will later call the backtrace fallback
instead of finishing. This occurs when using an old libc on ppc64 due to
dl_iterate_phdr() not being able to set the fde in the last trace.
When this occurs the cfa of the trace will be behind the context's cfa.
Also, libgo’s probestackmaps() calls the backtrace with a null pointer
and can get to the backchain fallback with the same problem; in this case
we are only interested in finding a stack map, and we don't need, nor can
we do, a backchain.
_Unwind_ForcedUnwind_Phase2() can hit the same issue as it uses
uw_frame_state_for(), so we need to treat _URC_NORMAL_STOP.

libgcc/ChangeLog:

 * config/rs6000/linux-unwind.h (ppc_backchain_fallback): Check if it's
 called with a null argument or at the end of the backtrace and return.
 * unwind.inc (_Unwind_ForcedUnwind_Phase2): Treat _URC_NORMAL_STOP.
---
 libgcc/config/rs6000/linux-unwind.h | 8 +++-
 libgcc/unwind.inc   | 5 +++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/libgcc/config/rs6000/linux-unwind.h 
b/libgcc/config/rs6000/linux-unwind.h
index 8deccc1d650..ad1ab286a2f 100644
--- a/libgcc/config/rs6000/linux-unwind.h
+++ b/libgcc/config/rs6000/linux-unwind.h
@@ -401,8 +401,14 @@ void ppc_backchain_fallback (struct _Unwind_Context 
*context, void *a)
   struct trace_arg *arg = a;
   int count;
 
-  /* Get the last address computed and start with the next.  */
+  /* Get the last address computed.  */
   current = context->cfa;
+
+  /* If the trace CFA is not the context CFA the backtrace is done.  */
+  if (arg == NULL || arg->cfa != current)
+   return;
+
+  /* Start with next address.  */
   current = current->backchain;
 
   for (count = arg->count; current != NULL; current = current->backchain)
diff --git a/libgcc/unwind.inc b/libgcc/unwind.inc
index 456a5ee682f..dc2f9c13e97 100644
--- a/libgcc/unwind.inc
+++ b/libgcc/unwind.inc
@@ -160,12 +160,13 @@ _Unwind_ForcedUnwind_Phase2 (struct _Unwind_Exception 
*exc,
 
   /* Set up fs to describe the FDE for the caller of cur_context.  */
   code = uw_frame_state_for (context, );
-  if (code != _URC_NO_REASON && code != _URC_END_OF_STACK)
+  if (code != _URC_NO_REASON && code != _URC_END_OF_STACK
+ && code != _URC_NORMAL_STOP)
return _URC_FATAL_PHASE2_ERROR;
 
   /* Unwind successful.  */
   action = _UA_FORCE_UNWIND | _UA_CLEANUP_PHASE;
-  if (code == _URC_END_OF_STACK)
+  if (code == _URC_END_OF_STACK || code == _URC_NORMAL_STOP)
action |= _UA_END_OF_STACK;
   stop_code = (*stop) (1, action, exc->exception_class, exc,
   context, stop_argument);
-- 
2.31.1



[COMMITTED] Move import population from threader to path solver.

2021-11-11 Thread Aldy Hernandez via Gcc-patches
Imports are our nomenclature for external SSA names to a block that
are used to calculate the outgoing edges for said block.  For example,
in the following snippet:

 :
_1 = b_10 == block_11;
_2 = b_10 != -1;
_3 = _1 & _2;
if (_3 != 0)
  goto ; [INV]
else
  goto ; [INV]

...the imports to the block are b_10 and block_11 since they are both
needed to calculate _3.

The path solver takes a bitmap of imports in addition to the path
itself.  This sets up the number of SSA names to be on the lookout
for, while resolving the final conditional.

Calculating these imports was initially done in the threader, since it
was the only user of the path solver.  With new clients, it has become
obvious that populating the imports should be a task for the path
solver, so it can be shared among the clients.

This patch moves the import code to the solver, making both the solver
and the threader simpler in the process.  This is because intent is
clearer and some duplicate code was removed.

This reshuffling had the net effect of giving us a handful of new
threads through my suite of .ii files (125).  This was unexpected, but
welcome nevertheless.  There is no performance difference in callgrind
over the same suite.

Regstrapped on x86-64 Linux.

gcc/ChangeLog:

* gimple-range-path.cc (path_range_query::add_copies_to_imports):
Rename to...
(path_range_query::compute_imports): ...this.  Adapt it so it can
be passed the imports bitmap instead of working on m_imports.
(path_range_query::compute_ranges): Call compute_imports in all
cases unless an imports bitmap is passed.
* gimple-range-path.h (path_range_query::compute_imports): New.
(path_range_query::add_copies_to_imports): Remove.
* tree-ssa-threadbackward.c (back_threader::resolve_def): Remove.
(back_threader::find_paths_to_names): Inline resolve_def.
(back_threader::find_paths): Call compute_imports.
(back_threader::resolve_phi): Adjust comment.
---
 gcc/gimple-range-path.cc  | 45 -
 gcc/gimple-range-path.h   |  2 +-
 gcc/tree-ssa-threadbackward.c | 47 ++-
 3 files changed, 30 insertions(+), 64 deletions(-)

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index 6da01c7067f..4843c133e62 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -439,26 +439,32 @@ path_range_query::add_to_imports (tree name, bitmap 
imports)
   return false;
 }
 
-// Add the copies of any SSA names in IMPORTS to IMPORTS.
+// Compute the imports to the path ending in EXIT.  These are
+// essentially the SSA names used to calculate the final conditional
+// along the path.
 //
-// These are hints for the solver.  Adding more elements (within
-// reason) doesn't slow us down, because we don't solve anything that
-// doesn't appear in the path.  On the other hand, not having enough
-// imports will limit what we can solve.
+// They are hints for the solver.  Adding more elements doesn't slow
+// us down, because we don't solve anything that doesn't appear in the
+// path.  On the other hand, not having enough imports will limit what
+// we can solve.
 
 void
-path_range_query::add_copies_to_imports ()
+path_range_query::compute_imports (bitmap imports, basic_block exit)
 {
-  auto_vec worklist (bitmap_count_bits (m_imports));
+  // Start with the imports from the exit block...
+  bitmap r_imports = m_ranger.gori ().imports (exit);
+  bitmap_copy (imports, r_imports);
+
+  auto_vec worklist (bitmap_count_bits (imports));
   bitmap_iterator bi;
   unsigned i;
-
-  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
+  EXECUTE_IF_SET_IN_BITMAP (imports, 0, i, bi)
 {
   tree name = ssa_name (i);
   worklist.quick_push (name);
 }
 
+  // ...and add any operands used to define these imports.
   while (!worklist.is_empty ())
 {
   tree name = worklist.pop ();
@@ -466,15 +472,12 @@ path_range_query::add_copies_to_imports ()
 
   if (is_gimple_assign (def_stmt))
{
- // ?? Adding assignment copies doesn't get us much.  At the
- // time of writing, we got 63 more threaded paths across the
- // .ii files from a bootstrap.
- add_to_imports (gimple_assign_rhs1 (def_stmt), m_imports);
+ add_to_imports (gimple_assign_rhs1 (def_stmt), imports);
  tree rhs = gimple_assign_rhs2 (def_stmt);
- if (rhs && add_to_imports (rhs, m_imports))
+ if (rhs && add_to_imports (rhs, imports))
worklist.safe_push (rhs);
  rhs = gimple_assign_rhs3 (def_stmt);
- if (rhs && add_to_imports (rhs, m_imports))
+ if (rhs && add_to_imports (rhs, imports))
worklist.safe_push (rhs);
}
   else if (gphi *phi = dyn_cast  (def_stmt))
@@ -486,7 +489,7 @@ path_range_query::add_copies_to_imports ()
 
  if (TREE_CODE (arg) == SSA_NAME
  && 

Re: Fix recursion discovery in ipa-pure-const

2021-11-11 Thread Richard Biener via Gcc-patches
On Thu, Nov 11, 2021 at 2:41 PM Jan Hubicka via Gcc-patches
 wrote:
>
> Hi,
> We mark self-recursive functions as looping for fear of endless recursion.
> This is done correctly for local pure/const and for non-trivial SCCs in
> callgraph, but for trivial SCCs we miss the flag.
>
> I think it is bad decision since infinite recursion will run out of stack,

Note it might not always in case we can eliminate the tail-recursion or avoid
stack use by the recursion by other means.  So I think it is conservatively
correct.

Richard.

> but changing it upsets some testcases and should be done independently.
> So this patch is fixing current behaviour to be consistent.
>
> Bootstrapped/regtested x86_64-linux, comitted.
>
> gcc/ChangeLog:
>
> 2021-11-11  Jan Hubicka  
>
> * ipa-pure-const.c (propagate_pure_const): Self recursion is
> a side effect.
>
> diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
> index 505ed4f8a3b..64777cd2d91 100644
> --- a/gcc/ipa-pure-const.c
> +++ b/gcc/ipa-pure-const.c
> @@ -1513,6 +1611,9 @@ propagate_pure_const (void)
>   enum pure_const_state_e edge_state = IPA_CONST;
>   bool edge_looping = false;
>
> + if (e->recursive_p ())
> +   looping = true;
> +
>   if (dump_file && (dump_flags & TDF_DETAILS))
> {
>   fprintf (dump_file, "Call to %s",


Re: [PATCH] libgcc: fix backtrace fallback on PowerPC Big-endian. [PR103004]

2021-11-11 Thread Raphael M Zinsly via Gcc-patches

Hi Segher,

On 11/11/2021 10:43, Segher Boessenkool wrote:

Hi!

On Wed, Nov 10, 2021 at 06:59:23PM -0300, Raphael Moreira Zinsly wrote:

At the end of the backtrace stream _Unwind_Find_FDE() may not be able
to find the frame unwind info and will later call the backtrace fallback
instead of finishing. This occurs when using an old libc on ppc64 due to
dl_iterate_phdr() not being able to set the fde in the last trace.
When this occurs the cfa of the trace will be behind of context's cfa.
Also, libgo’s probestackmaps() calls the backtrace with a null pointer
and can get to the backchain fallback with the same problem, in this case
we are only interested in find a stack map, we don't need nor can do a
backchain.
_Unwind_ForcedUnwind_Phase2() can hit the same issue as it uses
uw_frame_state_for(), so we need to treat _URC_NORMAL_STOP.

libgcc/ChangeLog:

  * config/rs6000/linux-unwind.h (ppc_backchain_fallback): turn into
 static to fix -Wmissing-prototypes. Check if it's called with a null
 argument or at the end of the backtrace and return.
  * unwind.inc (_Unwind_ForcedUnwind_Phase2): treat _URC_NORMAL_STOP.


Formatting is messed up.  Lines start with a capital.  Two spaces after
full stop, while you're at it.



Ok.


-void ppc_backchain_fallback (struct _Unwind_Context *context, void *a)
+static void
+ppc_backchain_fallback (struct _Unwind_Context *context, void *a)


This was already fixed in 75ef0353a2d3.


Ops, missed that.




  {
struct frame_layout *current;
struct trace_arg *arg = a;
int count;
  
-  /* Get the last address computed and start with the next.  */

+  /* Get the last address computed.  */
current = context->cfa;


Empty line after here please.  Most of the time if you have a full-line
comment it means a new paragraph is starting.



Ok.


+  /* If the trace CFA is not the context CFA the backtrace is done.  */
+  if (arg == NULL || arg->cfa != current)
+   return;
+
+  /* Start with next address.  */
current = current->backchain;


Like you did here :-)

Do you have a testcase (that failed without this, but now doesn't)?



I don't have a simple testcase for that, but many of the asan and go 
tests catch that.



Looks okay, but please update and resend.


Segher



Thanks,
--
Raphael Moreira Zinsly


[PATCH v1 8/8] RISC-V: bitmanip: relax minmax to operate on GPR

2021-11-11 Thread Philipp Tomsich
While min/minu/max/maxu instructions are provided for XLEN only, these
can safely operate on GPRs (i.e. SImode or DImode for RV64): SImode is
always sign-extended, which ensures that the XLEN-wide instructions
can be used for signed and unsigned comparisons on SImode yielding a
correct ordering of values.

This commit
 - relaxes the minmax pattern to express for GPR (instead of X only),
   providing both a si3 and di3 expansion on RV64
 - adds a sign-extending form for the si3 pattern for RV64 to allow REE
   to eliminate redundant extensions
 - adds test-cases for both

gcc/ChangeLog:

* config/riscv/bitmanip.md: Relax minmax to GPR (i.e SImode or
  DImode) on RV64.
* config/riscv/bitmanip.md (si3_sext): Add
  pattern for REE.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbb-min-max.c: Add testcases for SImode
  operands checking that no redundant sign- or zero-extensions
  are emitted.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/bitmanip.md | 14 +++---
 gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +---
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 000deb48b16..2a28f78f5f6 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -260,13 +260,21 @@ (define_insn "bswap2"
   [(set_attr "type" "bitmanip")])
 
 (define_insn "3"
-  [(set (match_operand:X 0 "register_operand" "=r")
-(bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
-  (match_operand:X 2 "register_operand" "r")))]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(bitmanip_minmax:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:GPR 2 "register_operand" "r")))]
   "TARGET_ZBB"
   "\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
+(define_insn "si3_sext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(sign_extend:DI (bitmanip_minmax:SI (match_operand:SI 1 
"register_operand" "r")
+(match_operand:SI 2 "register_operand" "r"]
+  "TARGET_64BIT && TARGET_ZBB"
+  "\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
 ;; orc.b (or-combine) is added as an unspec for the benefit of the support
 ;; for optimized string functions (such as strcmp).
 (define_insn "orcb2"
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c 
b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
index f44c398ea08..7169e873551 100644
--- a/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
+++ b/gcc/testsuite/gcc.target/riscv/zbb-min-max.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64 -O2" } */
 
 long
 foo1 (long i, long j)
@@ -25,7 +25,21 @@ foo4 (unsigned long i, unsigned long j)
   return i > j ? i : j;
 }
 
+unsigned int
+foo5(unsigned int a, unsigned int b)
+{
+  return a > b ? a : b;
+}
+
+int
+foo6(int a, int b)
+{
+  return a > b ? a : b;
+}
+
 /* { dg-final { scan-assembler-times "min" 3 } } */
-/* { dg-final { scan-assembler-times "max" 3 } } */
+/* { dg-final { scan-assembler-times "max" 4 } } */
 /* { dg-final { scan-assembler-times "minu" 1 } } */
-/* { dg-final { scan-assembler-times "maxu" 1 } } */
+/* { dg-final { scan-assembler-times "maxu" 3 } } */
+/* { dg-final { scan-assembler-not "zext.w" } } */
+/* { dg-final { scan-assembler-not "sext.w" } } */
-- 
2.32.0



[PATCH v1 7/8] RISC-V: bitmanip: add orc.b as an unspec

2021-11-11 Thread Philipp Tomsich
As a basis for optimized string functions (e.g., the by-pieces
implementations), we need orc.b available.  This adds orc.b as an
unspec, so we can expand to it.

gcc/ChangeLog:

* config/riscv/bitmanip.md (orcb2): Add orc.b as an unspec.
* config/riscv/riscv.md: Add UNSPEC_ORC_B.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/bitmanip.md | 8 
 gcc/config/riscv/riscv.md| 3 +++
 2 files changed, 11 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 9e10280e306..000deb48b16 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -267,6 +267,14 @@ (define_insn "3"
   "\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
+;; orc.b (or-combine) is added as an unspec for the benefit of the support
+;; for optimized string functions (such as strcmp).
+(define_insn "orcb2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (unspec:X [(match_operand:X 1 "register_operand")] UNSPEC_ORC_B))]
+  "TARGET_ZBB"
+  "orc.b\t%0,%1")
+
 ;; ZBS extension.
 
 (define_insn "*bset"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 225e5b259c1..7a2501ec7a9 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -45,6 +45,9 @@ (define_c_enum "unspec" [
 
   ;; Stack tie
   UNSPEC_TIE
+
+  ;; Zbb OR-combine instruction
+  UNSPEC_ORC_B
 ])
 
 (define_c_enum "unspecv" [
-- 
2.32.0



[PATCH v1 6/8] RISC-V: bitmanip: add splitter to use bexti for "(a & (1 << BIT_NO)) ? 0 : -1"

2021-11-11 Thread Philipp Tomsich
Consider creating a polarity-reversed mask from a set-bit (i.e., if
the bit is set, produce all-ones; otherwise: all-zeros).  Using Zbb,
this can be expressed as bexti, followed by an addi of minus-one.  To
enable the combiner to discover this opportunity, we need to split the
canonical expression for "(a & (1 << BIT_NO)) ? 0 : -1" into a form
combinable into bexti.

Consider the function:
long f(long a)
{
  return (a & (1 << BIT_NO)) ? 0 : -1;
}
This produces the following sequence prior to this change:
andia0,a0,16
seqza0,a0
neg a0,a0
ret
Following this change, it results in:
bexti   a0,a0,4
addia0,a0,-1
ret

gcc/ChangeLog:

* config/riscv/bitmanip.md: Add a splitter to generate
  polarity-reversed masks from a set bit using bexti + addi.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbs-bexti.c: New test.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/bitmanip.md   | 13 +
 gcc/testsuite/gcc.target/riscv/zbs-bexti.c | 14 ++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bexti.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 178d1ca0e4b..9e10280e306 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -367,3 +367,16 @@ (define_insn "*bexti"
   "TARGET_ZBS"
   "bexti\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
+
+;; We can create a polarity-reversed mask (i.e. bit N -> { set = 0, clear = -1 
})
+;; using a bext(i) followed by an addi instruction.
+;; This splits the canonical representation of "(a & (1 << BIT_NO)) ? 0 : -1".
+(define_split
+  [(set (match_operand:GPR 0 "register_operand")
+   (neg:GPR (eq:GPR (zero_extract:GPR (match_operand:GPR 1 
"register_operand")
+  (const_int 1)
+  (match_operand 2))
+(const_int 0]
+  "TARGET_ZBB"
+  [(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 
2)))
+   (set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))])
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bexti.c 
b/gcc/testsuite/gcc.target/riscv/zbs-bexti.c
new file mode 100644
index 000..d02c3f7a98d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-bexti.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64 -O2" } */
+
+/* bexti */
+#define BIT_NO  27
+
+long
+foo0 (long a)
+{
+  return (a & (1 << BIT_NO)) ? 0 : -1;
+}
+
+/* { dg-final { scan-assembler "bexti" } } */
+/* { dg-final { scan-assembler "addi" } } */
-- 
2.32.0



[PATCH v1 5/8] RISC-V: bitmanip: improvements to rotate instructions

2021-11-11 Thread Philipp Tomsich
This change improves rotate instructions (motivated by a review of the
code generated for OpenSSL): a rotate-left by a constant is synthesized
using a rotate-right-immediate to avoid putting the shift-amount into
a temporary; to do so, we allow either a register or an immediate for
the expansion of rotl3 and then check if the shift-amount is a
constant.

Without these changes, the function
unsigned int f(unsigned int a)
{
  return (a << 2) | (a >> 30);
}
turns into
li  a5,2
rolwa0,a0,a5
while these changes give us:
roriw   a0,a0,30

gcc/ChangeLog:

* config/riscv/bitmanip.md (rotlsi3, rotldi3, rotlsi3_sext):
Synthesize rotate-left-by-immediate from a rotate-right insn.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/bitmanip.md | 39 ++--
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 59779b48f27..178d1ca0e4b 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -204,25 +204,52 @@ (define_insn "rotrsi3_sext"
 (define_insn "rotlsi3"
   [(set (match_operand:SI 0 "register_operand" "=r")
(rotate:SI (match_operand:SI 1 "register_operand" "r")
-  (match_operand:QI 2 "register_operand" "r")))]
+  (match_operand:QI 2 "arith_operand" "rI")))]
   "TARGET_ZBB"
-  { return TARGET_64BIT ? "rolw\t%0,%1,%2" : "rol\t%0,%1,%2"; }
+  {
+/* If the rotate-amount is constant, let's synthesize using a
+   rotate-right-immediate instead of using a temporary. */
+
+if (CONST_INT_P(operands[2])) {
+  operands[2] = GEN_INT(32 - INTVAL(operands[2]));
+  return TARGET_64BIT ? "roriw\t%0,%1,%2" : "rori\t%0,%1,%2";
+}
+
+return TARGET_64BIT ? "rolw\t%0,%1,%2" : "rol\t%0,%1,%2";
+  }
   [(set_attr "type" "bitmanip")])
 
 (define_insn "rotldi3"
   [(set (match_operand:DI 0 "register_operand" "=r")
(rotate:DI (match_operand:DI 1 "register_operand" "r")
-  (match_operand:QI 2 "register_operand" "r")))]
+  (match_operand:QI 2 "arith_operand" "rI")))]
   "TARGET_64BIT && TARGET_ZBB"
-  "rol\t%0,%1,%2"
+  {
+if (CONST_INT_P(operands[2])) {
+  operands[2] = GEN_INT(64 - INTVAL(operands[2]));
+  return "rori\t%0,%1,%2";
+}
+
+return "rol\t%0,%1,%2";
+  }
   [(set_attr "type" "bitmanip")])
 
+;; Until we have improved REE to understand that sign-extending the result of
+;; an implicitly sign-extending operation is redundant, we need an additional
+;; pattern to gobble up the redundant sign-extension.
 (define_insn "rotlsi3_sext"
   [(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (rotate:SI (match_operand:SI 1 "register_operand" "r")
-  (match_operand:QI 2 "register_operand" 
"r"]
+  (match_operand:QI 2 "arith_operand" "rI"]
   "TARGET_64BIT && TARGET_ZBB"
-  "rolw\t%0,%1,%2"
+  {
+if (CONST_INT_P(operands[2])) {
+  operands[2] = GEN_INT(32 - INTVAL(operands[2]));
+  return "roriw\t%0,%1,%2";
+}
+
+return "rolw\t%0,%1,%2";
+  }
   [(set_attr "type" "bitmanip")])
 
 (define_insn "bswap2"
-- 
2.32.0



[PATCH v1 4/8] RISC-V: bitmanip: fix constant-loading for (1ULL << 31) in DImode

2021-11-11 Thread Philipp Tomsich
The SINGLE_BIT_MASK_OPERAND() is overly restrictive, triggering for
bits above 31 only (to side-step any issues with the negative SImode
value 0x80000000).  This moves the special handling of this SImode
value (i.e. the check for -2147483648) to riscv.c and relaxes the
SINGLE_BIT_MASK_OPERAND() test.

This changes the code-generation for loading (1ULL << 31) from:
li  a0,1
sllia0,a0,31
to:
bseti   a0,zero,31

gcc/ChangeLog:

* config/riscv/riscv.c (riscv_build_integer_1): Rewrite value as
-2147483648 for the single-bit case, when operating on 0x80000000
in SImode.
* gcc/config/riscv/riscv.h (SINGLE_BIT_MASK_OPERAND): Allow for
any single-bit value, moving the special case for 0x80000000 to
riscv_build_integer_1 (in riscv.c).

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/riscv.c |  9 +
 gcc/config/riscv/riscv.h | 11 ---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index dff4e370471..4c30d4e521d 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -415,6 +415,15 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
   /* Simply BSETI.  */
   codes[0].code = UNKNOWN;
   codes[0].value = value;
+
+  /* RISC-V sign-extends all 32bit values that live in a 32bit
+register.  To avoid paradoxes, we thus need to use the
+sign-extended (negative) representation for the value, if we
+want to build 0x80000000 in SImode.  This will then expand
+to an ADDI/LI instruction.  */
+  if (mode == SImode && value == 0x80000000)
+   codes[0].value = -2147483648;
+
   return 1;
 }
 
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 64287124735..abb121ddbea 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -526,13 +526,10 @@ enum reg_class
   (((VALUE) | ((1UL<<31) - IMM_REACH)) == ((1UL<<31) - IMM_REACH)  \
|| ((VALUE) | ((1UL<<31) - IMM_REACH)) + IMM_REACH == 0)
 
-/* If this is a single bit mask, then we can load it with bseti.  But this
-   is not useful for any of the low 31 bits because we can use addi or lui
-   to load them.  It is wrong for loading SImode 0x80000000 on rv64 because it
-   needs to be sign-extended.  So we restrict this to the upper 32-bits
-   only.  */
-#define SINGLE_BIT_MASK_OPERAND(VALUE) \
-  (pow2p_hwi (VALUE) && (ctz_hwi (VALUE) >= 32))
+/* If this is a single bit mask, then we can load it with bseti.  Special
+   handling of SImode 0x80000000 on RV64 is done in riscv_build_integer_1. */
+#define SINGLE_BIT_MASK_OPERAND(VALUE) \
+  (pow2p_hwi (VALUE))
 
 /* Stack layout; function entry, exit and calling.  */
 
-- 
2.32.0



[PATCH v1 3/8] RISC-V: costs: support shift-and-add in strength-reduction

2021-11-11 Thread Philipp Tomsich
The strength-reduction implementation in expmed.c will assess the
profitability of using shift-and-add using a RTL expression that wraps
a MULT (with a power-of-2) in a PLUS.  Unless the RISC-V rtx_costs
function recognizes this as expressing a sh[123]add instruction, we
will return an inflated cost, thus defeating the optimization.

This change adds the necessary idiom recognition to provide an
accurate cost for this form of expressing sh[123]add.

Instead on expanding to
li  a5,200
mulwa0,a5,a0
with this change, the expression 'a * 200' is synthesized as:
sh2add  a0,a0,a0   // *5 = a + 4 * a
sh2add  a0,a0,a0   // *5 = a + 4 * a
sllia0,a0,3// *8

gcc/ChangeLog:

* config/riscv/riscv.c (riscv_rtx_costs): Recognize shNadd,
if expressed as a plus and multiplication with a power-of-2.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/riscv.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 8480cf09294..dff4e370471 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2020,6 +2020,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
  *total = COSTS_N_INSNS (1);
  return true;
}
+  /* Before strength-reduction, the shNadd can be expressed as the addition
+of a multiplication with a power-of-two.  If this case is not handled,
+the strength-reduction in expmed.c will calculate an inflated cost. */
+  if (TARGET_ZBA
+ && ((!TARGET_64BIT && (mode == SImode)) ||
+ (TARGET_64BIT && (mode == DImode)))
+ && (GET_CODE (XEXP (x, 0)) == MULT)
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && IN_RANGE (pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
+   {
+ *total = COSTS_N_INSNS (1);
+ return true;
+   }
   /* shNadd.uw pattern for zba.
 [(set (match_operand:DI 0 "register_operand" "=r")
   (plus:DI
-- 
2.32.0



[PATCH v1 2/8] RISC-V: costs: handle BSWAP

2021-11-11 Thread Philipp Tomsich
The BSWAP operation is not handled in rtx_costs. Add it.

gcc/ChangeLog:

* config/riscv/riscv.c (rtx_costs): Add BSWAP.

Signed-off-by: Philipp Tomsich 
---

 gcc/config/riscv/riscv.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index c77b0322869..8480cf09294 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2131,6 +2131,14 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
   *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
   return false;
 
+case BSWAP:
+  if (TARGET_ZBB)
+   {
+ *total = COSTS_N_INSNS (1);
+ return true;
+   }
+  return false;
+
 case FLOAT:
 case UNSIGNED_FLOAT:
 case FIX:
-- 
2.32.0



[PATCH v1 0/8] Improvements to bitmanip-1.0 (Zb[abcs]) support

2021-11-11 Thread Philipp Tomsich


This series provides assorted improvements for the RISC-V Zb[abcs]
support collected over the last year and a half and forward-ported to
the recently merged upstream support for the Zb[abcs] extensions.

Improvements include:
 - synthesis of HImode bswap from SImode/DImode rev8
 - cost-model change to support shift-and-add (sh[123]add) in the
   strength-reduction of multiplication operations
 - support for constant-loading of (1ULL << 31) on RV64 using bseti
 - generating a polarity-reversed mask from a bit-test
 - adds orc.b as UNSPEC
 - improves min/minu/max/maxu patterns to suppress redundant extensions


Philipp Tomsich (8):
  bswap: synthesize HImode bswap from SImode or DImode
  RISC-V: costs: handle BSWAP
  RISC-V: costs: support shift-and-add in strength-reduction
  RISC-V: bitmanip: fix constant-loading for (1ULL << 31) in DImode
  RISC-V: bitmanip: improvements to rotate instructions
  RISC-V: bitmanip: add splitter to use bexti for "(a & (1 << BIT_NO)) ?
0 : -1"
  RISC-V: bitmanip: add orc.b as an unspec
  RISC-V: bitmanip: relax minmax to operate on GPR

 gcc/config/riscv/bitmanip.md | 74 +---
 gcc/config/riscv/riscv.c | 31 
 gcc/config/riscv/riscv.h | 11 ++-
 gcc/config/riscv/riscv.md|  3 +
 gcc/optabs.c |  6 ++
 gcc/testsuite/gcc.target/riscv/zbb-bswap.c   | 22 ++
 gcc/testsuite/gcc.target/riscv/zbb-min-max.c | 20 +-
 gcc/testsuite/gcc.target/riscv/zbs-bexti.c   | 14 
 8 files changed, 162 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-bswap.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbs-bexti.c

-- 
2.32.0



[PATCH v1 1/8] bswap: synthesize HImode bswap from SImode or DImode

2021-11-11 Thread Philipp Tomsich
The RISC-V Zbb extension adds an XLEN (i.e. SImode for rv32, DImode
for rv64) bswap instruction (rev8).  While, with the current master,
SImode is synthesized correctly from DImode, HImode is not.

This change adds an appropriate expansion for a HImode bswap, if a
wider bswap is available.

Without this change, the following rv64gc_zbb code is generated for
__builtin_bswap16():
slliw   a5,a0,8
zext.h  a0,a0
srliw   a0,a0,8
or  a0,a5,a0
sext.h  a0,a0  // this is a 16bit sign-extension following
   // the byteswap (e.g. on a 'short' function
   // return).

After this change, a bswap (rev8) is used and any extensions are
combined into the shift-right:
rev8a0,a0
sraia0,a0,48   // the sign-extension is combined into the
   // shift; a srli is emitted otherwise...

gcc/ChangeLog:

* optabs.c (expand_unop): support expanding a HImode bswap
  using SImode or DImode, followed by a shift.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbb-bswap.c: New test.

Signed-off-by: Philipp Tomsich 
---

 gcc/optabs.c   |  6 ++
 gcc/testsuite/gcc.target/riscv/zbb-bswap.c | 22 ++
 2 files changed, 28 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-bswap.c

diff --git a/gcc/optabs.c b/gcc/optabs.c
index 019bbb62882..7a3ffbe4525 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -3307,6 +3307,12 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, 
rtx target,
return temp;
}
 
+ /* If we are missing a HImode BSWAP, but have one for SImode or
+DImode, use a BSWAP followed by a SHIFT.  */
+ temp = widen_bswap (as_a  (mode), op0, target);
+ if (temp)
+   return temp;
+
  last = get_last_insn ();
 
  temp1 = expand_binop (mode, ashl_optab, op0,
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-bswap.c 
b/gcc/testsuite/gcc.target/riscv/zbb-bswap.c
new file mode 100644
index 000..6ee27d9f47a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb-bswap.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64 -O2" } */
+
+unsigned long
+func64 (unsigned long i)
+{
+  return __builtin_bswap64(i);
+}
+
+unsigned int
+func32 (unsigned int i)
+{
+  return __builtin_bswap32(i);
+}
+
+unsigned short
+func16 (unsigned short i)
+{
+  return __builtin_bswap16(i);
+}
+
+/* { dg-final { scan-assembler-times "rev8" 3 } } */
-- 
2.32.0



[PATCH] tree-optimization/103188 - avoid running ranger on not-up-to-date SSA

2021-11-11 Thread Richard Biener via Gcc-patches
The following splits loop header copying into an analysis phase
that uses ranger and a transform phase that can do without to avoid
running ranger on IL that has SSA form not updated.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-11  Richard Biener  

PR tree-optimization/103188
* tree-ssa-loop-ch.c (should_duplicate_loop_header_p):
Remove query parameter, split out check for size
optimization.
(ch_base::m_ranger, cb_base::m_query): Remove.
(ch_base::copy_headers): Split processing loop into
analysis around which we allocate and use ranger and
transform where we do not.
(pass_ch::execute): Do not allocate/free ranger here.
(pass_ch_vect::execute): Likewise.

* gcc.dg/torture/pr103188.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr103188.c | 38 +
 gcc/tree-ssa-loop-ch.c  | 72 ++---
 2 files changed, 78 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr103188.c

diff --git a/gcc/testsuite/gcc.dg/torture/pr103188.c 
b/gcc/testsuite/gcc.dg/torture/pr103188.c
new file mode 100644
index 000..0412f6f9b79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr103188.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+
+int a, b, c, d = 10, e = 1, f, g, h, i;
+int main()
+{
+  int j = -1;
+k:
+  h = c;
+l:
+  c = ~c;
+  if (e)
+  m:
+a = 0;
+  if (j > 1)
+goto m;
+  if (!e)
+goto l;
+  if (c)
+goto p;
+n:
+  goto m;
+o:
+  if (f) {
+if (g)
+  goto k;
+j = 0;
+  p:
+if (d)
+  goto o;
+goto n;
+  }
+  if (i)
+goto l;
+  for (; a < 1; a++)
+while (a > d)
+  b++;
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index c7d86d751d4..0cee38159fb 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -69,26 +69,12 @@ entry_loop_condition_is_static (class loop *l, 
path_range_query *query)
 
 static bool
 should_duplicate_loop_header_p (basic_block header, class loop *loop,
-   int *limit, path_range_query *query)
+   int *limit)
 {
   gimple_stmt_iterator bsi;
 
   gcc_assert (!header->aux);
 
-  /* Avoid loop header copying when optimizing for size unless we can
- determine that the loop condition is static in the first
- iteration.  */
-  if (optimize_loop_for_size_p (loop)
-  && !loop->force_vectorize
-  && !entry_loop_condition_is_static (loop, query))
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file,
-"  Not duplicating bb %i: optimizing for size.\n",
-header->index);
-  return false;
-}
-
   gcc_assert (EDGE_COUNT (header->succs) > 0);
   if (single_succ_p (header))
 {
@@ -223,8 +209,6 @@ should_duplicate_loop_header_p (basic_block header, class 
loop *loop,
   return false;
 }
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "Will duplicate bb %i\n", header->index); 
   return true;
 }
 
@@ -289,9 +273,6 @@ class ch_base : public gimple_opt_pass
 
   /* Return true to copy headers of LOOP or false to skip.  */
   virtual bool process_loop_p (class loop *loop) = 0;
-
-  gimple_ranger *m_ranger = NULL;
-  path_range_query *m_query = NULL;
 };
 
 const pass_data pass_data_ch =
@@ -386,8 +367,11 @@ ch_base::copy_headers (function *fun)
   copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
   bbs_size = n_basic_blocks_for_fn (fun);
 
+  auto_vec candidates;
   auto_vec > copied;
 
+  gimple_ranger *ranger = new gimple_ranger;
+  path_range_query *query = new path_range_query (*ranger, /*resolve=*/true);
   for (auto loop : loops_list (cfun, 0))
 {
   int initial_limit = param_max_loop_header_insns;
@@ -406,6 +390,37 @@ ch_base::copy_headers (function *fun)
  || !process_loop_p (loop))
continue;
 
+  /* Avoid loop header copying when optimizing for size unless we can
+determine that the loop condition is static in the first
+iteration.  */
+  if (optimize_loop_for_size_p (loop)
+ && !loop->force_vectorize
+ && !entry_loop_condition_is_static (loop, query))
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+"  Not duplicating bb %i: optimizing for size.\n",
+header->index);
+ continue;
+   }
+
+  if (should_duplicate_loop_header_p (header, loop, _limit))
+   candidates.safe_push (loop);
+}
+  /* Do not use ranger after we change the IL and not have updated SSA.  */
+  delete query;
+  delete ranger;
+
+  for (auto loop : candidates)
+{
+  int initial_limit = param_max_loop_header_insns;
+  int remaining_limit = initial_limit;
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+"Copying headers of loop %i\n", 

Re: [PATCH] libgcc: fix backtrace fallback on PowerPC Big-endian. [PR103004]

2021-11-11 Thread Segher Boessenkool
Hi!

On Wed, Nov 10, 2021 at 06:59:23PM -0300, Raphael Moreira Zinsly wrote:
> At the end of the backtrace stream _Unwind_Find_FDE() may not be able
> to find the frame unwind info and will later call the backtrace fallback
> instead of finishing. This occurs when using an old libc on ppc64 due to
> dl_iterate_phdr() not being able to set the fde in the last trace.
> When this occurs the cfa of the trace will be behind of context's cfa.
> Also, libgo’s probestackmaps() calls the backtrace with a null pointer
> and can get to the backchain fallback with the same problem, in this case
> we are only interested in find a stack map, we don't need nor can do a
> backchain.
> _Unwind_ForcedUnwind_Phase2() can hit the same issue as it uses
> uw_frame_state_for(), so we need to treat _URC_NORMAL_STOP.
> 
> libgcc/ChangeLog:
> 
>  * config/rs6000/linux-unwind.h (ppc_backchain_fallback): turn into
>static to fix -Wmissing-prototypes. Check if it's called with a null
>argument or at the end of the backtrace and return.
>  * unwind.inc (_Unwind_ForcedUnwind_Phase2): treat _URC_NORMAL_STOP.

Formatting is messed up.  Lines start with a capital.  Two spaces after
full stop, while you're at it.

> -void ppc_backchain_fallback (struct _Unwind_Context *context, void *a)
> +static void
> +ppc_backchain_fallback (struct _Unwind_Context *context, void *a)

This was already fixed in 75ef0353a2d3.

>  {
>struct frame_layout *current;
>struct trace_arg *arg = a;
>int count;
>  
> -  /* Get the last address computed and start with the next.  */
> +  /* Get the last address computed.  */
>current = context->cfa;

Empty line after here please.  Most of the time if you have a full-line
comment it means a new paragraph is starting.

> +  /* If the trace CFA is not the context CFA the backtrace is done.  */
> +  if (arg == NULL || arg->cfa != current)
> + return;
> +
> +  /* Start with next address.  */
>current = current->backchain;

Like you did here :-)

Do you have a testcase (that failed without this, but now doesn't)?

Looks okay, but please update and resend.


Segher


Fix recursion discovery in ipa-pure-const

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
We mark self recursive functions as looping for fear of endless recursion.
This is done correctly for local pure/const and for non-trivial SCCs in
callgraph, but for trivial SCCs we miss the flag.

I think it is bad decision since infinite recursion will run out of stack,
but changing it upsets some testcases and should be done independently.
So this patch is fixing current behaviour to be consistent.

Bootstrapped/regtested x86_64-linux, comitted.

gcc/ChangeLog:

2021-11-11  Jan Hubicka  

* ipa-pure-const.c (propagate_pure_const): Self recursion is
a side effect.

diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
index 505ed4f8a3b..64777cd2d91 100644
--- a/gcc/ipa-pure-const.c
+++ b/gcc/ipa-pure-const.c
@@ -1513,6 +1611,9 @@ propagate_pure_const (void)
  enum pure_const_state_e edge_state = IPA_CONST;
  bool edge_looping = false;
 
+ if (e->recursive_p ())
+   looping = true;
+
  if (dump_file && (dump_flags & TDF_DETAILS))
{
  fprintf (dump_file, "Call to %s",


Fix noreturn discovery

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
this patch fixes ipa-pure-const handling of noreturn flags.  It is not
safe to set it for interposable symbols and we should also set it for
aliases (just like we do for other flags).  This patch merely copies other
flag handling and implements it here.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza

gcc/ChangeLog:

2021-11-11  Jan Hubicka  

* cgraph.c (set_noreturn_flag_1): New function.
(cgraph_node::set_noreturn_flag): New member function
* cgraph.h (cgraph_node::set_noreturn_flags): Declare.
* ipa-pure-const.c (pass_local_pure_const::execute): Use it.

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index c67d300e7a4..466b66d5ba5 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2614,6 +2614,53 @@ cgraph_node::set_malloc_flag (bool malloc_p)
   return changed;
 }
 
+/* Worker to set the noreturn flag.  */
+static void
+set_noreturn_flag_1 (cgraph_node *node, bool noreturn_p, bool *changed)
+{
+  if (noreturn_p && !TREE_THIS_VOLATILE (node->decl))
+{
+  TREE_THIS_VOLATILE (node->decl) = true;
+  *changed = true;
+}
+
+  ipa_ref *ref;
+  FOR_EACH_ALIAS (node, ref)
+{
+  cgraph_node *alias = dyn_cast (ref->referring);
+  if (!noreturn_p || alias->get_availability () > AVAIL_INTERPOSABLE)
+   set_noreturn_flag_1 (alias, noreturn_p, changed);
+}
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+if (e->caller->thunk
+   && (!noreturn_p || e->caller->get_availability () > AVAIL_INTERPOSABLE))
+  set_noreturn_flag_1 (e->caller, noreturn_p, changed);
+}
+
+/* Set TREE_THIS_VOLATILE on NODE's decl and on NODE's aliases if any.  */
+
+bool
+cgraph_node::set_noreturn_flag (bool noreturn_p)
+{
+  bool changed = false;
+
+  if (!noreturn_p || get_availability () > AVAIL_INTERPOSABLE)
+set_noreturn_flag_1 (this, noreturn_p, );
+  else
+{
+  ipa_ref *ref;
+
+  FOR_EACH_ALIAS (this, ref)
+   {
+ cgraph_node *alias = dyn_cast (ref->referring);
+ if (!noreturn_p || alias->get_availability () > AVAIL_INTERPOSABLE)
+   set_noreturn_flag_1 (alias, noreturn_p, );
+   }
+}
+  return changed;
+}
+
 /* Worker to set_const_flag.  */
 
 static void
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0a1f7c8960e..e42e305cdb6 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1167,6 +1167,10 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : 
public symtab_node
  if any.  */
   bool set_malloc_flag (bool malloc_p);
 
+  /* Set TREE_THIS_VOLATILE on cgraph_node's decl and on aliases of the node
+ if any.  */
+  bool set_noreturn_flag (bool noreturn_p);
+
   /* If SET_CONST is true, mark function, aliases and thunks to be ECF_CONST.
 If SET_CONST if false, clear the flag.
 
diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
index 505ed4f8a3b..84a028bcf8e 100644
--- a/gcc/ipa-pure-const.c
+++ b/gcc/ipa-pure-const.c
@@ -2132,11 +2132,10 @@ pass_local_pure_const::execute (function *fun)
 current_function_name ());
 
   /* Update declaration and reduce profile to executed once.  */
-  TREE_THIS_VOLATILE (current_function_decl) = 1;
+  if (cgraph_node::get (current_function_decl)->set_noreturn_flag (true))
+   changed = true;
   if (node->frequency > NODE_FREQUENCY_EXECUTED_ONCE)
node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
-
-  changed = true;
 }
 
   switch (l->pure_const_state)


Re: Use modref summary to DSE calls to non-pure functions

2021-11-11 Thread Jan Hubicka via Gcc-patches
> > Hmm, I could try to do this, but possibly incrementally?
> 
> You mean handle a  argument specially for unknown param offset?
> Yeah, I guess so.

I think it is also pointer that was allocated and is going to be
freed...
> 
> > Basically I want to have
> >
> > foo ()
> > decl = {}
> >
> > To be matched since even if I do not know the offset I know it is dead
> > after end of lifetime of the decl.  I am not quite sure PTA will give me
> > that?
> 
> for this case PTA should tell you the alias is to 'decl' only but then I'm
> not sure if stmt_kills_ref_p is up to the task to determine that 'decl = {}',
> from a quick look it doesn't.  So indeed the only interesting case will
> be a  based parameter which we can special-case.

Yep, i do not think it understands this.  I will look into it - I guess
it is common enough to care about.

Honza


Re: [PATCH][committed]middle-end: Fix signbit tests when ran on ISA with support for masks.

2021-11-11 Thread Tamar Christina via Gcc-patches
Ah yes that particular test checks the vector code.

I see that the function wasn't vectorized but that the scalar replacement was 
done.


_15 = _4 > 0;



So the test is checking if (-x >> bitsize-1) gets optimized to -(x > 0)

I see that the replacement was made on the scalar correctly so I will modify

The test to check for either the vector is vect_int or the scalar replacement

if not.



Cheers,

Tamar


From: Sandra Loosemore 
Sent: Wednesday, November 10, 2021 8:03 PM
To: Tamar Christina ; gcc-patches@gcc.gnu.org 

Cc: nd ; rguent...@suse.de 
Subject: Re: [PATCH][committed]middle-end: Fix signbit tests when ran on ISA 
with support for masks.

On 11/10/21 11:53 AM, Tamar Christina wrote:
> FAIL: gcc.dg/signbit-2.c scan-tree-dump-times optimized
> "[file://\\s+]\\s+>\\s+{ 0,
> 0, 0, 0 }" 1
>
> That's the old test which this patch has changed. Does it still fail
> with the new patch?

My test results are indeed from a couple days ago.  But, I looked at
your new modifications to this test, and still don't see anything like
the pattern it's looking for, or understand what output you expect to be
happening here.  Is the whole test specific to vector ISAs, and not just
your recent changes to it?  I've attached the .optimized dump I got on
nios2-elf.

-Sandra


Re: Use modref summary to DSE calls to non-pure functions

2021-11-11 Thread Richard Biener via Gcc-patches
On Thu, Nov 11, 2021 at 1:42 PM Jan Hubicka  wrote:
>
> Hi,
> >
> > No, I think if it turns out useful then we want a way to have such ref
> > represented by an ao_ref.  Note that when we come from a
> > ref tree we know handled-components only will increase offset,
> > only the base MEM_REF can contain a pointer subtraction (but
> > the result of that is the base then).
>
> Yep, that is why I introduced the parm_offset at first place - it can be
> negative or unknown...
> >
> > In what cases does parm_offset_known end up false?  Is that
> > when seeing a POINTER_PLUS_EXPR with unknown offset?
>
> Yep, a typical example is a loop with a pointer walking an array.
>
> > So yes, that's a case we cannot capture right now - the only
> > thing that remains is a pointer with a known points-to-set - a
> > similar problem as with the pure call PRE.  You could in theory
> > allocate a scratch SSA name and attach points-to-info
> > to it.  And when the call argument is  based then you could set
> > offset to zero.
>
> Hmm, I could try to do this, but possibly incrementally?

You mean handle a  argument specially for unknown param offset?
Yeah, I guess so.

> Basically I want to have
>
> foo ()
> decl = {}
>
> To be matched since even if I do not know the offset I know it is dead
> after end of lifetime of the decl.  I am not quite sure PTA will give me
> that?

for this case PTA should tell you the alias is to 'decl' only but then I'm
not sure if stmt_kills_ref_p is up to the task to determine that 'decl = {}',
from a quick look it doesn't.  So indeed the only interesting case will
be a  based parameter which we can special-case.

> > > It was my initial plan. However I was not sure how much I would get from
> > > that.
> > >
> > > The function starts with:
> > >
> > >   /* Don't return early on *this_2(D) ={v} {CLOBBER}.  */
> > >   if (gimple_has_volatile_ops (stmt)
> > >   && (!gimple_clobber_p (stmt)
> > >   || TREE_CODE (gimple_assign_lhs (stmt)) != MEM_REF))
> > > return;
> > >
> > >   ao_ref ref;
> > >   if (!initialize_ao_ref_for_dse (stmt, ))
> > > return;
> > >
> > > The check about clobber does not apply to calls and then it gives up on
> > > functions not returning aggregates (that is a common case).
> > >
> > > For functions returing aggregates it tries to prove that retval is dead
> > > and replace it.
> > >
> > > I guess I can simply call my analysis from the second return above and
> > > from the code removing dead LHS call instead of doing it from the main
> > > walker and drop the LHS handling?
> >
> > Yeah, something like that.
> OK, I will prepare updated patch, thanks!
>
> Honza
> >
> > Richard.
> >
> > > Thank you,
> > > Honza
> > > >
> > > > Thanks,
> > > > Richard.
> > > >
> > > > > +   }
> > > > >   else if (def_operand_p
> > > > >  def_p = single_ssa_def_operand (stmt, 
> > > > > SSA_OP_DEF))
> > > > > {


[committed] libgomp: Use TLS storage for omp_get_num_teams()/omp_get_team_num() values

2021-11-11 Thread Jakub Jelinek via Gcc-patches
Hi!

When thinking about GOMP_teams3, I've realized that using global variables
for the values returned by omp_get_num_teams()/omp_get_team_num() calls
is incorrect even with our right now dumb way of implementing host teams.
The problems are two, one is if host teams is used from multiple pthread_create
created threads - the spec says that host teams can't be nested inside of
explicit parallel or other teams constructs, but with pthread_create the
standard says obviously nothing about it.  Another more important thing
is host fallback, right now we don't do anything for omp_get_num_teams()
or omp_get_team_num() which was fine before host teams was introduced and
the 5.1 requirement that num_teams clause specifies minimum of teams, but
with the global vars it means inside of target teams num_teams (2) we happily
return omp_get_num_teams() == 4 if the target teams is inside of host teams
with num_teams(4).  With target fallback being invoked from parallel
regions global vars simply can't work right on the host.  Both with nowait
target and with synchronous target too, as while doing host fallback from
one thread a different thread could see wrong values.

So, this patch moves them to struct gomp_thread and propagates those for
parallel to child threads.  For host fallback, the implicit zeroing of
*thr results in us returning omp_get_num_teams () == 1 and
omp_get_team_num () == 0 which is fine for target teams without num_teams
clause, for target teams with num_teams clause something to work on and
for target without teams nested in it I've asked on omp-lang what should
be done.

Regtested on x86_64-linux, committed to trunk.

2021-11-11  Jakub Jelinek  

* libgomp.h (struct gomp_thread): Add num_teams and team_num members.
* team.c (struct gomp_thread_start_data): Likewise.
(gomp_thread_start): Initialize thr->num_teams and thr->team_num.
(gomp_team_start): Initialize start_data->num_teams and
start_data->team_num.  Update nthr->num_teams and nthr->team_num.
* teams.c (gomp_num_teams, gomp_team_num): Remove.
(GOMP_teams_reg): Set and restore thr->num_teams and thr->team_num
instead of gomp_num_teams and gomp_team_num.
(omp_get_num_teams): Use thr->num_teams + 1 instead of gomp_num_teams.
(omp_get_team_num): Use thr->team_num instead of gomp_team_num.
* testsuite/libgomp.c/teams-4.c: New test.

--- libgomp/libgomp.h.jj2021-10-20 09:34:47.004331626 +0200
+++ libgomp/libgomp.h   2021-11-11 12:44:47.710092897 +0100
@@ -768,6 +768,14 @@ struct gomp_thread
   /* User pthread thread pool */
   struct gomp_thread_pool *thread_pool;
 
+#ifdef LIBGOMP_USE_PTHREADS
+  /* omp_get_num_teams () - 1.  */
+  unsigned int num_teams;
+
+  /* omp_get_team_num ().  */
+  unsigned int team_num;
+#endif
+
 #if defined(LIBGOMP_USE_PTHREADS) \
 && (!defined(HAVE_TLS) \
|| !defined(__GLIBC__) \
--- libgomp/team.c.jj   2021-09-28 11:34:29.380146749 +0200
+++ libgomp/team.c  2021-11-11 12:55:22.524952564 +0100
@@ -56,6 +56,8 @@ struct gomp_thread_start_data
   struct gomp_task *task;
   struct gomp_thread_pool *thread_pool;
   unsigned int place;
+  unsigned int num_teams;
+  unsigned int team_num;
   bool nested;
   pthread_t handle;
 };
@@ -88,6 +90,8 @@ gomp_thread_start (void *xdata)
   thr->ts = data->ts;
   thr->task = data->task;
   thr->place = data->place;
+  thr->num_teams = data->num_teams;
+  thr->team_num = data->team_num;
 #ifdef GOMP_NEEDS_THREAD_HANDLE
   thr->handle = data->handle;
 #endif
@@ -645,6 +649,8 @@ gomp_team_start (void (*fn) (void *), vo
  nthr->ts.single_count = 0;
 #endif
  nthr->ts.static_trip = 0;
+ nthr->num_teams = thr->num_teams;
+ nthr->team_num = thr->team_num;
  nthr->task = >implicit_task[i];
  nthr->place = place;
  gomp_init_task (nthr->task, task, icv);
@@ -833,6 +839,8 @@ gomp_team_start (void (*fn) (void *), vo
   start_data->ts.single_count = 0;
 #endif
   start_data->ts.static_trip = 0;
+  start_data->num_teams = thr->num_teams;
+  start_data->team_num = thr->team_num;
   start_data->task = >implicit_task[i];
   gomp_init_task (start_data->task, task, icv);
   team->implicit_task[i].icv.nthreads_var = nthreads_var;
--- libgomp/teams.c.jj  2021-10-11 12:20:21.927063104 +0200
+++ libgomp/teams.c 2021-11-11 12:43:58.769797557 +0100
@@ -28,14 +28,12 @@
 #include "libgomp.h"
 #include 
 
-static unsigned gomp_num_teams = 1, gomp_team_num = 0;
-
 void
 GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
unsigned int thread_limit, unsigned int flags)
 {
+  struct gomp_thread *thr = gomp_thread ();
   (void) flags;
-  (void) num_teams;
   unsigned old_thread_limit_var = 0;
   if (thread_limit == 0)
 thread_limit = gomp_teams_thread_limit_var;
@@ -48,11 +46,11 @@ GOMP_teams_reg (void (*fn) (void *), voi
 }
   if (num_teams == 0)
 num_teams = 

RE: [PATCH] aarch64: Use type-qualified builtins for UADD[LW][2] Neon intrinsics

2021-11-11 Thread Kyrylo Tkachov via Gcc-patches
Hi Jonathan,

> -Original Message-
> From: Jonathan Wright 
> Sent: Thursday, November 11, 2021 10:18 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Richard Sandiford ; Kyrylo Tkachov
> 
> Subject: [PATCH] aarch64: Use type-qualified builtins for UADD[LW][2] Neon
> intrinsics
> 
> Hi,
> 
> This patch declares unsigned type-qualified builtins and uses them to
> implement widening-add Neon intrinsics. This removes the need for
> many casts in arm_neon.h.
> 
> Bootstrapped and regression tested on aarch64-none-linux-gnu - no
> issues.
> 
> Ok for master?
> 
> Thanks,
> Jonathan
> 
> ---
> 
> gcc/ChangeLog:
> 
> 2021-11-09  Jonathan Wright  
> 
>   * config/aarch64/aarch64-simd-builtins.def: Use BINOPU type
>   qualifiers in generator macros for uadd[lw][2] builtins.
>   * config/aarch64/arm_neon.h (vaddl_s8): Remove unnecessary
>   cast.
>   (vaddl_s16): Likewise.
>   (vaddl_s32): Likewise.
>   (vaddl_u8): Use type-qualified builtin and remove casts.
>   (vaddl_u16): Likewise.
>   (vaddl_u32): Likewise.
>   (vaddl_high_s8): Remove unnecessary cast.
>   (vaddl_high_s16): Likewise.
>   (vaddl_high_s32): Likewise.
>   (vaddl_high_u8): Use type-qualified builtin and remove casts.
>   (vaddl_high_u16): Likewise.
>   (vaddl_high_u32): Likewise.
>   (vaddw_s8): Remove unnecessary cast.
>   (vaddw_s16): Likewise.
>   (vaddw_s32): Likewise.
>   (vaddw_u8): Use type-qualified builtin and remove casts.
>   (vaddw_u16): Likewise.
>   (vaddw_u32): Likewise.
>   (vaddw_high_s8): Remove unnecessary cast.
>   (vaddw_high_s16): Likewise.
>   (vaddw_high_s32): Likewise.
>   (vaddw_high_u8): Use type-qualified builtin and remove casts.
>   (vaddw_high_u16): Likewise.
>   (vaddw_high_u32): Likewise.

Ok.
Thanks,
Kyrill




Re: [committed] openmp: Fix handling of numa_domains(1)

2021-11-11 Thread Thomas Schwinge
Hi!

On 2021-10-18T15:03:08+0200, Jakub Jelinek via Gcc-patches 
 wrote:
> On Fri, Oct 15, 2021 at 12:26:34PM -0700, sunil.k.pandey wrote:
>> 4764049dd620affcd3e2658dc7f03a6616370a29 is the first bad commit
>> commit 4764049dd620affcd3e2658dc7f03a6616370a29
>> Author: Jakub Jelinek 
>> Date:   Fri Oct 15 16:25:25 2021 +0200
>>
>> openmp: Fix up handling of OMP_PLACES=threads(1)
>>
>> caused
>>
>> FAIL: libgomp.c/places-10.c execution test
>
> Reproduced on gcc112 in CompileFarm (my ws isn't NUMA).
> If numa-domains is used with num-places count, sometimes the function
> could create more places than requested and crash.  This depended on the
> content of /sys/devices/system/node/online file, e.g. if the file
> contains
> 0-1,16-17
> and all NUMA nodes contain at least one CPU in the cpuset of the program,
> then numa_domains(2) or numa_domains(4) (or 5+) work fine while
> numa_domains(1) or numa_domains(3) misbehave.  I.e. the function was able
> to stop after reaching limit on the , separators (or trivially at the end),
> but not within in the ranges.
>
> Fixed thusly, tested on powerpc64le-linux, committed to trunk.

There appears to be yet another issue: there still are quite a number of
'FAIL: libgomp.c/places-10.c execution test' reports on
.  Also in my testing, on a system
where '/sys/devices/system/node/online' contains '0-1', I get a FAIL:

[...]
OPENMP DISPLAY ENVIRONMENT BEGIN
  _OPENMP = '201511'
  OMP_DYNAMIC = 'FALSE'
  OMP_NESTED = 'FALSE'
  OMP_NUM_THREADS = '8'
  OMP_SCHEDULE = 'DYNAMIC'
  OMP_PROC_BIND = 'TRUE'
  OMP_PLACES = '{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30},{FAIL: 
libgomp.c/places-10.c execution test


Grüße
 Thomas


> 2021-10-18  Jakub Jelinek  
>
>   * config/linux/affinity.c (gomp_affinity_init_numa_domains): Add
>   && gomp_places_list_len < count after nfirst <= nlast loop condition.
>
> --- libgomp/config/linux/affinity.c.jj2021-10-15 16:28:30.374460522 
> +0200
> +++ libgomp/config/linux/affinity.c   2021-10-18 14:44:51.559667127 +0200
> @@ -401,7 +401,7 @@ gomp_affinity_init_numa_domains (unsigne
>   break;
> q = end;
>   }
> -  for (; nfirst <= nlast; nfirst++)
> +  for (; nfirst <= nlast && gomp_places_list_len < count; nfirst++)
>   {
> sprintf (name + prefix_len, "node%lu/cpulist", nfirst);
> f = fopen (name, "r");
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] rs6000: Fix a handful of 32-bit built-in function problems in the new support

2021-11-11 Thread Segher Boessenkool
On Wed, Nov 10, 2021 at 03:28:18PM -0600, Bill Schmidt wrote:
> On 11/10/21 2:33 AM, Segher Boessenkool wrote:
> > On Tue, Nov 09, 2021 at 03:46:54PM -0600, Bill Schmidt wrote:
> >>* config/rs6000/rs6000-builtin-new.def (CMPB): Flag as no32bit.
> >>(BPERMD): Flag as 32bit.

So, change this to something like "flag this as needing special handling
on 32 bit" or something?

> >> -  void __builtin_set_texasr (unsigned long long);
> >> +  void __builtin_set_texasr (unsigned long);
> >>  SET_TEXASR nothing {htm,htmspr}
> >>  
> >> -  void __builtin_set_texasru (unsigned long long);
> >> +  void __builtin_set_texasru (unsigned long);
> >>  SET_TEXASRU nothing {htm,htmspr}
> >>  
> >> -  void __builtin_set_tfhar (unsigned long long);
> >> +  void __builtin_set_tfhar (unsigned long);
> >>  SET_TFHAR nothing {htm,htmspr}
> >>  
> >> -  void __builtin_set_tfiar (unsigned long long);
> >> +  void __builtin_set_tfiar (unsigned long);
> >>  SET_TFIAR nothing {htm,htmspr}
> > This does not seem to be what the exiting code does, either?  Try with
> > -m32 -mpowerpc64 (it extends to 64 bit there, so the builtin does not
> > have long int as parameter, it has long long int).
> 
> This uses a tfiar_t, which is a typedef for uintptr_t, so long int is 
> appropriate.
> This is necessary to make the HTM tests pass on 32-bit powerpc64.

void f(long x) { __builtin_set_texasr(x); }

built with -m32 -mpowerpc64 gives (in the expand dump):

void f (long int x)
{
  long long unsigned int _1;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _1 = (long long unsigned int) x_2(D);
  __builtin_set_texasr (_1); [tail call]
  return;
;;succ:   EXIT

}

The builtins have a "long long" argument in the existing code, in this
configuration.  And this is not the same as "long" here.

> >> --- a/gcc/testsuite/gcc.target/powerpc/cmpb-3.c
> >> +++ b/gcc/testsuite/gcc.target/powerpc/cmpb-3.c
> >> @@ -8,7 +8,7 @@ void abort ();
> >>  long long int
> >>  do_compare (long long int a, long long int b)
> >>  {
> >> -  return __builtin_cmpb (a, b);   /* { dg-error "'__builtin_cmpb' is not 
> >> supported in this compiler configuration" } */
> >> +  return __builtin_cmpb (a, b);   /* { dg-error "'__builtin_p6_cmpb' is 
> >> not supported in 32-bit mode" } */
> >>  }
> > The original spelling is the correct one?
> 
> This is something I have on my to-do list for the future, to see whether I
> can improve it.  The overloaded function __builtin_cmpb gets translated to
> the underlying non-overloaded builtin __builtin_p6_cmpb, and that's the only
> name that's still around by the time we get to the error processing.  I want
> to see whether I can add some infrastructure to recover the overloaded
> function name in such cases.  Is it okay to defer this for now?

It is fine to defer it.  It is not fine to change the testcase like
this.  The user did not write __builtin_p6_cmpb (which is not even
documented btw), so the compiler should not talk about that.  It is
fine to leave the test failing for now.


Segher


Re: [PATCH] fixincludes: don't assume getcwd() can handle NULL argument

2021-11-11 Thread Eric Gallager via Gcc-patches
On Tue, Nov 9, 2021 at 8:50 AM Xi Ruoyao via Gcc-patches
 wrote:
>
> POSIX says:
>
> On some implementations, if buf is a null pointer, getcwd() may obtain
> size bytes of memory using malloc(). In this case, the pointer returned
> by getcwd() may be used as the argument in a subsequent call to free().
> Invoking getcwd() with buf as a null pointer is not recommended in
> conforming applications.
>
> This produces an error building GCC with --enable-werror-always:
>
> ../../../fixincludes/fixincl.c: In function ‘process’:
> ../../../fixincludes/fixincl.c:1356:7: error: argument 1 is null but
> the corresponding size argument 2 value is 4096 [-Werror=nonnull]
>
> And, at least we've been leaking memory even if getcwd() supports this
> non-standard extension.
>
> fixincludes/ChangeLog:
>
> * fixincl.c (process): Allocate and deallocate the buffer for
>   getcwd() explicitly.
> ---
>  fixincludes/fixincl.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/fixincludes/fixincl.c b/fixincludes/fixincl.c
> index 6dba2f6e830..b4b1e38ede7 100644
> --- a/fixincludes/fixincl.c
> +++ b/fixincludes/fixincl.c
> @@ -1353,9 +1353,11 @@ process (void)
>if (access (pz_curr_file, R_OK) != 0)
>  {
>int erno = errno;
> +  char *buf = xmalloc (MAXPATHLEN);
>fprintf (stderr, "Cannot access %s from %s\n\terror %d (%s)\n",
> -   pz_curr_file, getcwd ((char *) NULL, MAXPATHLEN),
> +   pz_curr_file, getcwd (buf, MAXPATHLEN),
> erno, xstrerror (erno));
> +  free (buf);
>return;
>  }
>
> --
> 2.33.1

This seems to contradict bug 21823:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21823
It would fix bug 80047, though:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80047


Basic kill analysis for modref

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
This patch enables optimization of stores that are killed by calls.
The modref summary is extended by an array containing a list of access ranges,
relative to function parameters, that are known to be killed by the function.
This array is collected during local analysis and optimized (so separate
stores are glued together).

Kill analysis in ipa-modref.c is quite simplistic.  In particular no WPA
propagation is done and also we take a very simple approach to prove that
a given store is executed on each invocation of the function.  I simply
require it to be in the first basic block and before anything that can
throw externally.  I have more fancy code for that but with this patch I
want to primarily discuss the interface to tree-ssa-alias.c. I wonder if
there are some helpers I can re-use?

>From GCC linktime I get 814 functions with non-empty kill vector.

Modref stats:
  modref kill: 39 kills, 7162 queries
  modref use: 25169 disambiguations, 697722 queries
  modref clobber: 2290122 disambiguations, 22750147 queries
  5240008 tbaa queries (0.230329 per modref query)
  806190 base compares (0.035437 per modref query)

(note that more kills happen at early optimization where we have not
inlined that much yet).

For tramp3d (non-lto -O3 build):

Modref stats:
  modref kill: 45 kills, 630 queries
  modref use: 750 disambiguations, 10061 queries
  modref clobber: 35253 disambiguations, 543262 queries
  85347 tbaa queries (0.157101 per modref query)
  18727 base compares (0.034471 per modref query)

So it is not that high, but it gets better after improving the analysis side
and also with -Os and/or PGO (where we offline cdtors) and also after wiring in
same_addr_size_stores_p, which I want to discuss incrementally.

But at least there are not that many queries to slow down compile times
noticeably :)

Honza

gcc/ChangeLog:

* ipa-modref-tree.h (struct modref_access_node): New member function
* ipa-modref.c (modref_summary::useful_p): Kills are not useful when
we can not analyze loads.
(struct modref_summary_lto): Add kills.
(modref_summary::dump): Dump kills.
(record_access): Take access node as parameter.
(record_access_lto): Likewise.
(add_kill): New function.
(merge_call_side_effects): Merge kills.
(analyze_call): Pass around always_executed.
(struct summary_ptrs): Add always_executed flag.
(analyze_load): Update.
(analyze_store): Handle kills.
(analyze_stmt): Pass around always_executed flag; handle kills from
clobbers.
(analyze_function): Compute always_executed.
(modref_summaries::duplicate): Copy kills.
(update_signature): Release kills.
* ipa-modref.h (struct modref_summary): Add kills.
* tree-ssa-alias.c (dump_alias_stats): Dump kills.
(stmt_kills_ref_p): Handle modref kills.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/modref-dse-2.c: New test.

diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 17ff6bb582c..6f8caa331a6 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -120,6 +120,8 @@ static struct {
   unsigned HOST_WIDE_INT modref_use_no_alias;
   unsigned HOST_WIDE_INT modref_clobber_may_alias;
   unsigned HOST_WIDE_INT modref_clobber_no_alias;
+  unsigned HOST_WIDE_INT modref_kill_no;
+  unsigned HOST_WIDE_INT modref_kill_yes;
   unsigned HOST_WIDE_INT modref_tests;
   unsigned HOST_WIDE_INT modref_baseptr_tests;
 } alias_stats;
@@ -169,6 +171,12 @@ dump_alias_stats (FILE *s)
   + alias_stats.aliasing_component_refs_p_may_alias);
   dump_alias_stats_in_alias_c (s);
   fprintf (s, "\nModref stats:\n");
+  fprintf (s, "  modref kill: "
+  HOST_WIDE_INT_PRINT_DEC" kills, "
+  HOST_WIDE_INT_PRINT_DEC" queries\n",
+  alias_stats.modref_kill_yes,
+  alias_stats.modref_kill_yes
+  + alias_stats.modref_kill_no);
   fprintf (s, "  modref use: "
   HOST_WIDE_INT_PRINT_DEC" disambiguations, "
   HOST_WIDE_INT_PRINT_DEC" queries\n",
@@ -3373,6 +3381,107 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
   if (is_gimple_call (stmt))
 {
   tree callee = gimple_call_fndecl (stmt);
+  struct cgraph_node *node;
+  modref_summary *summary;
+
+  /* Try to disambiguate using modref summary.  Modref records a vector
+of stores with known offsets relative to function parameters that must
+happen every execution of function.  Find if we have a matching
+store and verify that function can not use the value.  */
+  if (callee != NULL_TREE
+ && (node = cgraph_node::get (callee)) != NULL
+ && node->binds_to_current_def_p ()
+ && (summary = get_modref_function_summary (node)) != NULL
+ && summary->kills.length ())
+   {
+ tree base = ao_ref_base (ref);
+ for (unsigned int i = 0; i < summary->kills.length (); i++)
+   {
+ modref_access_node  = summary->kills[i];
+

Re: Use modref summary to DSE calls to non-pure functions

2021-11-11 Thread Jan Hubicka via Gcc-patches
Hi,
> 
> No, I think if it turns out useful then we want a way to have such ref
> represented by an ao_ref.  Note that when we come from a
> ref tree we know handled-components only will increase offset,
> only the base MEM_REF can contain a pointer subtraction (but
> the result of that is the base then).

Yep, that is why I introduced the parm_offset at first place - it can be
negative or unknown...
> 
> In what cases does parm_offset_known end up false?  Is that
> when seeing a POINTER_PLUS_EXPR with unknown offset?

Yep, a typical example is a loop with a pointer walking an array.

> So yes, that's a case we cannot capture right now - the only
> thing that remains is a pointer with a known points-to-set - a
> similar problem as with the pure call PRE.  You could in theory
> allocate a scratch SSA name and attach points-to-info
> to it.  And when the call argument is  based then you could set
> offset to zero.

Hmm, I could try to do this, but possibly incrementally?

Basically I want to have

foo ()
decl = {}

To be matched since even if I do not know the offset I know it is dead
after end of lifetime of the decl.  I am not quite sure PTA will give me
that?
> > It was my initial plan. However I was not sure how much I would get from
> > that.
> >
> > The function starts with:
> >
> >   /* Don't return early on *this_2(D) ={v} {CLOBBER}.  */
> >   if (gimple_has_volatile_ops (stmt)
> >   && (!gimple_clobber_p (stmt)
> >   || TREE_CODE (gimple_assign_lhs (stmt)) != MEM_REF))
> > return;
> >
> >   ao_ref ref;
> >   if (!initialize_ao_ref_for_dse (stmt, ))
> > return;
> >
> > The check about clobber does not apply to calls and then it gives up on
> > functions not returning aggregates (that is a common case).
> >
> > For functions returing aggregates it tries to prove that retval is dead
> > and replace it.
> >
> > I guess I can simply call my analysis from the second return above and
> > from the code removing dead LHS call instead of doing it from the main
> > walker and drop the LHS handling?
> 
> Yeah, something like that.
OK, I will prepare updated patch, thanks!

Honza
> 
> Richard.
> 
> > Thank you,
> > Honza
> > >
> > > Thanks,
> > > Richard.
> > >
> > > > +   }
> > > >   else if (def_operand_p
> > > >  def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
> > > > {


Re: Use modref summary to DSE calls to non-pure functions

2021-11-11 Thread Richard Biener via Gcc-patches
On Thu, Nov 11, 2021 at 1:07 PM Jan Hubicka  wrote:
>
> > > +  /* Unlike alias oracle we can not skip subtrees based on TBAA check.
> > > + Count the size of the whole tree to verify that we will not need 
> > > too many
> > > + tests.  */
> > > +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> > > +FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> > > +  FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> > > +   if (num_tests++ > max_tests)
> > > + return false;
> >
> > at least the innermost loop can be done as
> >
> >   if (num_tests += ref_node->accesses.length () > max_tests)
> >
> > no?
>
> Yep that was stupid, sorry for that ;))
> >
> > > +
> > > +  /* Walk all memory writes and verify that they are dead.  */
> > > +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> > > +FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> > > +  FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> > > +   {
> > > + /* ??? if offset is unkonwn it may be negative.  Not sure
> > > +how to construct ref here.  */
> >
> > I think you can't, you could use -poly_int64_max or so.
>
> I need a ref to give to dse_classify_store. It needs base to track live
> bytes etc which is not very useful if I do not know the range.  However
> DSE is still useful since I can hit free or end of lifetime of the decl.
> I was wondering if I should simply implement a lightweight version of
> dse_clasify_store that handles this case?

No, I think if it turns out useful then we want a way to have such ref
represented by an ao_ref.  Note that when we come from a
ref tree we know handled-components only will increase offset,
only the base MEM_REF can contain a pointer subtraction (but
the result of that is the base then).

In what cases does parm_offset_known end up false?  Is that
when seeing a POINTER_PLUS_EXPR with unknown offset?
So yes, that's a case we cannot capture right now - the only
thing that remains is a pointer with a known points-to-set - a
similar problem as with the pure call PRE.  You could in theory
allocate a scratch SSA name and attach points-to-info
to it.  And when the call argument is  based then you could set
offset to zero.

> >
> > > + if (!access_node->parm_offset_known)
> > > +   return false;
> >
> > But you could do this check in the loop computing num_tests ...
> > (we could also cache the count and whether any of the refs have unknown 
> > offset
> > in the summary?)
>
> Yep, I plan to add cache for bits like this (and the check for accessing
> global memory).  Just want to push bit more of the cleanups I have in my
> local tree.
> >
> > > + tree arg;
> > > + if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
> > > +   arg = gimple_call_chain (stmt);
> > > + else
> > > +   arg = gimple_call_arg (stmt, access_node->parm_index);
> > > +
> > > + ao_ref ref;
> > > + poly_offset_int off = (poly_offset_int)access_node->offset
> > > +   + ((poly_offset_int)access_node->parm_offset
> > > +  << LOG2_BITS_PER_UNIT);
> > > + poly_int64 off2;
> > > + if (!off.to_shwi ())
> > > +   return false;
> > > + ao_ref_init_from_ptr_and_range
> > > +(, arg, true, off2, access_node->size,
> > > + access_node->max_size);
> > > + ref.ref_alias_set = ref_node->ref;
> > > + ref.base_alias_set = base_node->base;
> > > +
> > > + bool byte_tracking_enabled
> > > + = setup_live_bytes_from_ref (, live_bytes);
> > > + enum dse_store_status store_status;
> > > +
> > > + store_status = dse_classify_store (, stmt,
> > > +byte_tracking_enabled,
> > > +live_bytes, _clobber_p);
> > > + if (store_status != DSE_STORE_DEAD)
> > > +   return false;
> > > +   }
> > > +  /* Check also value stored by the call.  */
> > > +  if (gimple_store_p (stmt))
> > > +{
> > > +  ao_ref ref;
> > > +
> > > +  if (!initialize_ao_ref_for_dse (stmt, ))
> > > +   gcc_unreachable ();
> > > +  bool byte_tracking_enabled
> > > + = setup_live_bytes_from_ref (, live_bytes);
> > > +  enum dse_store_status store_status;
> > > +
> > > +  store_status = dse_classify_store (, stmt,
> > > +byte_tracking_enabled,
> > > +live_bytes, _clobber_p);
> > > +  if (store_status != DSE_STORE_DEAD)
> > > +   return false;
> > > +}
> > > +  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
> > > +  return true;
> > > +}
> > > +
> > >  namespace {
> > >
> > >  const pass_data pass_data_dse =
> > > @@ -1235,7 +1363,14 @@ pass_dse::execute (function *fun)
> > >   gimple *stmt = gsi_stmt (gsi);
> > >
> > 

Re: Use modref summary to DSE calls to non-pure functions

2021-11-11 Thread Jan Hubicka via Gcc-patches
> > +  /* Unlike alias oracle we can not skip subtrees based on TBAA check.
> > + Count the size of the whole tree to verify that we will not need too 
> > many
> > + tests.  */
> > +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> > +FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> > +  FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> > +   if (num_tests++ > max_tests)
> > + return false;
> 
> at least the innermost loop can be done as
> 
>   if (num_tests += ref_node->accesses.length () > max_tests)
> 
> no?

Yep that was stupid, sorry for that ;))
> 
> > +
> > +  /* Walk all memory writes and verify that they are dead.  */
> > +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> > +FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> > +  FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> > +   {
> > + /* ??? if offset is unkonwn it may be negative.  Not sure
> > +how to construct ref here.  */
> 
> I think you can't, you could use -poly_int64_max or so.

I need a ref to give to dse_classify_store.  It needs a base to track live
bytes etc., which is not very useful if I do not know the range.  However
DSE is still useful since I can hit free or the end of lifetime of the decl.
I was wondering if I should simply implement a lightweight version of
dse_classify_store that handles this case?
> 
> > + if (!access_node->parm_offset_known)
> > +   return false;
> 
> But you could do this check in the loop computing num_tests ...
> (we could also cache the count and whether any of the refs have unknown offset
> in the summary?)

Yep, I plan to add a cache for bits like this (and the check for accessing
global memory).  Just want to push a bit more of the cleanups I have in my
local tree.
> 
> > + tree arg;
> > + if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
> > +   arg = gimple_call_chain (stmt);
> > + else
> > +   arg = gimple_call_arg (stmt, access_node->parm_index);
> > +
> > + ao_ref ref;
> > + poly_offset_int off = (poly_offset_int)access_node->offset
> > +   + ((poly_offset_int)access_node->parm_offset
> > +  << LOG2_BITS_PER_UNIT);
> > + poly_int64 off2;
> > + if (!off.to_shwi ())
> > +   return false;
> > + ao_ref_init_from_ptr_and_range
> > +(, arg, true, off2, access_node->size,
> > + access_node->max_size);
> > + ref.ref_alias_set = ref_node->ref;
> > + ref.base_alias_set = base_node->base;
> > +
> > + bool byte_tracking_enabled
> > + = setup_live_bytes_from_ref (, live_bytes);
> > + enum dse_store_status store_status;
> > +
> > + store_status = dse_classify_store (, stmt,
> > +byte_tracking_enabled,
> > +live_bytes, _clobber_p);
> > + if (store_status != DSE_STORE_DEAD)
> > +   return false;
> > +   }
> > +  /* Check also value stored by the call.  */
> > +  if (gimple_store_p (stmt))
> > +{
> > +  ao_ref ref;
> > +
> > +  if (!initialize_ao_ref_for_dse (stmt, ))
> > +   gcc_unreachable ();
> > +  bool byte_tracking_enabled
> > + = setup_live_bytes_from_ref (, live_bytes);
> > +  enum dse_store_status store_status;
> > +
> > +  store_status = dse_classify_store (, stmt,
> > +byte_tracking_enabled,
> > +live_bytes, _clobber_p);
> > +  if (store_status != DSE_STORE_DEAD)
> > +   return false;
> > +}
> > +  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
> > +  return true;
> > +}
> > +
> >  namespace {
> >
> >  const pass_data pass_data_dse =
> > @@ -1235,7 +1363,14 @@ pass_dse::execute (function *fun)
> >   gimple *stmt = gsi_stmt (gsi);
> >
> >   if (gimple_vdef (stmt))
> > -   dse_optimize_stmt (fun, , live_bytes);
> > +   {
> > + gcall *call = dyn_cast  (stmt);
> > +
> > + if (call && dse_optimize_call (, live_bytes))
> > +   /* We removed a dead call.  */;
> > + else
> > +   dse_optimize_store (fun, , live_bytes);
> 
> I think we want to refactor both functions, dse_optimize_stmt has some
> early outs that apply generally, and it handles some builtin calls
> that we don't want to re-handle with dse_optimize_call.
> 
> So I wonder if it is either possible to call the new function from
> inside dse_optimize_stmt instead, after we handled the return
> value of call for example or different refactoring can make the flow
> more obvious.

It was my initial plan. However I was not sure how much I would get from
that.

The function starts with:

  /* Don't return early on *this_2(D) ={v} {CLOBBER}.  */
  if 

[PATCH 11/15] csky: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch fixes some non-robust split conditions in some
define_insn_and_splits, so that each of them is applied on top of
the corresponding condition for the define_insn part; otherwise the
splitting could be performed unexpectedly.

gcc/ChangeLog:

* config/csky/csky.md (*cskyv2_adddi3, *ck801_adddi3, *cskyv2_adddi1_1,
*cskyv2_subdi3, *ck801_subdi3, *cskyv2_subdi1_1, cskyv2_addcc,
cskyv2_addcc_invert, *cskyv2_anddi3, *ck801_anddi3, *cskyv2_iordi3,
*ck801_iordi3, *cskyv2_xordi3, *ck801_xordi3,): Fix split condition.
---
 gcc/config/csky/csky.md | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/gcc/config/csky/csky.md b/gcc/config/csky/csky.md
index f91d851cb2c..54143a0efea 100644
--- a/gcc/config/csky/csky.md
+++ b/gcc/config/csky/csky.md
@@ -850,7 +850,7 @@ (define_insn_and_split "*cskyv2_adddi3"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -877,7 +877,7 @@ (define_insn_and_split "*ck801_adddi3"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E1)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -906,7 +906,7 @@ (define_insn_and_split "*cskyv2_adddi1_1"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1048,7 +1048,7 @@ (define_insn_and_split "*cskyv2_subdi3"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1075,7 +1075,7 @@ (define_insn_and_split "*ck801_subdi3"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E1)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1104,7 +1104,7 @@ (define_insn_and_split "*cskyv2_subdi1_1"
(clobber (reg:CC CSKY_CC_REGNUM))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1276,7 +1276,7 @@ (define_insn_and_split "cskyv2_addcc"
dect\t%0, %1, %M2
#
#"
-  "reload_completed && !rtx_equal_p (operands[0], operands[1])"
+  "&& reload_completed && !rtx_equal_p (operands[0], operands[1])"
   [(set (match_dup 0)
(if_then_else:SI (ne (reg:CC CSKY_CC_REGNUM) (const_int 0))
 (plus:SI (match_dup 0) (match_dup 2]
@@ -1302,7 +1302,7 @@ (define_insn_and_split "cskyv2_addcc_invert"
decf\t%0, %1, %M2
#
#"
-  "reload_completed && !rtx_equal_p (operands[0], operands[1])"
+  "&& reload_completed && !rtx_equal_p (operands[0], operands[1])"
   [(set (match_dup 0)
(if_then_else:SI (eq (reg:CC CSKY_CC_REGNUM) (const_int 0))
 (plus:SI (match_dup 0) (match_dup 2]
@@ -1691,7 +1691,7 @@ (define_insn_and_split "*cskyv2_anddi3"
(match_operand:DI 2 "register_operand" "b,r")))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1716,7 +1716,7 @@ (define_insn_and_split "*ck801_anddi3"
   (match_operand:DI 2 "register_operand" "r")))]
   "CSKY_ISA_FEATURE (E1)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1801,7 +1801,7 @@ (define_insn_and_split "*cskyv2_iordi3"
(match_operand:DI 2 "register_operand" "b,  r")))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1826,7 +1826,7 @@ (define_insn_and_split "*ck801_iordi3"
(match_operand:DI 2 "register_operand" "r")))]
   "CSKY_ISA_FEATURE (E1)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1911,7 +1911,7 @@ (define_insn_and_split "*cskyv2_xordi3"
(match_operand:DI 2 "register_operand" "b,  r")))]
   "CSKY_ISA_FEATURE (E2)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
@@ -1936,7 +1936,7 @@ (define_insn_and_split "*ck801_xordi3"
(match_operand:DI 2 "register_operand" "r")))]
   "CSKY_ISA_FEATURE (E1)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
   {
 int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD;
-- 
2.27.0



[PATCH 15/15] sh: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch fixes some non-robust split conditions in some
define_insn_and_splits, so that each of them is applied on top of
the corresponding condition for the define_insn part; otherwise the
splitting could be performed unexpectedly.

gcc/ChangeLog:

* config/sh/sh.md (call_pcrel, call_value_pcrel, sibcall_pcrel,
sibcall_value_pcrel): Fix split condition.
---
 gcc/config/sh/sh.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 93ee7c9a7de..1bb325c7044 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -6566,7 +6566,7 @@ (define_insn_and_split "call_pcrel"
(clobber (match_scratch:SI 2 "="))]
   "TARGET_SH2"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   rtx lab = PATTERN (gen_call_site ());
@@ -6678,7 +6678,7 @@ (define_insn_and_split "call_value_pcrel"
(clobber (match_scratch:SI 3 "="))]
   "TARGET_SH2"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   rtx lab = PATTERN (gen_call_site ());
@@ -6877,7 +6877,7 @@ (define_insn_and_split "sibcall_pcrel"
(return)]
   "TARGET_SH2 && !TARGET_FDPIC"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   rtx lab = PATTERN (gen_call_site ());
@@ -7043,7 +7043,7 @@ (define_insn_and_split "sibcall_value_pcrel"
(return)]
   "TARGET_SH2 && !TARGET_FDPIC"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   rtx lab = PATTERN (gen_call_site ());
-- 
2.27.0



[PATCH 14/15] mips: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix some non-robust split conditions in some
define_insn_and_splits, to make each of them applied on top of
the corresponding condition for define_insn part, otherwise the
splitting could perform unexpectedly.

gcc/ChangeLog:

* config/mips/mips.md (*udivmod<mode>4, udivmod<mode>4_mips16): Fix
split condition.
---
 gcc/config/mips/mips.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 455b9b802f6..4efb7503df3 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -2961,7 +2961,7 @@ (define_insn_and_split "*udivmod<mode>4"
  (match_dup 2)))]
   "ISA_HAS_DIV && !TARGET_MIPS16"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   emit_insn (gen_udivmod4_split (operands[3], operands[1], operands[2]));
@@ -2982,7 +2982,7 @@ (define_insn_and_split "udivmod<mode>4_mips16"
(clobber (match_operand:GPR 4 "lo_operand" "=l"))]
   "ISA_HAS_DIV && TARGET_MIPS16"
   "#"
-  "cse_not_expected"
+  "&& cse_not_expected"
   [(const_int 0)]
 {
   emit_insn (gen_udivmod4_split (operands[3], operands[1], operands[2]));
-- 
2.27.0



[PATCH 12/15] i386: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix some non-robust split conditions in some
define_insn_and_splits, to make each of them applied on top of
the corresponding condition for define_insn part, otherwise the
splitting could perform unexpectedly.

gcc/ChangeLog:

* config/i386/i386.md (*add<dwi>3_doubleword, *addv<dwi>4_doubleword,
*addv<dwi>4_doubleword_1, *sub<dwi>3_doubleword,
*subv<dwi>4_doubleword, *subv<dwi>4_doubleword_1,
*add<dwi>3_doubleword_cc_overflow_1, *divmodsi4_const,
*neg<dwi>2_doubleword, *tls_dynamic_gnu2_combine_64_<mode>): Fix split
condition.
---
 gcc/config/i386/i386.md | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6eb9de81921..2bd09e502ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5491,7 +5491,7 @@ (define_insn_and_split "*add<dwi>3_doubleword"
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   (compare:CCC
 (plus:DWIH (match_dup 1) (match_dup 2))
@@ -6300,7 +6300,7 @@ (define_insn_and_split "*addv<dwi>4_doubleword"
(plus: (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   (compare:CCC
 (plus:DWIH (match_dup 1) (match_dup 2))
@@ -6347,7 +6347,7 @@ (define_insn_and_split "*addv<dwi>4_doubleword_1"
&& CONST_SCALAR_INT_P (operands[2])
&& rtx_equal_p (operands[2], operands[3])"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   (compare:CCC
 (plus:DWIH (match_dup 1) (match_dup 2))
@@ -6641,7 +6641,7 @@ (define_insn_and_split "*sub<dwi>3_doubleword"
(clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (MINUS, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
   (compare:CC (match_dup 1) (match_dup 2)))
  (set (match_dup 0)
@@ -6817,7 +6817,7 @@ (define_insn_and_split "*subv<dwi>4_doubleword"
(minus: (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (MINUS, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
   (compare:CC (match_dup 1) (match_dup 2)))
  (set (match_dup 0)
@@ -6862,7 +6862,7 @@ (define_insn_and_split "*subv<dwi>4_doubleword_1"
&& CONST_SCALAR_INT_P (operands[2])
&& rtx_equal_p (operands[2], operands[3])"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG)
   (compare:CC (match_dup 1) (match_dup 2)))
  (set (match_dup 0)
@@ -7542,7 +7542,7 @@ (define_insn_and_split
"*add<dwi>3_doubleword_cc_overflow_1"
(plus: (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (PLUS, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel [(set (reg:CCC FLAGS_REG)
   (compare:CCC
 (plus:DWIH (match_dup 1) (match_dup 2))
@@ -9000,7 +9000,7 @@ (define_insn_and_split "*divmodsi4_const"
(clobber (reg:CC FLAGS_REG))]
   "!optimize_function_for_size_p (cfun)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 1) (match_dup 4))
(parallel [(set (match_dup 0)
@@ -10515,7 +10515,7 @@ (define_insn_and_split "*neg<dwi>2_doubleword"
(clobber (reg:CC FLAGS_REG))]
   "ix86_unary_operator_ok (NEG, mode, operands)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(parallel
 [(set (reg:CCC FLAGS_REG)
  (ne:CCC (match_dup 1) (const_int 0)))
@@ -16898,7 +16898,7 @@ (define_insn_and_split
"*tls_dynamic_gnu2_combine_64_<mode>"
(clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_GNU2_TLS"
   "#"
-  ""
+  "&& 1"
   [(set (match_dup 0) (match_dup 4))]
 {
   operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
-- 
2.27.0



[PATCH 13/15] ia64: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix some non-robust split conditions in some
define_insn_and_splits, to make each of them applied on top of
the corresponding condition for define_insn part, otherwise the
splitting could perform unexpectedly.

gcc/ChangeLog:

* config/ia64/vect.md (*vec_extractv2sf_0_le, *vec_extractv2sf_0_be):
Fix split condition.
---
 gcc/config/ia64/vect.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index 1a2452289b7..0f3a406d620 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -1422,7 +1422,7 @@ (define_insn_and_split "*vec_extractv2sf_0_le"
   UNSPEC_VECT_EXTR))]
   "!TARGET_BIG_ENDIAN"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 0) (match_dup 1))]
 {
   if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
@@ -1440,7 +1440,7 @@ (define_insn_and_split "*vec_extractv2sf_0_be"
   UNSPEC_VECT_EXTR))]
   "TARGET_BIG_ENDIAN"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 0) (match_dup 1))]
 {
   if (MEM_P (operands[1]))
-- 
2.27.0



[PATCH 10/15] bfin: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix some non-robust split conditions in some
define_insn_and_splits, to make each of them applied on top of
the corresponding condition for define_insn part, otherwise the
splitting could perform unexpectedly.

gcc/ChangeLog:

* config/bfin/bfin.md (movdi_insn, movdf_insn): Fix split condition.
---
 gcc/config/bfin/bfin.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md
index fd65f4d9e63..41a50974136 100644
--- a/gcc/config/bfin/bfin.md
+++ b/gcc/config/bfin/bfin.md
@@ -506,7 +506,7 @@ (define_insn_and_split "movdi_insn"
(match_operand:DI 1 "general_operand" "iFx,r,mx"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 2) (match_dup 3))
(set (match_dup 4) (match_dup 5))]
 {
@@ -718,7 +718,7 @@ (define_insn_and_split "movdf_insn"
(match_operand:DF 1 "general_operand" "iFx,r,mx"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 2) (match_dup 3))
(set (match_dup 4) (match_dup 5))]
 {
-- 
2.27.0



[PATCH 09/15] arm: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix one non-robust split condition, to make
it applied on top of the corresponding condition for define_insn
part, otherwise the splitting could perform unexpectedly.

gcc/ChangeLog:

* config/arm/arm.md (*minmax_arithsi_non_canon): Fix split condition.
---
 gcc/config/arm/arm.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 4adc976b8b6..9a27d421484 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4198,7 +4198,7 @@ (define_insn_and_split "*minmax_arithsi_non_canon"
   "TARGET_32BIT && !arm_eliminable_register (operands[1])
&& !(arm_restrict_it && CONST_INT_P (operands[3]))"
   "#"
-  "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed"
+  "&& reload_completed"
   [(set (reg:CC CC_REGNUM)
 (compare:CC (match_dup 2) (match_dup 3)))
 
-- 
2.27.0



[PATCH 08/15] alpha: Fix non-robust split condition in define_insn_and_split

2021-11-11 Thread Kewen Lin via Gcc-patches
This patch is to fix some non-robust split conditions in some
define_insn_and_splits, to make each of them applied on top of
the corresponding condition for define_insn part, otherwise the
splitting could perform unexpectedly.

gcc/ChangeLog:

* config/alpha/alpha.md (*movtf_internal, *movti_internal): Fix split
condition.
---
 gcc/config/alpha/alpha.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 98d09d43721..87617afd0c6 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -3830,7 +3830,7 @@ (define_insn_and_split "*movtf_internal"
   "register_operand (operands[0], TFmode)
|| reg_or_0_operand (operands[1], TFmode)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 1) (match_dup 3))]
   "alpha_split_tmode_pair (operands, TFmode, true);")
@@ -4091,7 +4091,7 @@ (define_insn_and_split "*movti_internal"
 && ! CONSTANT_P (operands[1]))
|| reg_or_0_operand (operands[1], TImode)"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 1) (match_dup 3))]
   "alpha_split_tmode_pair (operands, TImode, true);")
-- 
2.27.0



  1   2   >