[Committed] S/390 zTPF: Handle skip trace addresses when unwinding

2020-04-02 Thread Andreas Krebbel via Gcc-patches
From: Jim Johnston 

Check for and handle new skip trace addresses when unwinding on zTPF.

libgcc/ChangeLog:

2020-04-03  Jim Johnston  

* config/s390/tpf-unwind.h (MIN_PATRANGE, MAX_PATRANGE)
(TPFRA_OFFSET): Macros removed.
(CP_CNF, cinfc_fast, CINFC_CMRESET, CINTFC_CMCENBKST)
(CINTFC_CMCENBKED, ICST_CRET, ICST_SRET, LOWCORE_PAGE3_ADDR)
(PG3_SKIPPING_OFFSET): New macros.
(__isPATrange): Use cinfc_fast for the check.
(__isSkipResetAddr): New function.
(s390_fallback_frame_state): Check for skip trace addresses. Use
either ICST_CRET or ICST_SRET to calculate return address
location.
(__tpf_eh_return): Handle skip trace addresses.
---
 libgcc/ChangeLog|  14 
 libgcc/config/s390/tpf-unwind.h | 132 ++--
 2 files changed, 89 insertions(+), 57 deletions(-)

diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index c5ff4f63c7d..7548e347640 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,17 @@
+2020-04-03  Jim Johnston  
+
+   * config/s390/tpf-unwind.h (MIN_PATRANGE, MAX_PATRANGE)
+   (TPFRA_OFFSET): Macros removed.
+   (CP_CNF, cinfc_fast, CINFC_CMRESET, CINTFC_CMCENBKST)
+   (CINTFC_CMCENBKED, ICST_CRET, ICST_SRET, LOWCORE_PAGE3_ADDR)
+   (PG3_SKIPPING_OFFSET): New macros.
+   (__isPATrange): Use cinfc_fast for the check.
+   (__isSkipResetAddr): New function.
+   (s390_fallback_frame_state): Check for skip trace addresses. Use
+   either ICST_CRET or ICST_SRET to calculate return address
+   location.
+   (__tpf_eh_return): Handle skip trace addresses.
+
 2020-03-26  Richard Earnshaw  
 
PR target/94220
diff --git a/libgcc/config/s390/tpf-unwind.h b/libgcc/config/s390/tpf-unwind.h
index 2bd5493bb71..fadc06b5e59 100644
--- a/libgcc/config/s390/tpf-unwind.h
+++ b/libgcc/config/s390/tpf-unwind.h
@@ -32,20 +32,29 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
Description: This function simply checks to see if the address
passed to it is in the CP pat code range.  */
 
-#define MIN_PATRANGE 0x1
-#define MAX_PATRANGE 0x80
+#define CP_CNF  0xc18u /* location of BSS CINFC pointer */
+#define cinfc_fast(TAG) (void *) \
+  *((unsigned long *) *(unsigned long *) (CP_CNF) + (TAG))
+#define CINFC_CMRESET 187
+#define CINTFC_CMCENBKST 431
+#define CINTFC_CMCENBKED 432
 
 static inline unsigned int
 __isPATrange (void *addr)
 {
-  if (addr > (void *)MIN_PATRANGE && addr < (void *)MAX_PATRANGE)
-return 1;
-  else
-return 0;
+  return !!(addr > cinfc_fast (CINTFC_CMCENBKST)
+   && addr < cinfc_fast (CINTFC_CMCENBKED));
+}
+
+static inline unsigned int
+__isSkipResetAddr (void *addr)
+{
+  return !!(addr == cinfc_fast (CINFC_CMRESET));
 }
 
 /* TPF return address offset from start of stack frame.  */
-#define TPFRA_OFFSET 168
+#define ICST_CRET 168
+#define ICST_SRET 320
 
 /* Exceptions macro defined for TPF so that functions without
dwarf frame information can be used with exceptions.  */
@@ -63,12 +72,12 @@ s390_fallback_frame_state (struct _Unwind_Context *context,
 (((unsigned long int) context->cfa) - STACK_POINTER_OFFSET));
 
   /* Are we going through special linkage code?  */
-  if (__isPATrange (context->ra))
+  if (__isPATrange (context->ra) || __isSkipResetAddr (context->ra))
 {
 
   /* Our return register isn't zero for end of stack, so
  check backward stackpointer to see if it is zero.  */
-  if (regs == NULL)
+  if (regs == 0)
  return _URC_END_OF_STACK;
 
   /* No stack frame.  */
@@ -83,11 +92,18 @@ s390_fallback_frame_state (struct _Unwind_Context *context,
  fs->regs.reg[i].loc.reg = i;
}
 
-  /* ... except for %r14, which is stored at CFA-112
-and used as return address.  */
-  fs->regs.reg[14].how = REG_SAVED_OFFSET;
-  fs->regs.reg[14].loc.offset = TPFRA_OFFSET - STACK_POINTER_OFFSET;
-  fs->retaddr_column = 14;
+  /* ... except for %r14, which is stored at CFA+offset where offset
+is displacement of ICST_CRET or ICST_SRET from CFA */
+  if ( __isPATrange(context->ra) )  {
+  fs->regs.reg[14].how = REG_SAVED_OFFSET;
+  fs->regs.reg[14].loc.offset = ICST_CRET - STACK_POINTER_OFFSET;
+  fs->retaddr_column = 14;
+  }  else  {
+  fs->regs.reg[14].how = REG_SAVED_OFFSET;
+  fs->regs.reg[14].loc.offset = ICST_SRET - STACK_POINTER_OFFSET;
+  fs->retaddr_column = 14;
+
+  }
 
   return _URC_NO_REASON;
 }
@@ -140,6 +156,9 @@ s390_fallback_frame_state (struct _Unwind_Context *context,
 #define TPFAREA_SIZE STACK_POINTER_OFFSET-TPFAREA_OFFSET
 #define INVALID_RETURN 0
 
+#define LOWCORE_PAGE3_ADDR 4032
+#define PG3_SKIPPING_OFFSET 18
+
 void * __tpf_eh_return (void *target, void *origRA);
 
 void *
@@ -148,30 +167,29 @@ __tpf_eh_return (void *target, void *origRA)
   Dl_info targetco

[PATCH v2] gcc/config/rs6000: Add link with libc128 with -mlong-double-128 for AIX

2020-04-02 Thread CHIGOT, CLEMENT via Gcc-patches
Description:
 * AIX applications using 128-bit long double must be linked with
   libc128.a, in order to have 128-bit compatible routines.

Tests:
 * AIX 7.2, 7.1, 6.1: Build/Tests: OK

Changelog:
2020-04-03 Clément Chigot 
 * config/rs6000/aix61.h (LIB_SPEC): Add -lc128 with -mlong-double-128.
 * config/rs6000/aix71.h (LIB_SPEC): Likewise.
 * config/rs6000/aix72.h (LIB_SPEC): Likewise.





gcc-8.4.0-gcc-config-rs6000-add-link-with-libc128-with-mlong-d.patch
Description: gcc-8.4.0-gcc-config-rs6000-add-link-with-libc128-with-mlong-d.patch


Re: [PATCH] Enable -mpcrel on PowerPC -mcpu=future ELF v2 systems, V3

2020-04-02 Thread will schmidt via Gcc-patches
On Thu, 2020-04-02 at 20:36 -0400, Michael Meissner via Gcc-patches
wrote:
> Enable -mpcrel on PowerPC -mcpu=future ELF v2 systems, V3
> 

Hi,

> This patch changes the default for -mcpu=future to be -mpcrel (i.e.
> use
> PC-relative addressing) if the ABI allows PC-relative relocations and
> the user
> did not use either -mno-pcrel or -mno-prefixed.
> 
> I have changed the spelling of the macro to PCREL_SUPPORTED_BY_ABI
> (from
> PCREL_SUPPORTED_BY_OS) since you pointed out it is more properly a
> function of
> the particular ABI, rather than just an OS choice.  I have changed
> the various
> comments to make it clearer.
> 
> I have done a bootstrap and a make check with and without the patch
> and there
> were no regressions by adding the patch on a little endian PowerPC
> Linux
> system.
> 
> I also tested by hand that if I use:
> 
>   -mcpu=power9
>   -mcpu=future -mno-prefixed
>   -mcpu=future -mno-pcrel
>   -mcpu=future -mabi=elfv1(or)
>   -mcpu=future -mcmodel=large
> 
> that PC-relative addressing is not enabled by default.  Variants of
> this patch
> have been used since December, building power8/power9 code on big
> endian
> systems, and power8/power9/future on little endian systems.  Can I
> check this
> into the master branch?
> 
> 2020-04-02  Michael Meissner  
> 
>   * config/rs6000/linux64.h (PCREL_SUPPORTED_BY_ABI): Enable
>   prefixed PC-relative addressing if the ABI supports it.

>   * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Do
> not
>   set OPTION_MASK_FUTURE here.

Patch contents suggest this should be "Do not set OPTION_MASK_PREFIXED
here."

>   * config/rs6000/rs6000.c (rs6000_option_override_internal):
> Enable
>   OPTION_MASK_PREFIXED and OPTION_MASK_PREFIXED on -mcpu=future
> by
>   default if the current ABI allows the options.

It's so good we enable it twice.  :-)
One of those should be s/OPTION_MASK_PREFIXED/OPTION_MASK_PCREL/


> 
> --- /tmp/IPB30g_linux64.h 2020-04-02 15:23:19.977060411 -0500
> +++ gcc/config/rs6000/linux64.h   2020-04-02 15:23:01.016474023
> -0500
> @@ -640,3 +640,11 @@ extern int dot_symbols;
> enabling the __float128 keyword.  */
>  #undef   TARGET_FLOAT128_ENABLE_TYPE
>  #define TARGET_FLOAT128_ENABLE_TYPE 1
> +
> +/* Enable using prefixed PC-relative addressing on the 'future'
> machine if the
> +   ABI supports it.  The ELF v2 ABI only supports PC-relative
> relocations for
> +   the medium code model.  */
> +#undef  PCREL_SUPPORTED_BY_ABI

A previous review comment suggested that this #undef should be removed
(it has the potential to hide or introduce a bug).


Thanks
-Will





Re: [PATCH] rs6000: Don't split constant operator when add, move to temp register for future optimization

2020-04-02 Thread luoxhu via Gcc-patches



On 2020/4/3 06:16, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Mar 30, 2020 at 11:59:57AM +0800, luoxhu wrote:
>>> Do we want something later in the RTL pipeline to make "addi"s etc. again?
> 
> (This would be a good thing to consider -- maybe a define_insn_and_split
> will work.  But see below).
> 
>> [PATCH] rs6000: Don't split constant operator add before reload, move to 
>> temp register for future optimization
>>
>> Don't split code from add3 for SDI to allow a later pass to split.
>> This allows later logic to hoist out constant load in add instructions.
>> In loop, lis+ori could be hoisted out to improve performance compared with
>> previous addis+addi (About 15% on typical case), weak point is
>> one more register is used and one more instruction is generated.  i.e.:
>>
>> addis 3,3,0x8765
>> addi 3,3,0x4321
>>
>> =>
>>
>> lis 9,0x8765
>> ori 9,9,0x4321
>> add 3,3,9
> 
> (This patch will of course have to wait for stage 1).
> 
> Such a define_insn_and_split could be for an add of a (signed) 32-bit
> immediate.  combine will try to combine the three insns (lis;ori;add),
> and match the new pattern.

Currently 286r.split2 will split "12:%9:DI=0x87654321" to lis+ori by
rs6000_emit_set_const of define_split, do you mean add new define_insn_and_split
to do the split?  Another patch to do this after this one goes upstream in 
stage 1?

> 
> This also links in with Alan's work on big immediates, and with paddi
> insns, etc.

Seems PR94393?  Yes, rs6000_emit_set_const calls rs6000_emit_set_long_const for 
DImode.
I tried unsigned long constants like 0x0000abcd87654321, 0xffffabcd87654321 and
0xc000000000000000ULL.
All of them are outside of the loop even without my patch.  No difference with or
without
Alan's patch.

0x0000abcd87654321: li 9,0  ori 9,9,0xabcd  sldi 9,9,32  oris 9,9,0x8765  ori 9,9,0x4321
0xffffabcd87654321: lis 9,0xabcd  ori 9,9,0x8765  sldi 9,9,16  ori 9,9,0x4321
0xc000000000000000ULL: li 9,-1  rldicr 9,9,0,1


Thanks,
Xionghu

> 
> 
> Segher
> 



[PATCH] Enable -mpcrel on PowerPC -mcpu=future ELF v2 systems, V3

2020-04-02 Thread Michael Meissner via Gcc-patches
Enable -mpcrel on PowerPC -mcpu=future ELF v2 systems, V3

This patch changes the default for -mcpu=future to be -mpcrel (i.e. use
PC-relative addressing) if the ABI allows PC-relative relocations and the user
did not use either -mno-pcrel or -mno-prefixed.

I have changed the spelling of the macro to PCREL_SUPPORTED_BY_ABI (from
PCREL_SUPPORTED_BY_OS) since you pointed out it is more properly a function of
the particular ABI, rather than just an OS choice.  I have changed the various
comments to make it clearer.

I have done a bootstrap and a make check with and without the patch and there
were no regressions by adding the patch on a little endian PowerPC Linux
system.

I also tested by hand that if I use:

-mcpu=power9
-mcpu=future -mno-prefixed
-mcpu=future -mno-pcrel
-mcpu=future -mabi=elfv1(or)
-mcpu=future -mcmodel=large

that PC-relative addressing is not enabled by default.  Variants of this patch
have been used since December, building power8/power9 code on big endian
systems, and power8/power9/future on little endian systems.  Can I check this
into the master branch?

2020-04-02  Michael Meissner  

* config/rs6000/linux64.h (PCREL_SUPPORTED_BY_ABI): Enable
prefixed PC-relative addressing if the ABI supports it.
* config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Do not
set OPTION_MASK_FUTURE here.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
OPTION_MASK_PREFIXED and OPTION_MASK_PREFIXED on -mcpu=future by
default if the current ABI allows the options.

--- /tmp/IPB30g_linux64.h   2020-04-02 15:23:19.977060411 -0500
+++ gcc/config/rs6000/linux64.h 2020-04-02 15:23:01.016474023 -0500
@@ -640,3 +640,11 @@ extern int dot_symbols;
enabling the __float128 keyword.  */
 #undef TARGET_FLOAT128_ENABLE_TYPE
 #define TARGET_FLOAT128_ENABLE_TYPE 1
+
+/* Enable using prefixed PC-relative addressing on the 'future' machine if the
+   ABI supports it.  The ELF v2 ABI only supports PC-relative relocations for
+   the medium code model.  */
+#undef  PCREL_SUPPORTED_BY_ABI
+#define PCREL_SUPPORTED_BY_ABI (TARGET_FUTURE && TARGET_PREFIXED   \
+&& ELFv2_ABI_CHECK \
+&& (TARGET_CMODEL == CMODEL_MEDIUM))
--- /tmp/EHqpAk_rs6000-cpus.def 2020-04-02 15:23:19.993064282 -0500
+++ gcc/config/rs6000/rs6000-cpus.def   2020-04-02 15:23:01.016474023 -0500
@@ -75,11 +75,11 @@
 | OPTION_MASK_P8_VECTOR\
 | OPTION_MASK_P9_VECTOR)
 
-/* Support for a future processor's features.  Do not enable -mpcrel until it
-   is fully functional.  */
+/* Support for a future processor's features.  We do not set the addressing
+   options OPTION_MASK_PREFIXED or OPTION_MASK_PCREL here.  Those options are
+   enabled in the function rs6000_option_override if the ABI supports them.  */
 #define ISA_FUTURE_MASKS_SERVER(ISA_3_0_MASKS_SERVER   
\
-| OPTION_MASK_FUTURE   \
-| OPTION_MASK_PREFIXED)
+| OPTION_MASK_FUTURE)
 
 /* Flags that need to be turned off if -mno-future.  */
 #define OTHER_FUTURE_MASKS (OPTION_MASK_PCREL  \
--- /tmp/Zayhhc_rs6000.c2020-04-02 15:23:20.009068153 -0500
+++ gcc/config/rs6000/rs6000.c  2020-04-02 15:23:01.020474991 -0500
@@ -4020,6 +4020,11 @@ rs6000_option_override_internal (bool gl
   rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
 }
 
+  /* Enable -mprefixed by default on 64-bit 'future' systems.  */
+  if (TARGET_FUTURE && TARGET_POWERPC64
+  && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
+rs6000_isa_flags |= OPTION_MASK_PREFIXED;
+
   /* -mprefixed (and hence -mpcrel) requires -mcpu=future.  */
   if (TARGET_PREFIXED && !TARGET_FUTURE)
 {
@@ -4181,6 +4186,14 @@ rs6000_option_override_internal (bool gl
   rs6000_isa_flags &= ~OPTION_MASK_PCREL;
 }
 
+#ifdef PCREL_SUPPORTED_BY_ABI
+  /* If the ABI has support for PC-relative relocations, enable it by
+ default.  */
+  if (PCREL_SUPPORTED_BY_ABI
+  && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
+rs6000_isa_flags |= OPTION_MASK_PCREL;
+#endif
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
 

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


[PATCH] c++: Refrain from using replace_placeholders in constexpr evaluation [PR94205]

2020-04-02 Thread Patrick Palka via Gcc-patches
This removes the use of replace_placeholders in cxx_eval_constant_expression
(which causes the new test lambda-this6.C to ICE due to replace_placeholders
mutating the shared TARGET_EXPR_INITIAL tree which then trips up the
gimplifier).

In its place, this patch adds a 'parent' field to constexpr_ctx which is used to
store a pointer to an outer constexpr_ctx that points to another object under
construction.  With this new field, we can beef up lookup_placeholder to resolve
PLACEHOLDER_EXPRs that refer to former objects under construction, which fixes
PR94205 without needing to do replace_placeholders.  Also we can now respect the
CONSTRUCTOR_PLACEHOLDER_BOUNDARY flag which fixes the constexpr analogue of
PR79937.

Does this look OK to commit after testing?

gcc/cp/ChangeLog:

PR c++/94205
PR c++/79937
* constexpr.c (struct constexpr_ctx): New field 'parent'.
(cxx_eval_bare_aggregate): Propagate CONSTRUCTOR_PLACEHOLDER_BOUNDARY
flag from the original constructor to the reduced constructor.
(lookup_placeholder): Prefer to return the outermost matching object
by recursively calling lookup_placeholder on the 'parent' context,
but don't cross CONSTRUCTOR_PLACEHOLDER_BOUNDARY constructors.
(cxx_eval_constant_expression): Link the 'ctx' context to the 'new_ctx'
context via 'new_ctx.parent' when being expanded without an explicit
target.  Don't call replace_placeholders.
(cxx_eval_outermost_constant_expr): Initialize 'ctx.parent' to NULL.

gcc/testsuite/ChangeLog:

PR c++/94205
PR c++/79937
* g++.dg/cpp1y/pr79937-5.C: New test.
* g++.dg/cpp1z/lambda-this6.C: New test.
---
 gcc/cp/constexpr.c| 25 +-
 gcc/testsuite/g++.dg/cpp1y/pr79937-5.C| 42 +++
 gcc/testsuite/g++.dg/cpp1z/lambda-this6.C | 12 +++
 3 files changed, 71 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr79937-5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/lambda-this6.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 613a769e642..c72305715e4 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -1076,6 +1076,9 @@ struct constexpr_ctx {
   tree object;
   /* If inside SWITCH_EXPR.  */
   constexpr_switch_state *css_state;
+  /* The constexpr expansion context inside which this one is nested.  This is
+ used by lookup_placeholder to resolve PLACEHOLDER_EXPRs.  */
+  const constexpr_ctx *parent;
 
   /* Whether we should error on a non-constant expression or fail quietly.
  This flag needs to be here, but some of the others could move to global
@@ -3843,6 +3846,9 @@ cxx_eval_bare_aggregate (const constexpr_ctx *ctx, tree t,
   vec **p = &CONSTRUCTOR_ELTS (ctx->ctor);
   vec_alloc (*p, vec_safe_length (v));
 
+  if (CONSTRUCTOR_PLACEHOLDER_BOUNDARY (t))
+CONSTRUCTOR_PLACEHOLDER_BOUNDARY (ctx->ctor) = 1;
+
   unsigned i;
   tree index, value;
   bool constant_p = true;
@@ -5305,6 +5311,12 @@ lookup_placeholder (const constexpr_ctx *ctx, bool lval, 
tree type)
   if (!ctx)
 return NULL_TREE;
 
+  /* Prefer the outermost matching object, but don't cross
+ CONSTRUCTOR_PLACEHOLDER_BOUNDARY constructors.  */
+  if (ctx->ctor && !CONSTRUCTOR_PLACEHOLDER_BOUNDARY (ctx->ctor))
+if (tree parent_ob = lookup_placeholder (ctx->parent, lval, type))
+  return parent_ob;
+
   /* We could use ctx->object unconditionally, but using ctx->ctor when we
  can is a minor optimization.  */
   if (!lval && ctx->ctor && same_type_p (TREE_TYPE (ctx->ctor), type))
@@ -5608,19 +5620,16 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
r = *p;
break;
  }
-   tree init = TARGET_EXPR_INITIAL (t);
if ((AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type)))
  {
-   if (ctx->object)
- /* If the initializer contains any PLACEHOLDER_EXPR, we need to
-resolve them before we create a new CONSTRUCTOR for the
-temporary.  */
- init = replace_placeholders (init, ctx->object);
-
/* We're being expanded without an explicit target, so start
   initializing a new object; expansion with an explicit target
   strips the TARGET_EXPR before we get here.  */
new_ctx = *ctx;
+   /* Link CTX to NEW_CTX so that lookup_placeholder can resolve
+  any PLACEHOLDER_EXPR within the initializer that refers to the
+  former object under construction.  */
+   new_ctx.parent = ctx;
new_ctx.ctor = build_constructor (type, NULL);
CONSTRUCTOR_NO_CLEARING (new_ctx.ctor) = true;
new_ctx.object = slot;
@@ -6474,7 +6483,7 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   bool overflow_p = false;
 
   constexpr_global_ctx global_ctx;
-  constexpr_ctx ctx = { &global_ctx, NULL, NULL, NU

[committed, obvious] Fix some comment typos in alias.c

2020-04-02 Thread Sandra Loosemore
I noticed a bunch of comment typos in get_alias_set when I was reading 
code to try to track down a bug affecting nios2 targets (see pr93946). 
I've checked in this patch to clean them up.


-Sandra
commit 63f56527335a7667769b5dea15569e23d0ebc749
Author: Sandra Loosemore 
Date:   Thu Apr 2 15:29:10 2020 -0700

Fix some comment typos in alias.c.

2020-04-02  Sandra Loosemore  

	* alias.c (get_alias_set): Fix comment typos.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 27fac2c..8c57cd5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2020-04-02  Sandra Loosemore  
+
+	* alias.c (get_alias_set): Fix comment typos.
+
 2020-04-02  Fritz Reese  
 
 	PR fortran/85982
diff --git a/gcc/alias.c b/gcc/alias.c
index 82a27f6..49bd7b3 100644
--- a/gcc/alias.c
+++ b/gcc/alias.c
@@ -861,7 +861,7 @@ get_alias_set (tree t)
   alias_set_type set;
 
   /* We cannot give up with -fno-strict-aliasing because we need to build
- proper type representation for possible functions which are build with
+ proper type representations for possible functions which are built with
  -fstrict-aliasing.  */
 
   /* return 0 if this or its type is an error.  */
@@ -920,9 +920,9 @@ get_alias_set (tree t)
   if (set != -1)
 	return set;
   /* Handle structure type equality for pointer types, arrays and vectors.
-	 This is easy to do, because the code bellow ignore canonical types on
+	 This is easy to do, because the code below ignores canonical types on
 	 these anyway.  This is important for LTO, where TYPE_CANONICAL for
-	 pointers cannot be meaningfuly computed by the frotnend.  */
+	 pointers cannot be meaningfully computed by the frontend.  */
   if (canonical_type_used_p (t))
 	{
 	  /* In LTO we set canonical types for all types where it makes
@@ -1031,9 +1031,9 @@ get_alias_set (tree t)
 	   || TREE_CODE (p) == VECTOR_TYPE;
 	   p = TREE_TYPE (p))
 	{
-	  /* Ada supports recusive pointers.  Instead of doing recrusion check
-	 just give up once the preallocated space of 8 elements is up.
-	 In this case just punt to void * alias set.  */
+	  /* Ada supports recursive pointers.  Instead of doing recursion
+	 check, just give up once the preallocated space of 8 elements
+	 is up.  In this case just punt to void * alias set.  */
 	  if (reference.length () == 8)
 	{
 	  p = ptr_type_node;
@@ -1048,7 +1048,7 @@ get_alias_set (tree t)
 	}
   p = TYPE_MAIN_VARIANT (p);
 
-  /* In LTO for C++ programs we can turn in complete types to complete
+  /* In LTO for C++ programs we can turn incomplete types to complete
 	 using ODR name lookup.  */
   if (in_lto_p && TYPE_STRUCTURAL_EQUALITY_P (p) && odr_type_p (p))
 	{


[PATCH] i386: Don't use AVX512F integral masks for V*TImode [PR94438]

2020-04-02 Thread Jakub Jelinek via Gcc-patches
Hi!

The ix86_get_mask_mode hook uses int mask for 512-bit vectors or 128/256-bit
vectors with AVX512VL (that is correct), and only for V*[SD][IF]mode if not
AVX512BW (also correct), but with AVX512BW it would stop checking the
elem_size altogether and pretend the hw has masking support for V*TImode
etc., which it doesn't.  That can lead to various ICEs later on.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2020-04-02  Jakub Jelinek  

PR target/94438
* config/i386/i386.c (ix86_get_mask_mode): Only use int mask for 
elem_size
1, 2, 4 and 8.

* gcc.target/i386/avx512bw-pr94438.c: New test.
* gcc.target/i386/avx512vlbw-pr94438.c: New test.

--- gcc/config/i386/i386.c.jj   2020-03-17 13:50:52.916933781 +0100
+++ gcc/config/i386/i386.c  2020-04-02 17:14:00.202672882 +0200
@@ -21771,7 +21771,9 @@ ix86_get_mask_mode (machine_mode data_mo
   if ((TARGET_AVX512F && vector_size == 64)
   || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
 {
-  if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
+  if (elem_size == 4
+ || elem_size == 8
+ || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
return smallest_int_mode_for_size (nunits);
 }
 
--- gcc/testsuite/gcc.target/i386/avx512bw-pr94438.c.jj 2020-04-02 
17:18:37.374587069 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr94438.c2020-04-02 
17:17:15.928787665 +0200
@@ -0,0 +1,13 @@
+/* PR target/94438 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-mavx512bw" } */
+
+typedef __attribute__ ((__vector_size__ (4 * sizeof (__int128)))) __int128 V;
+void bar (V);
+
+void
+foo (V w)
+{
+  V v = 0 <= (0 >= w);
+  bar (v);
+}
--- gcc/testsuite/gcc.target/i386/avx512vlbw-pr94438.c.jj   2020-04-02 
17:18:08.272016069 +0200
+++ gcc/testsuite/gcc.target/i386/avx512vlbw-pr94438.c  2020-04-02 
17:16:25.302533951 +0200
@@ -0,0 +1,13 @@
+/* PR target/94438 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-mavx512bw -mavx512vl" } */
+
+typedef __attribute__ ((__vector_size__ (sizeof (__int128)))) __int128 V;
+void bar (V);
+
+void
+foo (V w)
+{
+  V v = 0 <= (0 >= w);
+  bar (v);
+}

Jakub



Re: [PATCH] rs6000: Don't split constant operator when add, move to temp register for future optimization

2020-04-02 Thread Segher Boessenkool
Hi!

On Mon, Mar 30, 2020 at 11:59:57AM +0800, luoxhu wrote:
> > Do we want something later in the RTL pipeline to make "addi"s etc. again?

(This would be a good thing to consider -- maybe a define_insn_and_split
will work.  But see below).

> [PATCH] rs6000: Don't split constant operator add before reload, move to temp 
> register for future optimization
> 
> Don't split code from add3 for SDI to allow a later pass to split.
> This allows later logic to hoist out constant load in add instructions.
> In loop, lis+ori could be hoisted out to improve performance compared with
> previous addis+addi (About 15% on typical case), weak point is
> one more register is used and one more instruction is generated.  i.e.:
> 
> addis 3,3,0x8765
> addi 3,3,0x4321
> 
> =>
> 
> lis 9,0x8765
> ori 9,9,0x4321
> add 3,3,9

(This patch will of course have to wait for stage 1).

Such a define_insn_and_split could be for an add of a (signed) 32-bit
immediate.  combine will try to combine the three insns (lis;ori;add),
and match the new pattern.

This also links in with Alan's work on big immediates, and with paddi
insns, etc.


Segher


Re: [PATCH] lower-subreg: PR94123, SVN r273240, causes gcc.target/powerpc/pr87507.c to fail

2020-04-02 Thread Segher Boessenkool
On Sat, Mar 28, 2020 at 06:39:56PM -0500, Peter Bergner wrote:
> On 3/28/20 2:22 PM, Segher Boessenkool wrote:
> > On Fri, Mar 27, 2020 at 05:41:36PM -0500, Peter Bergner wrote:
> >> A different (ie, safer) approach would be to just rerun lower-subreg at
> >> its old location, regardless of whether we used -fsplit-wide-types-early.
> > 
> > That is my preference, for a simpler reason even: when I added the new
> > pass I disabled the old one, thinking it wouldn't do anything useful
> > anymore.  Here you show that isn't true.
> >
> >> This way, we are not changing lower-subreg's behaviour, just running it an
> >> extra time (3 times instead of twice when using -fsplit-wide-types-early).
> >> I don't think lower-subreg is too expensive to run an extra time
> > 
> > Yes, I think so too.
> 
> Right.  However, like I said though, the downside is that we don't expose
> the decomposed uses to passes in between subreg2 and subreg3, like combine,
> etc.  Isn't that why you moved it early in the first place?  Then again,
> maybe you're getting the important cases now and subreg3 is just cleanup?

Yeah.  subreg1 is the limited one; subreg2 and subreg3 are the "full"
one.  subreg2 is the "early" one that only some archs have by default.
subreg3 will not do much (if subreg2 is enabled), except all the usual
RTL passes (CSE, *prop, combine, etc.) can expose more possibilities
for lower-subreg in some cases.


Segher


Re: [PATCH] Test for sigsetjmp support in analyzer tests requiring that feature.

2020-04-02 Thread Sandra Loosemore

On 4/2/20 4:09 AM, Tobias Burnus wrote:

Interestingly, only those two testcases use the effective-target check:
gcc.dg/analyzer/sigsetjmp-5.c, gcc.dg/analyzer/sigsetjmp-6.c

Regarding the link test: No header file is included – it just
defines the function ("extern char $funcname();") and calls it.


This is the same way configure scripts test for the existence of a 
library function; the test case is just an arbitrary reference to the 
named function that will generate an undefined symbol error from the 
linker if the standard libraries don't provide a definition.



The check fails as "sigsetjmp" does not exist in glibc –
the symbol is "__sigsetjmp" in glibc.


I've checked in this followup patch to make it also test for the 
presence of __sigsetjmp, and verified that the test case now runs and 
passes on x86_64_linux-gnu.


-Sandra
commit a950bb6e95236bb60ec17cda36763945d3d0a714
Author: Sandra Loosemore 
Date:   Thu Apr 2 13:37:58 2020 -0700

Fix check_effective_target_sigsetjmp for glibc targets.

2020-04-02  Sandra Loosemore  

	gcc/testsuite/
	* lib/target-supports.exp (check_effective_target_sigsetjmp): Test
	for __sigsetjmp as well as sigsetjmp.

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 050cecd..cd8d7e1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-04-02  Sandra Loosemore  
+
+	* lib/target-supports.exp (check_effective_target_sigsetjmp): Test
+	for __sigsetjmp as well as sigsetjmp.
+
 2020-04-02  Fritz Reese  
 
 	PR fortran/85982
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3654e7b..0dfe3ae 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9036,9 +9036,15 @@ proc check_effective_target_stpcpy {} {
 }
 
 # Returns 1 if "sigsetjmp" is available on the target system.
+# Also check if "__sigsetjmp" is defined since that's what glibc
+# uses.
 
 proc check_effective_target_sigsetjmp {} {
-return [check_function_available "sigsetjmp"]
+if { [check_function_available "sigsetjmp"]
+ || [check_function_available "__sigsetjmp"] } {
+	return 1
+}
+return 0
 }
 
 # Check whether the vectorizer tests are supported by the target and


Re: [PATCH], Make PowerPC -mcpu=future enable -mpcrel on linux ELFv2

2020-04-02 Thread Segher Boessenkool
Hi!

Some more comments:

On Fri, Mar 27, 2020 at 09:31:46PM -0400, Michael Meissner wrote:
> There were no regressions when I did the bootstrap and make check steps.  I
> verified that -mcpu=future does turn on -mpcrel if you are targeting a Linux
> ELF v2 system and use the medium code model.  Can I check this into the master
> branch?

Please post the commit message you would use as well.  It often can
*replace* what you would type in the mail otherwise (just add some
things like how it was tested, etc).

> 2020-03-27  Michael Meissner  
> 
>   * config/rs6000/linux64.h (PCREL_SUPPORTED_BY_OS): New macro.
>   * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Do not
>   set -mprefixed here.

OPTION_MASK_PREFIXED

>   * config/rs6000/rs6000.c (PCREL_SUPPORTED_BY_OS): New macro.

It cannot be "new macro" both here and in linux64.h .  This latter one
is the default implementation.

>   (rs6000_option_override_internal): Set the -mprefixed and -mpcrel
>   options for -mcpu=future if these options can be used.

Similar, OPTION_*.  Please talk about PCREL_SUPPORTED_BY_OS.

A changelog very boringly says *what* and *how*, never "why".


> --- /tmp/JVBhAf_linux64.h 2020-03-27 16:27:05.478619500 -0400
> +++ gcc/config/rs6000/linux64.h   2020-03-27 16:21:56.268876616 -0400
> @@ -640,3 +640,11 @@ extern int dot_symbols;
> enabling the __float128 keyword.  */
>  #undef   TARGET_FLOAT128_ENABLE_TYPE
>  #define TARGET_FLOAT128_ENABLE_TYPE 1
> +
> +/* Enable default support for PC-relative addressing on the 'future' system 
> if
> +   we can use the PC-relative instructions.  Currently this support only 
> exits
> +   for the ELF v2 object file format using the medium code model.  */

(exists, typo)

> +#undef  PCREL_SUPPORTED_BY_OS
> +#define PCREL_SUPPORTED_BY_OS(TARGET_FUTURE && TARGET_PREFIXED   
> \
> +  && ELFv2_ABI_CHECK \
> +  && (TARGET_CMODEL == CMODEL_MEDIUM))

There should not be an #undef here, it just hides bugs (or causes them).

> --- /tmp/KyQOUN_rs6000-cpus.def   2020-03-27 16:27:05.488619427 -0400
> +++ gcc/config/rs6000/rs6000-cpus.def 2020-03-27 16:23:51.780030238 -0400
> @@ -75,11 +75,11 @@
>| OPTION_MASK_P8_VECTOR\
>| OPTION_MASK_P9_VECTOR)
>  
> -/* Support for a future processor's features.  Do not enable -mpcrel until it
> -   is fully functional.  */
> +/* Support for a future processor's features.  We do not set -mpcrel or
> +   -mprefixed here.  These bits are set in rs6000_option_override if the 
> system
> +   supports those options. */

We talked about this before.

Things might be easier if you had different OPTIONs for "can the CPU do
this" and "can the OS do it".

> +/* Set up the defaults for whether PC-relative addressing is supported by the
> +   target system.  */
> +#ifndef PCREL_SUPPORTED_BY_OS
> +#define PCREL_SUPPORTED_BY_OS0
> +#endif
> +
>  /* Support targetm.vectorize.builtin_mask_for_load.  */
>  tree altivec_builtin_mask_for_load;
>  
> @@ -4014,6 +4020,11 @@ rs6000_option_override_internal (bool gl
>rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
>  }
>  
> +  /* Enable -mprefixed by default on 64-bit 'future' systems.  */
> +  if (TARGET_FUTURE && TARGET_POWERPC64
> +  && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
> +rs6000_isa_flags |= OPTION_MASK_PREFIXED;

This does not need OS support?

> +  /* If the OS has support for PC-relative relocations, enable it now.  */
> +  if (PCREL_SUPPORTED_BY_OS
> +  && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
> +rs6000_isa_flags |= OPTION_MASK_PCREL;

So the user can enable pcrel even if PCREL_SUPPORTED_BY_OS is false.
Okay, that is good.  Maybe a better name could be found, but I can't
think of one either.


Segher


Re: [PATCH] doc: RISC-V: Update binutils requirement to 2.30

2020-04-02 Thread Maciej W. Rozycki via Gcc-patches
On Thu, 2 Apr 2020, Maciej W. Rozycki wrote:

> > OK.  Can you also update gcc-10/changes.html?
> 
>  Change now applied, thank you for your review, and patch posted for 
> wwwdocs [I meant to give a link to the message in the archive here, but it 
> seems behind by ~2.5 hours; something to look into and fix too, perhaps?  
> The old archive was live as messages went through.].

 I confused the archives, not being used to the new layout, so the message 
may have actually been recorded in a timely manner.  Sorry about that.  
Patch is here: 
.

  Maciej


Re: [PATCH] Fix PR94443 with gsi_insert_seq_before

2020-04-02 Thread H.J. Lu via Gcc-patches
On Thu, Apr 2, 2020 at 3:43 AM Kewen.Lin  wrote:
>
> on 2020/4/2 上午6:51, H.J. Lu wrote:
> >
> > This caused:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94449
> >
>
> Thanks for reporting this.  The attached patch is to fix the stupid
> mistake by using gsi_insert_seq_before instead of gsi_insert_before.
>
> BTW, the regression testing on one x86_64 machine from CFarm is
> unable to reveal it (I guess due to native arch sandybridge?), so I
> specified additional option -march=znver2 and verified the coverage.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu (P9) and
> x86_64-pc-linux-gnu, also verified the fail cases in related PRs.
>
>
> BR,
> Kewen
> ---
> gcc/ChangeLog
>
> 2020-04-02  Kewen Lin  
>
> PR tree-optimization/94443
> * tree-vect-loop.c (vectorizable_live_operation): Use
> gsi_insert_seq_before to replace gsi_insert_before.
>
> gcc/testsuite/ChangeLog
>
> 2020-04-02  Kewen Lin  
>
> PR tree-optimization/94443
> * gcc.dg/vect/pr94443.c: New test.
>

I verified that this fixed my bug:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94449

Thanks.

-- 
H.J.


Re: [PATCH v2 00/11] aarch64: Implement TImode comparisons

2020-04-02 Thread Richard Henderson
On 4/2/20 11:53 AM, Richard Henderson via Gcc-patches wrote:
> This is attacking case 3 of PR 94174.
> 
> In v2, I unify the various subtract-with-borrow and add-with-carry
> patterns that also output flags with unspecs.  As suggested by
> Richard Sandiford during review of v1.  It does seem cleaner.

Hmph.  I miscounted -- this is actually v3.  :-P


r~


[PATCH v2 09/11] aarch64: Adjust result of aarch64_gen_compare_reg

2020-04-02 Thread Richard Henderson via Gcc-patches
Return the entire comparison expression, not just the cc_reg.
This will allow the routine to adjust the comparison code as
needed for TImode comparisons.

Note that some users were passing e.g. EQ to aarch64_gen_compare_reg
and then using gen_rtx_NE.  Pass the proper code in the first place.

* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Return
the final comparison for code & cc_reg.
(aarch64_gen_compare_reg_maybe_ze): Likewise.
(aarch64_expand_compare_and_swap): Update to match -- do not
build the final comparison here, but PUT_MODE as necessary.
(aarch64_split_compare_and_swap): Use prebuilt comparison.
* config/aarch64/aarch64-simd.md (aarch64_cmdi): Likewise.
(aarch64_cmdi): Likewise.
(aarch64_cmtstdi): Likewise.
* config/aarch64/aarch64-speculation.cc
(aarch64_speculation_establish_tracker): Likewise.
* config/aarch64/aarch64.md (cbranch4, cbranch4): Likewise.
(mod3, abs2): Likewise.
(cstore4, cstore4): Likewise.
(cmov6, cmov6): Likewise.
(movcc, movcc, movcc): Likewise.
(cc): Likewise.
(ffs2): Likewise.
(cstorecc4): Remove redundant "".
---
 gcc/config/aarch64/aarch64.c  | 26 +++---
 gcc/config/aarch64/aarch64-simd.md| 18 ++---
 gcc/config/aarch64/aarch64-speculation.cc |  5 +-
 gcc/config/aarch64/aarch64.md | 96 ++-
 4 files changed, 63 insertions(+), 82 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 8e54506bc3e..93658338041 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2328,7 +2328,7 @@ emit_set_insn (rtx x, rtx y)
 }
 
 /* X and Y are two things to compare using CODE.  Emit the compare insn and
-   return the rtx for register 0 in the proper mode.  */
+   return the rtx for the CCmode comparison.  */
 rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
@@ -2359,7 +2359,7 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
   cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
   emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
 }
-  return cc_reg;
+  return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
 /* Similarly, but maybe zero-extend Y if Y_MODE < SImode.  */
@@ -2382,7 +2382,7 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, 
rtx y,
  cc_mode = CC_SWPmode;
  cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
  emit_set_insn (cc_reg, t);
- return cc_reg;
+ return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
}
 }
 
@@ -18487,7 +18487,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
 
   emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
   newval, mod_s));
-  cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+  x = aarch64_gen_compare_reg_maybe_ze (EQ, rval, oldval, mode);
+  PUT_MODE (x, SImode);
 }
   else if (TARGET_OUTLINE_ATOMICS)
 {
@@ -18498,7 +18499,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
   rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
  oldval, mode, newval, mode,
  XEXP (mem, 0), Pmode);
-  cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+  x = aarch64_gen_compare_reg_maybe_ze (EQ, rval, oldval, mode);
+  PUT_MODE (x, SImode);
 }
   else
 {
@@ -18510,13 +18512,13 @@ aarch64_expand_compare_and_swap (rtx operands[])
   emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
 is_weak, mod_s, mod_f));
   cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+  x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
 }
 
   if (r_mode != mode)
 rval = gen_lowpart (mode, rval);
   emit_move_insn (operands[1], rval);
 
-  x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
   emit_insn (gen_rtx_SET (bval, x));
 }
 
@@ -18591,10 +18593,8 @@ aarch64_split_compare_and_swap (rtx operands[])
   if (strong_zero_p)
 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
   else
-{
-  rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
-  x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
-}
+x = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+
   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
@@ -18607,8 +18607,7 @@ aarch64_split_compare_and_swap (rtx operands[])
{
  /* Emit an explicit compare instruction, so that we can correctly
 track the condition codes.  */
- rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
+ x = aarch64_gen_compare_reg (NE, scratch,

[PATCH v2 04/11] aarch64: Introduce aarch64_expand_addsubti

2020-04-02 Thread Richard Henderson via Gcc-patches
Modify aarch64_expand_subvti into a form that handles all
addition and subtraction, modulo, signed or unsigned overflow.

Use expand_insn to put the operands into the proper form,
and do not force values into register if not required.

* config/aarch64/aarch64.c (aarch64_ti_split): New.
(aarch64_addti_scratch_regs): Remove.
(aarch64_subvti_scratch_regs): Remove.
(aarch64_expand_subvti): Remove.
(aarch64_expand_addsubti): New.
* config/aarch64/aarch64-protos.h: Update to match.
* config/aarch64/aarch64.md (addti3): Use aarch64_expand_addsubti.
(addvti4, uaddvti4): Likewise.
(subvti4, usubvti4): Likewise.
(subti3): Likewise; accept immediates for operand 2.
---
 gcc/config/aarch64/aarch64-protos.h |  10 +--
 gcc/config/aarch64/aarch64.c| 129 +---
 gcc/config/aarch64/aarch64.md   | 125 ++-
 3 files changed, 67 insertions(+), 197 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index d6d668ea920..787085b24d2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -630,16 +630,8 @@ void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
 bool aarch64_bti_enabled (void);
 void aarch64_save_restore_target_globals (tree);
-void aarch64_addti_scratch_regs (rtx, rtx, rtx *,
-rtx *, rtx *,
-rtx *, rtx *,
-rtx *);
-void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
- rtx *, rtx *,
- rtx *, rtx *, rtx *);
-void aarch64_expand_subvti (rtx, rtx, rtx,
-   rtx, rtx, rtx, rtx, bool);
 
+void aarch64_expand_addsubti (rtx, rtx, rtx, int, int, int);
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7a13a8e8ec4..6263897c9a0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20241,110 +20241,61 @@ aarch64_gen_unlikely_cbranch (enum rtx_code code, 
machine_mode cc_mode,
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
 }
 
-/* Generate DImode scratch registers for 128-bit (TImode) addition.
+/* Generate DImode scratch registers for 128-bit (TImode) add/sub.
+   INPUT represents the TImode input operand
+   LO represents the low half (DImode) of the TImode operand
+   HI represents the high half (DImode) of the TImode operand.  */
 
-   OP1 represents the TImode destination operand 1
-   OP2 represents the TImode destination operand 2
-   LOW_DEST represents the low half (DImode) of TImode operand 0
-   LOW_IN1 represents the low half (DImode) of TImode operand 1
-   LOW_IN2 represents the low half (DImode) of TImode operand 2
-   HIGH_DEST represents the high half (DImode) of TImode operand 0
-   HIGH_IN1 represents the high half (DImode) of TImode operand 1
-   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
-
-void
-aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
-   rtx *low_in1, rtx *low_in2,
-   rtx *high_dest, rtx *high_in1,
-   rtx *high_in2)
+static void
+aarch64_ti_split (rtx input, rtx *lo, rtx *hi)
 {
-  *low_dest = gen_reg_rtx (DImode);
-  *low_in1 = gen_lowpart (DImode, op1);
-  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
- subreg_lowpart_offset (DImode, TImode));
-  *high_dest = gen_reg_rtx (DImode);
-  *high_in1 = gen_highpart (DImode, op1);
-  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
-  subreg_highpart_offset (DImode, TImode));
+  *lo = simplify_gen_subreg (DImode, input, TImode,
+subreg_lowpart_offset (DImode, TImode));
+  *hi = simplify_gen_subreg (DImode, input, TImode,
+subreg_highpart_offset (DImode, TImode));
 }
 
-/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
-
-   This function differs from 'arch64_addti_scratch_regs' in that
-   OP1 can be an immediate constant (zero). We must call
-   subreg_highpart_offset with DImode and TImode arguments, otherwise
-   VOIDmode will be used for the const_int which generates an internal
-   error from subreg_size_highpart_offset which does not expect a size of zero.
-
-   OP1 represents the TImode destination operand 1
-   OP2 represents the TImode destination operand 2
-   LOW_DEST represents the low half (DImode) of TImode operand 0
-   LOW_IN1 represents the low half (DImode) of TImode operand 1
-   LOW_IN2 represents the low half (DImode) of TImode operand 2
-   HIGH_DEST represents the high half (DImode) of TImode operand 0
-   HIGH_IN1 represents the high half (DImode) of TImode operand 1
-   HIG

[PATCH v2 05/11] aarch64: Use UNSPEC_SBCS for subtract-with-borrow + output flags

2020-04-02 Thread Richard Henderson via Gcc-patches
The rtl description of signed/unsigned overflow from subtract
was fine, as far as it goes -- we have CC_Cmode and CC_Vmode
that indicate that only those particular bits are valid.

However, it's not clear how to extend that description to
handle signed comparison, where N == V (GE) and N != V (LT) are
the only valid bits.

Using an UNSPEC means that we can unify all 3 usages without
fear that combine will try to infer anything from the rtl.
It also means we need far fewer variants when various inputs
have constants propagated in, and the rtl folds.

Accept -1 for the second input by using ADCS.

* config/aarch64/aarch64.md (UNSPEC_SBCS): New.
(cmp3_carryin): New expander.
(sub3_carryin_cmp): New expander.
(*cmp3_carryin): New pattern.
(*cmp3_carryin_0): New pattern.
(*sub3_carryin_cmp): New pattern.
(*sub3_carryin_cmp_0): New pattern.
(subvti4, usubvti4, negvti3): Use subdi3_carryin_cmp.
(negvdi_carryinV): Remove.
(usub3_carryinC): Remove.
(*usub3_carryinC): Remove.
(*usub3_carryinC_z1): Remove.
(*usub3_carryinC_z2): Remove.
(sub3_carryinV): Remove.
(*sub3_carryinV): Remove.
(*sub3_carryinV_z2): Remove.
* config/aarch64/predicates.md (aarch64_reg_zero_minus1): New.
---
 gcc/config/aarch64/aarch64.md| 217 +--
 gcc/config/aarch64/predicates.md |   7 +
 2 files changed, 94 insertions(+), 130 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 532c114a42e..564dea390be 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,7 @@
 UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
 UNSPEC_TAG_SPACE   ; Translate address to MTE tag address space.
 UNSPEC_LD1RO
+UNSPEC_SBCS
 ])
 
 (define_c_enum "unspecv" [
@@ -2942,7 +2943,7 @@
   aarch64_expand_addsubti (operands[0], operands[1], operands[2],
   CODE_FOR_subvdi_insn,
   CODE_FOR_subdi3_compare1,
-  CODE_FOR_subdi3_carryinV);
+  CODE_FOR_subdi3_carryin_cmp);
   aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
   DONE;
 })
@@ -2957,7 +2958,7 @@
   aarch64_expand_addsubti (operands[0], operands[1], operands[2],
   CODE_FOR_subdi3_compare1,
   CODE_FOR_subdi3_compare1,
-  CODE_FOR_usubdi3_carryinC);
+  CODE_FOR_subdi3_carryin_cmp);
   aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
   DONE;
 })
@@ -2968,12 +2969,14 @@
(label_ref (match_operand 2 "" ""))]
   ""
   {
-emit_insn (gen_negdi_carryout (gen_lowpart (DImode, operands[0]),
-  gen_lowpart (DImode, operands[1])));
-emit_insn (gen_negvdi_carryinV (gen_highpart (DImode, operands[0]),
-   gen_highpart (DImode, operands[1])));
-aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
+rtx op0l = gen_lowpart (DImode, operands[0]);
+rtx op1l = gen_lowpart (DImode, operands[1]);
+rtx op0h = gen_highpart (DImode, operands[0]);
+rtx op1h = gen_highpart (DImode, operands[1]);
 
+emit_insn (gen_negdi_carryout (op0l, op1l));
+emit_insn (gen_subdi3_carryin_cmp (op0h, const0_rtx, op1h));
+aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
 DONE;
   }
 )
@@ -2989,23 +2992,6 @@
   [(set_attr "type" "alus_sreg")]
 )
 
-(define_insn "negvdi_carryinV"
-  [(set (reg:CC_V CC_REGNUM)
-   (compare:CC_V
-(neg:TI (plus:TI
- (ltu:TI (reg:CC CC_REGNUM) (const_int 0))
- (sign_extend:TI (match_operand:DI 1 "register_operand" "r"
-(sign_extend:TI
- (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0))
-  (match_dup 1))
-   (set (match_operand:DI 0 "register_operand" "=r")
-   (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0))
-(match_dup 1]
-  ""
-  "ngcs\\t%0, %1"
-  [(set_attr "type" "alus_sreg")]
-)
-
 (define_insn "*sub3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "rk")
@@ -3370,134 +3356,105 @@
   [(set_attr "type" "adc_reg")]
 )
 
-(define_expand "usub3_carryinC"
+(define_expand "sub3_carryin_cmp"
   [(parallel
- [(set (reg:CC CC_REGNUM)
-  (compare:CC
-(zero_extend:
-  (match_operand:GPI 1 "aarch64_reg_or_zero"))
-(plus:
-  (zero_extend:
-(match_operand:GPI 2 "register_operand"))
-  (ltu: (reg:CC CC_REGNUM) (const_int 0)
-  (set (match_operand:GPI 0 "register_operand")
-  (minus:GPI
-(minus:GPI (match_dup 1) (match_dup 2))
-(ltu:GPI (reg:CC CC_REGNUM) (const_int 0])]
+[(set (match_dup 3)
+

[PATCH v2 07/11] aarch64: Remove CC_ADCmode

2020-04-02 Thread Richard Henderson via Gcc-patches
Now that we're using UNSPEC_ADCS instead of rtl, there's
no reason to distinguish CC_ADCmode from CC_Cmode.  Both
examine only the C bit.  Within uaddvti4, using CC_Cmode
is clearer, since it's the carry-out that's relevant.

* config/aarch64/aarch64-modes.def (CC_ADC): Remove.
* config/aarch64/aarch64.c (aarch64_select_cc_mode):
Do not look for unsigned overflow from add with carry.
* config/aarch64/aarch64.md (uaddvti4): Use CC_Cmode.
* config/aarch64/predicates.md (aarch64_carry_operation):
Remove check for CC_ADCmode.
(aarch64_borrow_operation): Likewise.
---
 gcc/config/aarch64/aarch64.c | 19 ---
 gcc/config/aarch64/aarch64-modes.def |  1 -
 gcc/config/aarch64/aarch64.md|  2 +-
 gcc/config/aarch64/predicates.md |  4 ++--
 4 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6263897c9a0..8e54506bc3e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9094,16 +9094,6 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
   && rtx_equal_p (XEXP (x, 0), y))
 return CC_Cmode;
 
-  /* A test for unsigned overflow from an add with carry.  */
-  if ((mode_x == DImode || mode_x == TImode)
-  && (code == LTU || code == GEU)
-  && code_x == PLUS
-  && CONST_SCALAR_INT_P (y)
-  && (rtx_mode_t (y, mode_x)
- == (wi::shwi (1, mode_x)
- << (GET_MODE_BITSIZE (mode_x).to_constant () / 2
-return CC_ADCmode;
-
   /* A test for signed overflow.  */
   if ((mode_x == DImode || mode_x == TImode)
   && code == NE
@@ -9232,15 +9222,6 @@ aarch64_get_condition_code_1 (machine_mode mode, enum 
rtx_code comp_code)
}
   break;
 
-case E_CC_ADCmode:
-  switch (comp_code)
-   {
-   case GEU: return AARCH64_CS;
-   case LTU: return AARCH64_CC;
-   default: return -1;
-   }
-  break;
-
 case E_CC_Vmode:
   switch (comp_code)
{
diff --git a/gcc/config/aarch64/aarch64-modes.def 
b/gcc/config/aarch64/aarch64-modes.def
index af972e8f72b..32e4b6a35a9 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -38,7 +38,6 @@ CC_MODE (CC_NZC);   /* Only N, Z and C bits of condition 
flags are valid.
 CC_MODE (CC_NZ);/* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z); /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition.  */
-CC_MODE (CC_ADC);   /* Unsigned overflow from an ADC (add with carry).  */
 CC_MODE (CC_V); /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99023494fa1..8d405b40173 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2079,7 +2079,7 @@
   CODE_FOR_adddi3_compareC,
   CODE_FOR_adddi3_compareC,
   CODE_FOR_adddi3_carryin_cmp);
-  aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
+  aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
   DONE;
 })
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 5f44ef7d672..42864cbf4dd 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -388,7 +388,7 @@
   machine_mode ccmode = GET_MODE (op0);
   if (ccmode == CC_Cmode)
 return GET_CODE (op) == LTU;
-  if (ccmode == CC_ADCmode || ccmode == CCmode)
+  if (ccmode == CCmode)
 return GET_CODE (op) == GEU;
   return false;
 })
@@ -406,7 +406,7 @@
   machine_mode ccmode = GET_MODE (op0);
   if (ccmode == CC_Cmode)
 return GET_CODE (op) == GEU;
-  if (ccmode == CC_ADCmode || ccmode == CCmode)
+  if (ccmode == CCmode)
 return GET_CODE (op) == LTU;
   return false;
 })
-- 
2.20.1



[PATCH v2 11/11] aarch64: Implement absti2

2020-04-02 Thread Richard Henderson via Gcc-patches
* config/aarch64/aarch64.md (absti2): New.
---
 gcc/config/aarch64/aarch64.md | 29 +
 1 file changed, 29 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cf716f815a1..4a30d4cca93 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3521,6 +3521,35 @@
   }
 )
 
+(define_expand "absti2"
+  [(match_operand:TI 0 "register_operand")
+   (match_operand:TI 1 "register_operand")]
+  ""
+  {
+rtx lo_op1 = gen_lowpart (DImode, operands[1]);
+rtx hi_op1 = gen_highpart (DImode, operands[1]);
+rtx lo_tmp = gen_reg_rtx (DImode);
+rtx hi_tmp = gen_reg_rtx (DImode);
+rtx x, cc;
+
+emit_insn (gen_negdi_carryout (lo_tmp, lo_op1));
+emit_insn (gen_subdi3_carryin_cmp (hi_tmp, const0_rtx, hi_op1));
+
+cc = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+x = gen_rtx_GE (VOIDmode, cc, const0_rtx);
+x = gen_rtx_IF_THEN_ELSE (DImode, x, lo_tmp, lo_op1);
+emit_insn (gen_rtx_SET (lo_tmp, x));
+
+x = gen_rtx_GE (VOIDmode, cc, const0_rtx);
+x = gen_rtx_IF_THEN_ELSE (DImode, x, hi_tmp, hi_op1);
+emit_insn (gen_rtx_SET (hi_tmp, x));
+
+emit_move_insn (gen_lowpart (DImode, operands[0]), lo_tmp);
+emit_move_insn (gen_highpart (DImode, operands[0]), hi_tmp);
+DONE;
+  }
+)
+
 (define_insn "neg2"
   [(set (match_operand:GPI 0 "register_operand" "=r,w")
(neg:GPI (match_operand:GPI 1 "register_operand" "r,w")))]
-- 
2.20.1



[PATCH v2 08/11] aarch64: Accept -1 as second argument to add3_carryin

2020-04-02 Thread Richard Henderson via Gcc-patches
* config/aarch64/predicates.md (aarch64_reg_or_minus1): New.
* config/aarch64/aarch64.md (add3_carryin): Use it.
(*add3_carryin): Likewise.
(*addsi3_carryin_uxtw): Likewise.
---
 gcc/config/aarch64/aarch64.md| 26 +++---
 gcc/config/aarch64/predicates.md |  6 +-
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8d405b40173..c11c4366bf9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2545,7 +2545,7 @@
  (plus:GPI
(ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0))
(match_operand:GPI 1 "aarch64_reg_or_zero"))
- (match_operand:GPI 2 "aarch64_reg_or_zero")))]
+ (match_operand:GPI 2 "aarch64_reg_zero_minus1")))]
""
""
 )
@@ -2555,28 +2555,32 @@
 ;; accept the zeros during initial expansion.
 
 (define_insn "*add3_carryin"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
+  [(set (match_operand:GPI 0 "register_operand" "=r,r")
(plus:GPI
  (plus:GPI
(match_operand:GPI 3 "aarch64_carry_operation" "")
-   (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))]
-   ""
-   "adc\\t%0, %1, %2"
+   (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ"))
+ (match_operand:GPI 2 "aarch64_reg_zero_minus1" "rZ,UsM")))]
+  ""
+  "@
+   adc\\t%0, %1, %2
+   sbc\\t%0, %1, zr"
   [(set_attr "type" "adc_reg")]
 )
 
 ;; zero_extend version of above
 (define_insn "*addsi3_carryin_uxtw"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
  (plus:SI
(plus:SI
  (match_operand:SI 3 "aarch64_carry_operation" "")
- (match_operand:SI 1 "register_operand" "r"))
-   (match_operand:SI 2 "register_operand" "r"]
-   ""
-   "adc\\t%w0, %w1, %w2"
+ (match_operand:SI 1 "register_operand" "r,r"))
+   (match_operand:SI 2 "aarch64_reg_or_minus1" "r,UsM"]
+  ""
+  "@
+   adc\\t%w0, %w1, %w2
+   sbc\\t%w0, %w1, wzr"
   [(set_attr "type" "adc_reg")]
 )
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 42864cbf4dd..2e7aa6389eb 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -68,13 +68,17 @@
(ior (match_operand 0 "register_operand")
(match_test "op == CONST0_RTX (GET_MODE (op))"
 
+(define_predicate "aarch64_reg_or_minus1"
+  (and (match_code "reg,subreg,const_int")
+   (ior (match_operand 0 "register_operand")
+   (match_test "op == CONSTM1_RTX (GET_MODE (op))"
+
 (define_predicate "aarch64_reg_zero_minus1"
   (and (match_code "reg,subreg,const_int")
(ior (match_operand 0 "register_operand")
(ior (match_test "op == CONST0_RTX (GET_MODE (op))")
 (match_test "op == CONSTM1_RTX (GET_MODE (op))")
 
-
 (define_predicate "aarch64_reg_or_fp_zero"
   (ior (match_operand 0 "register_operand")
(and (match_code "const_double")
-- 
2.20.1



[PATCH v2 06/11] aarch64: Use UNSPEC_ADCS for add-with-carry + output flags

2020-04-02 Thread Richard Henderson via Gcc-patches
Similar to UNSPEC_SBCS, we can unify the signed/unsigned overflow
paths by using an unspec.

Accept -1 for the second input by using SBCS.

* config/aarch64/aarch64.md (UNSPEC_ADCS): New.
(addvti4, uaddvti4): Use adddi_carryin_cmp.
(add3_carryinC): Remove.
(*add3_carryinC_zero): Remove.
(*add3_carryinC): Remove.
(add3_carryinV): Remove.
(*add3_carryinV_zero): Remove.
(*add3_carryinV): Remove.
(add3_carryin_cmp): New expander.
(*add3_carryin_cmp): New pattern.
(*add3_carryin_cmp_0): New pattern.
(*cmn3_carryin): New pattern.
(*cmn3_carryin_0): New pattern.
---
 gcc/config/aarch64/aarch64.md | 206 +++---
 1 file changed, 89 insertions(+), 117 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 564dea390be..99023494fa1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,7 @@
 UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
 UNSPEC_TAG_SPACE   ; Translate address to MTE tag address space.
 UNSPEC_LD1RO
+UNSPEC_ADCS
 UNSPEC_SBCS
 ])
 
@@ -2062,7 +2063,7 @@
   aarch64_expand_addsubti (operands[0], operands[1], operands[2],
   CODE_FOR_adddi3_compareV,
   CODE_FOR_adddi3_compareC,
-  CODE_FOR_adddi3_carryinV);
+  CODE_FOR_adddi3_carryin_cmp);
   aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
   DONE;
 })
@@ -2077,7 +2078,7 @@
   aarch64_expand_addsubti (operands[0], operands[1], operands[2],
   CODE_FOR_adddi3_compareC,
   CODE_FOR_adddi3_compareC,
-  CODE_FOR_adddi3_carryinC);
+  CODE_FOR_adddi3_carryin_cmp);
   aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
   DONE;
 })
@@ -2579,133 +2580,104 @@
   [(set_attr "type" "adc_reg")]
 )
 
-(define_expand "add3_carryinC"
+(define_expand "add3_carryin_cmp"
   [(parallel
- [(set (match_dup 3)
-  (compare:CC_ADC
-(plus:
-  (plus:
-(match_dup 4)
-(zero_extend:
-  (match_operand:GPI 1 "register_operand")))
-  (zero_extend:
-(match_operand:GPI 2 "register_operand")))
-(match_dup 6)))
-  (set (match_operand:GPI 0 "register_operand")
-  (plus:GPI
-(plus:GPI (match_dup 5) (match_dup 1))
-(match_dup 2)))])]
+[(set (match_dup 3)
+ (unspec:CC
+   [(match_operand:GPI 1 "aarch64_reg_or_zero")
+(match_operand:GPI 2 "aarch64_reg_zero_minus1")
+(match_dup 4)]
+   UNSPEC_ADCS))
+ (set (match_operand:GPI 0 "register_operand")
+ (unspec:GPI
+   [(match_dup 1) (match_dup 2) (match_dup 4)]
+   UNSPEC_ADCS))])]
""
-{
-  operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM);
-  rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM);
-  operands[4] = gen_rtx_LTU (mode, ccin, const0_rtx);
-  operands[5] = gen_rtx_LTU (mode, ccin, const0_rtx);
-  operands[6] = immed_wide_int_const (wi::shwi (1, mode)
- << GET_MODE_BITSIZE (mode),
- TImode);
-})
+  {
+operands[3] = gen_rtx_REG (CCmode, CC_REGNUM);
+operands[4] = gen_rtx_GEU (mode, operands[3], const0_rtx);
+  }
+)
 
-(define_insn "*add3_carryinC_zero"
-  [(set (reg:CC_ADC CC_REGNUM)
-   (compare:CC_ADC
- (plus:
-   (match_operand: 2 "aarch64_carry_operation" "")
-   (zero_extend: (match_operand:GPI 1 "register_operand" "r")))
- (match_operand 4 "const_scalar_int_operand" "")))
-   (set (match_operand:GPI 0 "register_operand" "=r")
-   (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "")
- (match_dup 1)))]
-  "rtx_mode_t (operands[4], mode)
-   == (wi::shwi (1, mode) << (unsigned) GET_MODE_BITSIZE (mode))"
-   "adcs\\t%0, %1, zr"
+(define_insn "*add3_carryin_cmp"
+  [(set (reg:CC CC_REGNUM)
+   (unspec:CC
+ [(match_operand:GPI 1 "aarch64_reg_or_zero" "%rZ,rZ")
+  (match_operand:GPI 2 "aarch64_reg_zero_minus1" "rZ,UsM")
+  (match_operand:GPI 3 "aarch64_carry_operation" "")]
+ UNSPEC_ADCS))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+   (unspec:GPI
+ [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_ADCS))]
+   ""
+   "@
+adcs\\t%0, %1, %2
+sbcs\\t%0, %1, zr"
   [(set_attr "type" "adc_reg")]
 )
 
-(define_insn "*add3_carryinC"
-  [(set (reg:CC_ADC CC_REGNUM)
-   (compare:CC_ADC
- (plus:
-   (plus:
- (match_operand: 3 "aarch64_carry_operation" "")
- (zero_extend: (match_operand:GPI 1 "register_operand" "r")))
-   (zero_extend: (match_operand:GPI 2 "register_operand" "r")))
- (m

[PATCH v2 10/11] aarch64: Implement TImode comparisons

2020-04-02 Thread Richard Henderson via Gcc-patches
Use ccmp to perform all TImode comparisons without branches.

* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all of
the comparisons for TImode, not just NE.
* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
---
 gcc/config/aarch64/aarch64.c  | 122 ++
 gcc/config/aarch64/aarch64.md |  28 
 2 files changed, 136 insertions(+), 14 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 93658338041..89c9192266c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2333,32 +2333,126 @@ rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
 {
-  gcc_assert (code == NE);
-
-  cc_mode = CCmode;
-  cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
-
   rtx x_lo = operand_subword (x, 0, 0, TImode);
-  rtx y_lo = operand_subword (y, 0, 0, TImode);
-  emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
-
   rtx x_hi = operand_subword (x, 1, 0, TImode);
-  rtx y_hi = operand_subword (y, 1, 0, TImode);
-  emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
-  gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-  GEN_INT (AARCH64_EQ)));
+  struct expand_operand ops[2];
+  rtx y_lo, y_hi, tmp;
+
+  if (CONST_INT_P (y))
+   {
+ HOST_WIDE_INT y_int = INTVAL (y);
+
+ y_lo = y;
+ switch (code)
+   {
+   case EQ:
+   case NE:
+ /* For equality, IOR the two halves together.  If this gets
+used for a branch, we expect this to fold to cbz/cbnz;
+otherwise it's no larger than cmp+ccmp below.  Beware of
+the compare-and-swap post-reload split and use cmp+ccmp.  */
+ if (y_int == 0 && can_create_pseudo_p ())
+   {
+ tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+ emit_insn (gen_cmpdi (tmp, const0_rtx));
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ goto done;
+   }
+   break;
+
+   case LE:
+   case GT:
+ /* Add 1 to Y to convert to LT/GE, which avoids the swap and
+keeps the constant operand.  The cstoreti and cbranchti
+operand predicates require aarch64_plus_operand, which
+means this increment cannot overflow.  */
+ y_lo = gen_int_mode (++y_int, DImode);
+ code = (code == LE ? LT : GE);
+ /* fall through */
+
+   case LT:
+   case GE:
+ /* Check only the sign bit using tst, or fold to tbz/tbnz.  */
+ if (y_int == 0)
+   {
+ cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+ tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN));
+ tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx);
+ emit_set_insn (cc_reg, tmp);
+ code = (code == LT ? NE : EQ);
+ goto done;
+   }
+ break;
+
+   default:
+ break;
+   }
+ y_hi = (y_int < 0 ? constm1_rtx : const0_rtx);
+   }
+  else
+   {
+ y_lo = operand_subword (y, 0, 0, TImode);
+ y_hi = operand_subword (y, 1, 0, TImode);
+   }
+
+  switch (code)
+   {
+   case LEU:
+   case GTU:
+   case LE:
+   case GT:
+ std::swap (x_lo, y_lo);
+ std::swap (x_hi, y_hi);
+ code = swap_condition (code);
+ break;
+
+   default:
+ break;
+   }
+
+  /* Emit cmpdi, forcing operands into registers as required. */
+  create_input_operand (&ops[0], x_lo, DImode);
+  create_input_operand (&ops[1], y_lo, DImode);
+  expand_insn (CODE_FOR_cmpdi, 2, ops);
+
+  cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+  switch (code)
+   {
+   case EQ:
+   case NE:
+ /* For EQ, (x_lo == y_lo) && (x_hi == y_hi).  */
+ emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
+  gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+  GEN_INT (AARCH64_EQ)));
+ break;
+
+   case LTU:
+   case GEU:
+   case LT:
+   case GE:
+ /* Compute (x - y), as double-word arithmetic.  */
+ create_input_operand (&ops[0], x_hi, DImode);
+ create_input_operand (&ops[1], y_hi, DImode);
+ expand_insn (CODE_FOR_cmpdi3_carryin, 2, ops);
+ break;
+
+   default:
+ gcc_unreachable ();
+   }
 }
   else
 {
-  cc_mode = SELECT_CC_MODE (code, x, y);
+  machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
   cc_reg = gen_rtx_REG (cc_mode, CC_R

[PATCH v2 01/11] aarch64: Accept 0 as first argument to compares

2020-04-02 Thread Richard Henderson via Gcc-patches
While cmp (extended register) and cmp (immediate) use <Wn|WSP>,
cmp (shifted register) uses <Wn>.  So we can perform cmp xzr, x0.

For ccmp, we only have <Wn> as an input.

* config/aarch64/aarch64.md (cmp): For operand 0, use
aarch64_reg_or_zero.  Shuffle reg/reg to last alternative
and accept Z.
(@ccmpcc): For operand 0, use aarch64_reg_or_zero and Z.
(@ccmpcc_rev): Likewise.
---
 gcc/config/aarch64/aarch64.md | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c7c4d1dd519..6fdab5f3402 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -502,7 +502,7 @@
   [(match_operand 0 "cc_register" "")
(const_int 0)])
  (compare:CC_ONLY
-   (match_operand:GPI 2 "register_operand" "r,r,r")
+   (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ,rZ,rZ")
(match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))
  (unspec:CC_ONLY
[(match_operand 5 "immediate_operand")]
@@ -542,7 +542,7 @@
[(match_operand 5 "immediate_operand")]
UNSPEC_NZCV)
  (compare:CC_ONLY
-   (match_operand:GPI 2 "register_operand" "r,r,r")
+   (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ,rZ,rZ")
(match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"]
   ""
   "@
@@ -3961,14 +3961,14 @@
 
 (define_insn "cmp"
   [(set (reg:CC CC_REGNUM)
-   (compare:CC (match_operand:GPI 0 "register_operand" "rk,rk,rk")
-   (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))]
+   (compare:CC (match_operand:GPI 0 "aarch64_reg_or_zero" "rk,rk,rkZ")
+   (match_operand:GPI 1 "aarch64_plus_operand" "I,J,r")))]
   ""
   "@
-   cmp\\t%0, %1
cmp\\t%0, %1
-   cmn\\t%0, #%n1"
-  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+   cmn\\t%0, #%n1
+   cmp\\t%0, %1"
+  [(set_attr "type" "alus_imm,alus_imm,alus_sreg")]
 )
 
 (define_insn "fcmp"
-- 
2.20.1



[PATCH v2 00/11] aarch64: Implement TImode comparisons

2020-04-02 Thread Richard Henderson via Gcc-patches
This is attacking case 3 of PR 94174.

In v2, I unify the various subtract-with-borrow and add-with-carry
patterns that also output flags by representing them with unspecs, as
suggested by Richard Sandiford during review of v1.  It does seem cleaner.


r~


Richard Henderson (11):
  aarch64: Accept 0 as first argument to compares
  aarch64: Accept zeros in add3_carryin
  aarch64: Provide expander for sub3_compare1
  aarch64: Introduce aarch64_expand_addsubti
  aarch64: Use UNSPEC_SBCS for subtract-with-borrow + output flags
  aarch64: Use UNSPEC_ADCS for add-with-carry + output flags
  aarch64: Remove CC_ADCmode
  aarch64: Accept -1 as second argument to add3_carryin
  aarch64: Adjust result of aarch64_gen_compare_reg
  aarch64: Implement TImode comparisons
  aarch64: Implement absti2

 gcc/config/aarch64/aarch64-protos.h   |  10 +-
 gcc/config/aarch64/aarch64.c  | 303 +
 gcc/config/aarch64/aarch64-modes.def  |   1 -
 gcc/config/aarch64/aarch64-simd.md|  18 +-
 gcc/config/aarch64/aarch64-speculation.cc |   5 +-
 gcc/config/aarch64/aarch64.md | 762 ++
 gcc/config/aarch64/predicates.md  |  15 +-
 7 files changed, 527 insertions(+), 587 deletions(-)

-- 
2.20.1



[PATCH v2 02/11] aarch64: Accept zeros in add3_carryin

2020-04-02 Thread Richard Henderson via Gcc-patches
The expander and the insn pattern did not match, leading to
recognition failures in expand.

* config/aarch64/aarch64.md (*add3_carryin): Accept zeros.
---
 gcc/config/aarch64/aarch64.md | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6fdab5f3402..b242f2b1c73 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2606,16 +2606,17 @@
""
 )
 
-;; Note that add with carry with two zero inputs is matched by cset,
-;; and that add with carry with one zero input is matched by cinc.
+;; While add with carry with two zero inputs will be folded to cset,
+;; and add with carry with one zero input will be folded to cinc,
+;; accept the zeros during initial expansion.
 
 (define_insn "*add3_carryin"
   [(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI
  (plus:GPI
(match_operand:GPI 3 "aarch64_carry_operation" "")
-   (match_operand:GPI 1 "register_operand" "r"))
- (match_operand:GPI 2 "register_operand" "r")))]
+   (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))]
""
"adc\\t%0, %1, %2"
   [(set_attr "type" "adc_reg")]
-- 
2.20.1



[PATCH v2 03/11] aarch64: Provide expander for sub3_compare1

2020-04-02 Thread Richard Henderson via Gcc-patches
In one place we open-code a special case of this pattern into the
more specific sub3_compare1_imm, and miss this special case
in other places.  Centralize that special case into an expander.

* config/aarch64/aarch64.md (*sub3_compare1): Rename
from sub3_compare1.
(sub3_compare1): New expander.
* config/aarch64/aarch64.c (aarch64_expand_subvti): Remove
call to gen_subdi3_compare1_imm.
---
 gcc/config/aarch64/aarch64.c  | 11 ++-
 gcc/config/aarch64/aarch64.md | 22 +-
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c90de65de12..7a13a8e8ec4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -20333,16 +20333,9 @@ aarch64_expand_subvti (rtx op0, rtx low_dest, rtx 
low_in1,
 }
   else
 {
-  if (aarch64_plus_immediate (low_in2, DImode))
-   emit_insn (gen_subdi3_compare1_imm (low_dest, low_in1, low_in2,
-   GEN_INT (-INTVAL (low_in2;
-  else
-   {
- low_in2 = force_reg (DImode, low_in2);
- emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
-   }
-  high_in2 = force_reg (DImode, high_in2);
+  emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
 
+  high_in2 = force_reg (DImode, high_in2);
   if (unsigned_p)
emit_insn (gen_usubdi3_carryinC (high_dest, high_in1, high_in2));
   else
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b242f2b1c73..d6389cc8148 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3120,7 +3120,7 @@
   [(set_attr "type" "alus_imm")]
 )
 
-(define_insn "sub3_compare1"
+(define_insn "*sub3_compare1"
   [(set (reg:CC CC_REGNUM)
(compare:CC
  (match_operand:GPI 1 "aarch64_reg_or_zero" "rkZ")
@@ -3132,6 +3132,26 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_expand "sub3_compare1"
+  [(parallel
+[(set (reg:CC CC_REGNUM)
+ (compare:CC
+   (match_operand:GPI 1 "aarch64_reg_or_zero")
+   (match_operand:GPI 2 "aarch64_reg_or_imm")))
+ (set (match_operand:GPI 0 "register_operand")
+ (minus:GPI (match_dup 1) (match_dup 2)))])]
+  ""
+{
+  if (aarch64_plus_immediate (operands[2], mode))
+{
+  emit_insn (gen_sub3_compare1_imm
+(operands[0], operands[1], operands[2],
+ GEN_INT (-INTVAL (operands[2];
+  DONE;
+}
+  operands[2] = force_reg (mode, operands[2]);
+})
+
 (define_peephole2
   [(set (match_operand:GPI 0 "aarch64_general_reg")
(minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
-- 
2.20.1



[PATCH] free() was missing from a part of the documentation

2020-04-02 Thread Zackery Spytz via Gcc-patches
Hello,

The free() function was missing from a part of the documentation!

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index bde37482b6e..79e2c8cb87f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12670,7 +12670,7 @@ The ISO C90 functions
 @code{abort}, @code{abs}, @code{acos}, @code{asin}, @code{atan2},
 @code{atan}, @code{calloc}, @code{ceil}, @code{cosh}, @code{cos},
 @code{exit}, @code{exp}, @code{fabs}, @code{floor}, @code{fmod},
-@code{fprintf}, @code{fputs}, @code{frexp}, @code{fscanf},
+@code{fprintf}, @code{fputs}, @code{free}, @code{frexp}, @code{fscanf},
 @code{isalnum}, @code{isalpha}, @code{iscntrl}, @code{isdigit},
 @code{isgraph}, @code{islower}, @code{isprint}, @code{ispunct},
 @code{isspace}, @code{isupper}, @code{isxdigit}, @code{tolower},


Zackery


Re: [PATCH] c++: Fix constexpr evaluation of self-modifying CONSTRUCTORs [PR94219]

2020-04-02 Thread Patrick Palka via Gcc-patches
On Thu, 2 Apr 2020, Patrick Palka wrote:

> This PR reveals that cxx_eval_bare_aggregate and cxx_eval_store_expression do
> not anticipate that a constructor element's initializer could mutate the
> underlying CONSTRUCTOR.  Evaluation of the initializer could add new elements 
> to
> the underlying CONSTRUCTOR, thereby potentially invalidating any pointers to
> or assumptions about the CONSTRUCTOR's elements, and so these routines should 
> be
> prepared for that.
> 
> To fix this problem, this patch makes cxx_eval_bare_aggregate and
> cxx_eval_store_expression recompute the pointer to the constructor_elt's 
> through
> which we're assigning, after it evaluates the initializer.  Care is taken to
> make the common case where the initializer does not modify the underlying
> CONSTRUCTOR as fast as before.

Also, with this patch, I'm not totally sure but I think we might not
need the special preeval handling in cxx_eval_store_expression anymore.
I could try to remove it in a subsequent patch.

> 
> Does this look OK to commit after testing?
> 
> gcc/cp/ChangeLog:
> 
>   PR c++/94205
>   PR c++/94219
>   * constexpr.c (get_or_insert_ctor_field): Split out (while adding
>   support for VECTOR_TYPEs, and optimizations for the common case)
>   from ...
>   (cxx_eval_store_expression): ... here.  Rename local variable
>   'changed_active_union_member_p' to 'activated_union_member_p'.  Record
>   the sequence of indexes into 'indexes' that yields the subobject we're
>   assigning to.  Record the integer offsets of the constructor indexes
>   we're assigning through into 'index_pos_hints'.  After evaluating the
>   initializer of the store expression, recompute 'valp' using 'indexes'
>   and 'index_pos_hints' as hints.
>   (cxx_eval_bare_aggregate): Tweak comments.  Use get_or_insert_ctor_field
>   to recompute the pointer to the constructor_elt we're assigning through
>   after evaluating each initializer.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c++/94205
>   PR c++/94219
>   * g++.dg/cpp1y/constexpr-nsdmi3.C: New test.
>   * g++.dg/cpp1y/constexpr-nsdmi4.C: New test.
>   * g++.dg/cpp1y/constexpr-nsdmi5.C: New test.
>   * g++.dg/cpp1z/lambda-this5.C: New test.
> ---
>  gcc/cp/constexpr.c| 252 +++---
>  gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi3.C |  19 ++
>  gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi4.C |  21 ++
>  gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi5.C |  22 ++
>  gcc/testsuite/g++.dg/cpp1z/lambda-this5.C |  11 +
>  5 files changed, 228 insertions(+), 97 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi3.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi4.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi5.C
>  create mode 100644 gcc/testsuite/g++.dg/cpp1z/lambda-this5.C
> 
> diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
> index 91f0c3ba269..b4173c595f0 100644
> --- a/gcc/cp/constexpr.c
> +++ b/gcc/cp/constexpr.c
> @@ -3151,6 +3151,97 @@ find_array_ctor_elt (tree ary, tree dindex, bool 
> insert)
>return -1;
>  }
>  
> +/* Return a pointer to the constructor_elt of CTOR which matches INDEX.  If 
> no
> +   matching constructor_elt exists, then add one to CTOR.
> +
> +   As an optimization, if POS_HINT is non-negative then it is used as a guess
> +   for the (integer) index of the matching constructor_elt within CTOR.  */
> +
> +static constructor_elt *
> +get_or_insert_ctor_field (tree ctor, tree index, int pos_hint)
> +{
> +  tree type = TREE_TYPE (ctor);
> +  if (TREE_CODE (type) == VECTOR_TYPE && index == NULL_TREE)
> +{
> +  CONSTRUCTOR_APPEND_ELT (CONSTRUCTOR_ELTS (ctor), index, NULL_TREE);
> +  return &CONSTRUCTOR_ELTS (ctor)->last();
> +}
> +  else if (TREE_CODE (type) == ARRAY_TYPE || TREE_CODE (type) == VECTOR_TYPE)
> +{
> +  HOST_WIDE_INT i = find_array_ctor_elt (ctor, index, /*insert*/true);
> +  gcc_assert (i >= 0);
> +  constructor_elt *cep = CONSTRUCTOR_ELT (ctor, i);
> +  gcc_assert (cep->index == NULL_TREE
> +   || TREE_CODE (cep->index) != RANGE_EXPR);
> +  return cep;
> +}
> +  else
> +{
> +  gcc_assert (TREE_CODE (index) == FIELD_DECL);
> +
> +  /* We must keep the CONSTRUCTOR's ELTS in FIELD order.
> +  Usually we meet initializers in that order, but it is
> +  possible for base types to be placed not in program
> +  order.  */
> +  tree fields = TYPE_FIELDS (DECL_CONTEXT (index));
> +  unsigned HOST_WIDE_INT idx = 0;
> +  constructor_elt *cep = NULL;
> +
> +  /* First, check if we're changing the active member of a union.  */
> +  if (TREE_CODE (type) == UNION_TYPE && CONSTRUCTOR_NELTS (ctor)
> +   && CONSTRUCTOR_ELT (ctor, 0)->index != index)
> + vec_safe_truncate (CONSTRUCTOR_ELTS (ctor), 0);
> +  /* Next, check the hint.  */
> +  else if (pos_hint >= 0 && (unsigned)pos_hi

[PATCH] c++: Fix constexpr evaluation of self-modifying CONSTRUCTORs [PR94219]

2020-04-02 Thread Patrick Palka via Gcc-patches
This PR reveals that cxx_eval_bare_aggregate and cxx_eval_store_expression do
not anticipate that a constructor element's initializer could mutate the
underlying CONSTRUCTOR.  Evaluation of the initializer could add new elements to
the underlying CONSTRUCTOR, thereby potentially invalidating any pointers to
or assumptions about the CONSTRUCTOR's elements, and so these routines should be
prepared for that.

To fix this problem, this patch makes cxx_eval_bare_aggregate and
cxx_eval_store_expression recompute the pointer to the constructor_elt's through
which we're assigning, after it evaluates the initializer.  Care is taken to
make the common case where the initializer does not modify the underlying
CONSTRUCTOR as fast as before.

Does this look OK to commit after testing?

gcc/cp/ChangeLog:

PR c++/94205
PR c++/94219
* constexpr.c (get_or_insert_ctor_field): Split out (while adding
support for VECTOR_TYPEs, and optimizations for the common case)
from ...
(cxx_eval_store_expression): ... here.  Rename local variable
'changed_active_union_member_p' to 'activated_union_member_p'.  Record
the sequence of indexes into 'indexes' that yields the subobject we're
assigning to.  Record the integer offsets of the constructor indexes
we're assigning through into 'index_pos_hints'.  After evaluating the
initializer of the store expression, recompute 'valp' using 'indexes'
and 'index_pos_hints' as hints.
(cxx_eval_bare_aggregate): Tweak comments.  Use get_or_insert_ctor_field
to recompute the pointer to the constructor_elt we're assigning through
after evaluating each initializer.

gcc/testsuite/ChangeLog:

PR c++/94205
PR c++/94219
* g++.dg/cpp1y/constexpr-nsdmi3.C: New test.
* g++.dg/cpp1y/constexpr-nsdmi4.C: New test.
* g++.dg/cpp1y/constexpr-nsdmi5.C: New test.
* g++.dg/cpp1z/lambda-this5.C: New test.
---
 gcc/cp/constexpr.c| 252 +++---
 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi3.C |  19 ++
 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi4.C |  21 ++
 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi5.C |  22 ++
 gcc/testsuite/g++.dg/cpp1z/lambda-this5.C |  11 +
 5 files changed, 228 insertions(+), 97 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/lambda-this5.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 91f0c3ba269..b4173c595f0 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -3151,6 +3151,97 @@ find_array_ctor_elt (tree ary, tree dindex, bool insert)
   return -1;
 }
 
+/* Return a pointer to the constructor_elt of CTOR which matches INDEX.  If no
+   matching constructor_elt exists, then add one to CTOR.
+
+   As an optimization, if POS_HINT is non-negative then it is used as a guess
+   for the (integer) index of the matching constructor_elt within CTOR.  */
+
+static constructor_elt *
+get_or_insert_ctor_field (tree ctor, tree index, int pos_hint)
+{
+  tree type = TREE_TYPE (ctor);
+  if (TREE_CODE (type) == VECTOR_TYPE && index == NULL_TREE)
+{
+  CONSTRUCTOR_APPEND_ELT (CONSTRUCTOR_ELTS (ctor), index, NULL_TREE);
+  return &CONSTRUCTOR_ELTS (ctor)->last();
+}
+  else if (TREE_CODE (type) == ARRAY_TYPE || TREE_CODE (type) == VECTOR_TYPE)
+{
+  HOST_WIDE_INT i = find_array_ctor_elt (ctor, index, /*insert*/true);
+  gcc_assert (i >= 0);
+  constructor_elt *cep = CONSTRUCTOR_ELT (ctor, i);
+  gcc_assert (cep->index == NULL_TREE
+ || TREE_CODE (cep->index) != RANGE_EXPR);
+  return cep;
+}
+  else
+{
+  gcc_assert (TREE_CODE (index) == FIELD_DECL);
+
+  /* We must keep the CONSTRUCTOR's ELTS in FIELD order.
+Usually we meet initializers in that order, but it is
+possible for base types to be placed not in program
+order.  */
+  tree fields = TYPE_FIELDS (DECL_CONTEXT (index));
+  unsigned HOST_WIDE_INT idx = 0;
+  constructor_elt *cep = NULL;
+
+  /* First, check if we're changing the active member of a union.  */
+  if (TREE_CODE (type) == UNION_TYPE && CONSTRUCTOR_NELTS (ctor)
+ && CONSTRUCTOR_ELT (ctor, 0)->index != index)
+   vec_safe_truncate (CONSTRUCTOR_ELTS (ctor), 0);
+  /* Next, check the hint.  */
+  else if (pos_hint >= 0 && (unsigned)pos_hint < CONSTRUCTOR_NELTS (ctor)
+  && CONSTRUCTOR_ELT (ctor, pos_hint)->index == index)
+   {
+ cep = CONSTRUCTOR_ELT (ctor, pos_hint);
+ goto found;
+   }
+  /* If the hint was wrong, and if the bit offset of INDEX is larger than
+that of the last constructor_elt, then we can just immediately append a
+new constructor_elt to the end of CTOR.  

Re: [PATCH], Set -mpcrel by default on Linux 64-bit systems for -mcpu=future

2020-04-02 Thread Segher Boessenkool
On Fri, Mar 27, 2020 at 02:30:36PM -0400, Michael Meissner wrote:
> > > -/* Support for a future processor's features.  Do not enable -mpcrel 
> > > until it
> > > -   is fully functional.  */
> > > +/* Support for a future processor's features.  The addressing related 
> > > options
> > > +   (like -mprefixed, -mpcrel) are not set here.  */
> > 
> > So, where are they set?  why is it important to say they are not set
> > here?
> 
> They are set in rs6000_option_override in rs6000.c, like all of the other
> defaults.
> 
> The issue is that not all 'future' targets will enable these bits.  In 
> general,
> it is simpler to set the bits to ON in the cases where they should be, rather
> than setting the bits here and then resetting them.
> 
> In particular, other operating systems (like AIX, Linux using ELF v1, or
> 32-bit) might not have the necessary support for the PC-relative relocations.
> In addition, if the user did -mcmodel=large or -mcmodel=small, we cannot turn
> on the PC-relative addressing, because the instructions only have a 34-bit
> offset, and the other code models have different assumptions.
> 
> Perhaps one day, we might think about adding the support for -mcmodel=large,
> but for now, you need to use the normal TOC addressing for that.

Please just don't mention it here at all then?  It only confuses
matters, it does not help the reader at all.


Segher


Re: linkage of lambda types

2020-04-02 Thread Jason Merrill via Gcc-patches
On Wed, Apr 1, 2020 at 2:55 PM Nathan Sidwell  wrote:

> Jason,
>
> This is from pr94426, which is fallout from my pr94147 fix.
>
> You added the following to no_linkage_check as part of
>
>   2018-11-12  Jason Merrill  
> Implement P0315R4, Lambdas in unevaluated contexts.
>
>/* Lambda types that don't have mangling scope have no linkage.  We
>   check CLASSTYPE_LAMBDA_EXPR for error_mark_node because
>   when we get here from pushtag none of the lambda information is
>   set up yet, so we want to assume that the lambda has linkage and
>   fix it up later if not.  We need to check this even in templates so
>   that we properly handle a lambda-expression in the signature.  */
>if (LAMBDA_TYPE_P (t)
>&& CLASSTYPE_LAMBDA_EXPR (t) != error_mark_node
>&& LAMBDA_TYPE_EXTRA_SCOPE (t) == NULL_TREE)
>  return t;
>
> The comment suggests that those with a mangling scope do (sometimes?)
> have linkage.  Under what circumstances does the std give lambdas
> linkage?  They are 'unique, unnamed non-union class type[s]' 7.5.5.1/1


> The wording in 6.3/14 suggests that even in:
> inline auto var = []{};
> the multiple definitions of 'var' in different TUs could have different
> types.
>
> 'In particular, lambda-expressions (7.5.5) appearing in the type of D
> may result in the different declarations having distinct types,'
>
> so they can be ODR-same, but not TYPE-same.  Comparing 'typeid (var)'
> across TU boundaries gives an unspecified result.
>
> I can see why implementationwise we might want the above to have a
> pseudo-external linkage -- IIRC we don't correctly give templates
> instantiated from non-external types internal linkage, so we have to
> either guarantee unique mangling or guarantee same typeness.
>

We certainly work hard to do that; see constrain_visibility_for_template.


> What am I missing?
>

A little before that we have

"In each definition of D, except within the default arguments and default
template arguments of D,
corresponding lambda-expressions shall have the same closure type (see
below)."

So that's where the closure has "linkage".

and then later we have

"[Example:
inline void f(bool cond, void (*p)()) {
  if (cond) f(false, []{});
}
inline void g(bool cond, void (*p)() = []{}) {
  if (cond) g(false);
}
struct X {
  void h(bool cond, void (*p)() = []{}) {
if (cond) h(false);
  }
};
If the definition of f appears in multiple translation units, the behavior
of the program is as if there is only
one definition of f. If the definition of g appears in multiple translation
units, the program is ill-formed
(no diagnostic required) because each such definition uses a default
argument that refers to a distinct
lambda-expression closure type. The definition of X can appear in multiple
translation units of a valid program;
the lambda-expressions defined within the default argument of X::h within
the definition of X denote the
same closure type in each translation unit. — end example]"

The reference to "type" in your quotation seems just wrong to me; if it's
part of the type, it isn't from a default argument, so the closures have to
be the same type.  We should check that Davis' overhaul has a good answer
for this.

Jason


Re: [PATCH] ICF: compare type attributes for gimple_call_fntypes.

2020-04-02 Thread Christophe Lyon via Gcc-patches
On Thu, 2 Apr 2020 at 17:16, Martin Liška  wrote:
>
> Hi.
>
> The patch compares type attributes for gimple_call_fntypes in IPA ICF.
> Note that we were unable to find a generic function attribute that
> can be used on a function type definition.
>
> For a one which is allowed assume_aligned(16) I get affects_type_identity == 
> false
> which seems suspicious to me.
>
> Note that we currently use comp_type_attributes in ICF for both variable and
> function declarations.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>

Hi,

Thanks for the quick patch!

I confirm it fixes the problem I noticed on arm with the cmse-15.c for
-O2 and -O3.

However, the testcase still fails with -Os. I haven't looked at the
details, so it may be a different cause.

Thanks,

Christophe

> gcc/ChangeLog:
>
> 2020-04-02  Martin Liska  
>
> PR ipa/94445
> * ipa-icf-gimple.c (func_checker::compare_gimple_call):
>   Compare type attributes for gimple_call_fntypes.
> ---
>   gcc/ipa-icf-gimple.c | 4 
>   1 file changed, 4 insertions(+)
>
>


[PATCH][GCC][AArch64] opt: Fix options canonization for assembler

2020-04-02 Thread Tamar Christina
Hi All,

It is currently impossible to use fp16 on any architecture higher than Armv8.3-a
due to a bug in options canonization.  This bug results in the fp16 flag not
being emitted in the assembly when it should have been.

This is caused by a complicated architectural requirement at Armv8.4-a.  On
Armv8.2-a and Armv8.3-a fp16fml is an optional extension and turning it on turns
on both fp and fp16.  However starting with Armv8.4-a fp16fml is mandatory if
fp16 is available, otherwise it's optional.

In short this means that to enable fp16fml the smallest option that needs to
be passed to the assembler is Armv8.4-a+fp16.

The fix in this patch takes into account that an option may be on by default in
an architecture, but that not all the bits required to use it are on by default
in an architecture.  In such cases the difference between the two is still
emitted to the assembler.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for trunk? and backport to GCC 8 and 9 after some stew.

Thanks,
Tamar

gcc/ChangeLog:

2020-04-02  Tamar Christina  

PR target/94396
* common/config/aarch64/aarch64-common.c
(aarch64_get_extension_string_for_isa_flags): Handle default flags.

gcc/testsuite/ChangeLog:

2020-04-02  Tamar Christina  

PR target/94396
* gcc.target/aarch64/options_set_11.c: New test.
* gcc.target/aarch64/options_set_12.c: New test.
* gcc.target/aarch64/options_set_13.c: New test.
* gcc.target/aarch64/options_set_14.c: New test.
* gcc.target/aarch64/options_set_15.c: New test.
* gcc.target/aarch64/options_set_16.c: New test.
* gcc.target/aarch64/options_set_17.c: New test.
* gcc.target/aarch64/options_set_18.c: New test.
* gcc.target/aarch64/options_set_19.c: New test.
* gcc.target/aarch64/options_set_20.c: New test.
* gcc.target/aarch64/options_set_21.c: New test.
* gcc.target/aarch64/options_set_22.c: New test.
* gcc.target/aarch64/options_set_23.c: New test.
* gcc.target/aarch64/options_set_24.c: New test.
* gcc.target/aarch64/options_set_25.c: New test.
* gcc.target/aarch64/options_set_26.c: New test.

-- 
diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
index 8d24c140ee45c5e1b1313e6a85a8b8e44bb05405..0bddcc8c3e9282a957c5479b4df7f68058093bab 100644
--- a/gcc/common/config/aarch64/aarch64-common.c
+++ b/gcc/common/config/aarch64/aarch64-common.c
@@ -391,7 +391,22 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
 	/* We remove all the dependent bits, to prevent them from being turned
 	   on twice.  This only works because we assume that all there are
 	   individual options to set all bits standalone.  */
-	isa_flag_bits &= ~opt->flags_on;
+
+	/* PR target/94396.
+
+	   For flags which would already imply a bit that's on by default (e.g
+	   fp16fml which implies +fp,+fp16) we must emit the flags that are not
+	   on by default.  i.e. in Armv8.4-a +fp16fml is default if +fp16.  So
+	   if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit
+	   +fp16.  But if +fp16fml is used in an architecture where it is
+	   completely optional we only have to emit the canonical flag.  */
+	uint64_t toggle_bits = opt->flags_on & default_arch_flags;
+	/* Now check to see if the canonical flag is on by default.  If it
+	   is not then enabling it will enable all bits in flags_on.  */
+	if ((opt->flag_canonical & default_arch_flags) == 0)
+	  toggle_bits = opt->flags_on;
+
+	isa_flag_bits &= ~toggle_bits;
 	isa_flag_bits |= opt->flag_canonical;
   }
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_11.c b/gcc/testsuite/gcc.target/aarch64/options_set_11.c
new file mode 100644
index ..d083bfdbd5c4ee0067607d506306a4271542c4d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_11.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+fp" } */
+
+int main ()
+{
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc} } } */
+
+ /* FP is default on, no need to pass on to assembler.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_12.c b/gcc/testsuite/gcc.target/aarch64/options_set_12.c
new file mode 100644
index ..58a09fda2c1140bd63559f81280f41be5e1a2b17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+fp16" } */
+
+int main ()
+{
+  return 0;
+}
+
+/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16} } } */
+
+ /* fp16 not default, should be emitted.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_13.c b/gcc/testsuite/gcc.target/aarch64/options_set_13.c
new file mode 100644
index ..2a517ecb58f87ca5653bb6aac7e2db12a1de0926
--- 

[PATCH] gcc-9 sra: Cap number of sub-access propagations with a param (PR 93435)

2020-04-02 Thread Martin Jambor
Hi,

This is a non-trivial but rather straightforward backport of
29f23ed79b60949fc60f6fdbbd931bd58090b241 from master.  See
https://gcc.gnu.org/pipermail/gcc-patches/2020-March/542390.html for
more information.

Bootstrapped and tested on gcc-9 branch, can I commit it there?  It also
applies as-is to gcc-8 as well and I will backport it there as the next
step after testing (without seeking another approval).

Thanks,

Martin


2020-04-01  Martin Jambor  

PR tree-optimization/93435
* params.def (PARAM_SRA_MAX_PROPAGATIONS): New parameter.
* tree-sra.c (propagation_budget): New variable.
(budget_for_propagation_access): New function.
(propagate_subaccesses_across_link): Use it.
(propagate_all_subaccesses): Set up and destroy propagation_budget.
* doc/invoke.texi (sra-max-propagations): New.

gcc/testsuite/
* gcc.dg/tree-ssa/pr93435.c: New test.
---
 gcc/ChangeLog   |  10 ++
 gcc/doc/invoke.texi |   5 +
 gcc/params.def  |   7 ++
 gcc/testsuite/ChangeLog |   5 +
 gcc/testsuite/gcc.dg/tree-ssa/pr93435.c | 159 
 gcc/tree-sra.c  |  34 -
 6 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93435.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2b1ce7df14a..815fa8eec2d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2020-04-01  Martin Jambor  
+
+   PR tree-optimization/93435
+   * params.def (PARAM_SRA_MAX_PROPAGATIONS): New parameter.
+   * tree-sra.c (propagation_budget): New variable.
+   (budget_for_propagation_access): New function.
+   (propagate_subaccesses_across_link): Use it.
+   (propagate_all_subaccesses): Set up and destroy propagation_budget.
+   * doc/invoke.texi (sra-max-propagations): New.
+
 2020-03-31  Carl Love  
 
Backport of:
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0f6247caf51..1782a648d02 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11797,6 +11797,11 @@ speed
 (@option{sra-max-scalarization-size-Ospeed}) or size
 (@option{sra-max-scalarization-size-Osize}) respectively.
 
+@item sra-max-propagations
+The maximum number of artificial accesses that Scalar Replacement of
+Aggregates (SRA) will track, per one local variable, in order to
+facilitate copy propagation.
+
 @item tm-max-aggregate-size
 When making copies of thread-local variables in a transaction, this
 parameter specifies the size in bytes after which variables are
diff --git a/gcc/params.def b/gcc/params.def
index 8e4887e50a2..e23a4530bfa 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1081,6 +1081,13 @@ DEFPARAM (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE,
  "considered for scalarization when compiling for size.",
  0, 0, 0)
 
+DEFPARAM (PARAM_SRA_MAX_PROPAGATIONS,
+ "sra-max-propagations",
+ "Maximum number of artificial accesses to enable forward propagation "
+ "that Scalar Replacement of Aggregates will keep for one local "
+ "variable.",
+ 32, 0, 0)
+
 DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
  "ipa-cp-value-list-size",
  "Maximum size of a list of values associated with each parameter for "
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 71686c72a33..6be82b5d5c2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-04-01  Martin Jambor  
+
+   PR tree-optimization/93435
+   * gcc.dg/tree-ssa/pr93435.c: New test.
+
 2020-03-28  Tobias Burnus  
 
Backport from mainline
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c
new file mode 100644
index 000..cb8e7495b15
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93435.c
@@ -0,0 +1,159 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef signed char int8_T;
+typedef int int32_T;
+
+typedef struct {
+  int8_T a;
+} struct0_T;
+
+typedef struct {
+  struct0_T f10[4];
+} struct_T;
+
+typedef struct {
+  struct_T f9[4];
+} b_struct_T;
+
+typedef struct {
+  b_struct_T f8[4];
+} c_struct_T;
+
+typedef struct {
+  c_struct_T f7[4];
+} d_struct_T;
+
+typedef struct {
+  d_struct_T f6[4];
+} e_struct_T;
+
+typedef struct {
+  e_struct_T f5[4];
+} f_struct_T;
+
+typedef struct {
+  f_struct_T f4[4];
+} g_struct_T;
+
+typedef struct {
+  g_struct_T f3[4];
+} h_struct_T;
+
+typedef struct {
+  h_struct_T f2[4];
+} i_struct_T;
+
+typedef struct {
+  i_struct_T f1[4];
+} j_struct_T;
+
+typedef struct {
+  struct {
+j_struct_T ds21[4];
+i_struct_T ds20[4];
+i_struct_T r9;
+  } f0;
+} deep_struct_arraysStackData;
+
+/* Function Definitions */
+void deep_struct_arrays(deep_struct_arraysStackData *SD,
+  int8_T in1, int8_T inCount, int8_T *out1, int8_T *out2, struct0_T out3[4])
+{
+  struct0_T r;
+  struct_T r1;
+  b_struct_T r2;

Re: PING -- [PATCH, fortran] PR 85982 -- ICE in resolve_component

2020-04-02 Thread Fritz Reese via Gcc-patches
Tobias,

Thank you for the information. I didn't think about translations. I'll
post a new version and commit shortly.

Cheers,

Fritz

On Thu, Apr 2, 2020 at 3:50 AM Tobias Burnus  wrote:
>
> In principle, I like the patch. However, I think one should
> replace
>
> gfc_error ("Attribute at %L is not allowed in a %s definition",
>…, state_name
>
> by something like:
>
> bool is_type = gfc_current_state () == COMP_DERIVED;
> gfc_error (is_type ? G_("Attribute at %L is not allowed in a TYPE definition")
>: G_("Attribute at %L is not allowed in a STRUCTURE 
> definition"),
>…
>
> Reason: (a) This makes translation simpler; e.g. 'structure' and 'type' have
> different gender in several European languages. (Albeit in this case the
> gender of 'definition' dominates in the cases I checked.)
> (b) For TYPE, the string won't change such that the existing translations
> still work – even if the update for STRUCTURE won't make it for the release.
>
> Otherwise it looks good to me, including the test case in your follow-up 
> email.
>
> Cheers,
>
> Tobias
>
> On 4/1/20 7:19 PM, Fritz Reese via Fortran wrote:
>
> > This simple patch was submitted some time ago (over 1 year), but got
> > lost without review. I have lately rebased and tested, and the patch
> > is still good. Is this OK to commit to trunk and for backport? I'd
> > like to port as far back as 7.
> >
> > ---
> > Fritz Reese
> >
> > gcc/ChangeLog:
> > 2020-04-01  Fritz Reese  
> >
> > PR fortran/85982
> > * fortran/decl.c (match_attr_spec): Lump COMP_STRUCTURE/COMP_MAP 
> > into
> > attribute checking used by TYPE.
> >
> > gcc/testsuite/ChangeLog:
> > 2020-04-01  Fritz Reese  
> >
> > PR fortran/85982
> > * gfortran.dg/dec_structure_28.f90: New test.
> -
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, 
> Alexander Walter


Re: [PATCH] [amdgcn] Add support for unordered floating-point comparisons

2020-04-02 Thread Andrew Stubbs

On 02/04/2020 15:55, Kwok Cheung Yeung wrote:

Hello

This patch adds support for the unordered floating-point comparison 
operators (UNEQ, UNGE, UNGT, UNLE, UNLT), which return true if one of 
the operands is a NaN. These comparisons can be generated by builtins 
such as __builtin_isgreater.


GCC 10 appears to have a fall-back if the unordered comparisons are not 
available (by generating an unordered comparison first, followed by the 
main comparison), whereas previous versions would simply throw an ICE. 
Still, it should be a little more efficient to have direct support for 
these operators.


Tested on a GCN3 board with no regressions noted. Okay for trunk?


OK, thanks.

Andrew


Re: [PATCH] gcc/config/rs6000: Add link with libc128 with -mlong-double-128 for AIX

2020-04-02 Thread David Edelsohn via Gcc-patches
On Thu, Apr 2, 2020 at 5:30 AM CHIGOT, CLEMENT  wrote:
>
> Description:
>  * AIX applications using 128-bit long double must be linked with
>libc128.a, in order to have 128-bit compatible routines.
>
> Tests:
>  * AIX 7.2, 7.1: Build/Tests: OK
>
> Changelog:
>  * config/rs6000/aix61.h (LIB_SPEC): Add -lc128 with -mlong-double-128.
>  * config/rs6000/aix71.h (LIB_SPEC: Likewise.
>  * config/rs6000/aix72.h (LIB_SPEC: Likewise.

The ChangeLog entries should include the date, name and email.  And
two of the entries contain a typo with no closing parentheses.

Okay with those changes.

Thanks, David


[PATCH] ICF: compare type attributes for gimple_call_fntypes.

2020-04-02 Thread Martin Liška

Hi.

The patch compares type attributes for gimple_call_fntypes in IPA ICF.
Note that we were unable to find a generic function attribute that
can be used on a function type definition.

For one attribute which is allowed, assume_aligned(16), I get
affects_type_identity == false, which seems suspicious to me.

Note that we currently use comp_type_attributes in ICF for both variable and
function declarations.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2020-04-02  Martin Liska  

PR ipa/94445
* ipa-icf-gimple.c (func_checker::compare_gimple_call):
  Compare type attributes for gimple_call_fntypes.
---
 gcc/ipa-icf-gimple.c | 4 
 1 file changed, 4 insertions(+)


diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 3e5b2d4bd6d..fa02809defd 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "builtins.h"
 #include "cfgloop.h"
+#include "attribs.h"
 
 #include "ipa-icf-gimple.h"
 
@@ -568,6 +569,9 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2)
   || (fntype1 && !types_compatible_p (fntype1, fntype2)))
 return return_false_with_msg ("call function types are not compatible");
 
+  if (fntype1 && fntype2 && comp_type_attributes (fntype1, fntype2) != 1)
+return return_false_with_msg ("different fntype attributes");
+
   tree chain1 = gimple_call_chain (s1);
   tree chain2 = gimple_call_chain (s2);
   if ((chain1 && !chain2)



Re: [Patch][Fortran] Resolve formal args before checking DTIO (was: Re: [PATCH] deferred-shape vs assumed-shape)

2020-04-02 Thread Steve Kargl via Gcc-patches
This one is a little bit weird for me.  It seems AS_DEFERRED
is overloaded and things get fixed up later.  In array.c
one finds match_array_element_spec(), which sets AS_DEFERRED
if the matcher sees (:) regardless of how the array is used.
Then in gfc_set_array_spec(), the array spec is attached to
the symbol in this block

  if (sym->as == NULL)
{
  sym->as = as;
  return true;
}

We can check sym->attr to determine if this is a nonallocatable
nonpointer dummy argument and reset the type to AS_ASSUMED_SHAPE;
or at least I thought I could.  This leads to a very, very long
list of regressions.  So, I went with the direct hammer.

Your patch is fine with me as it likely resolves (fixes up?)
the symbol for future references.

-- 
steve

On Thu, Apr 02, 2020 at 11:34:16AM +0200, Tobias Burnus wrote:
> Hi Steve,
> 
> I think your patch is fine - however, I think calling the normal
> resolve_formal_arglist looks a bit cleaner to me (as done in the
> attached patch). — Additionally, I added the testcase.
> 
> Side effect of my variant is that gfc_check_dtio_interfaces will
> be called again a bit later again. — In this sense, Steve's patch,
> which replicates a chunk of resolve_formal_arglist, is better.
> 
> Thoughts by anyone?
> 
> OK?
> 
> Tobias
> 
> PS: I was thinking of calling resolve_symbol instead
> but this one does not resolve the formal arguments
> (via "gfc_resolve (sym->formal_ns)") as sym->attr.contained.
> 
> On 4/1/20 10:04 PM, Steve Kargl via Fortran wrote:
> 
> > See
> > https://stackoverflow.com/questions/60972134/whats-wrong-with-the-following-fortran-code-gfortran-dtio-dummy-argument-at
> > 
> > Is A(:) a deferred-shape array or an assumed-shape array?  The
> > answer of course depends on context.
> > 
> > This patch fixes the issue found at the above URL.
> > 
> > Index: gcc/fortran/interface.c
> > ===
> > --- gcc/fortran/interface.c   (revision 280157)
> > +++ gcc/fortran/interface.c   (working copy)
> > @@ -4916,10 +4916,15 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool 
> > type
> > || ((type != BT_CLASS) && fsym->attr.dimension)))
> >   gfc_error ("DTIO dummy argument at %L must be a scalar",
> >  &fsym->declared_at);
> > -  else if (rank == 1
> > -&& (fsym->as == NULL || fsym->as->type != AS_ASSUMED_SHAPE))
> > -gfc_error ("DTIO dummy argument at %L must be an "
> > -"ASSUMED SHAPE ARRAY", &fsym->declared_at);
> > +  else if (rank == 1)
> > +{
> > +  if (fsym->as == NULL
> > +   || !(fsym->as->type == AS_ASSUMED_SHAPE
> > + || (fsym->as->type == AS_DEFERRED && fsym->attr.dummy
> > + && !fsym->attr.allocatable && !fsym->attr.pointer)))
> > + gfc_error ("DTIO dummy argument at %L must be an "
> > +"ASSUMED-SHAPE ARRAY", &fsym->declared_at);
> > +}
> > 
> > if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
> >   gfc_error ("DTIO character argument at %L must have assumed length",
> > 
> -
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, 
> Alexander Walter

> [Fortran] Resolve formal args before checking DTIO
> 
>   * gfortran.h (gfc_resolve_formal_arglist): Add prototype.
>   * interface.c (check_dtio_interface1): Call it.
>   * resolve.c (gfc_resolve_formal_arglist): Renamed from
>   resolve_formal_arglist, removed static.
>   (find_arglists, resolve_types): Update calls.
> 
>   * gfortran.dg/dtio_35.f90: New.
> 
>  gcc/fortran/gfortran.h|  1 +
>  gcc/fortran/interface.c   |  4 ++-
>  gcc/fortran/resolve.c | 10 +++
>  gcc/testsuite/gfortran.dg/dtio_35.f90 | 50 
> +++
>  4 files changed, 59 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
> index 96037629f5f..88e4d9236f3 100644
> --- a/gcc/fortran/gfortran.h
> +++ b/gcc/fortran/gfortran.h
> @@ -3369,6 +3369,7 @@ bool gfc_resolve_expr (gfc_expr *);
>  void gfc_resolve (gfc_namespace *);
>  void gfc_resolve_code (gfc_code *, gfc_namespace *);
>  void gfc_resolve_blocks (gfc_code *, gfc_namespace *);
> +void gfc_resolve_formal_arglist (gfc_symbol *);
>  int gfc_impure_variable (gfc_symbol *);
>  int gfc_pure (gfc_symbol *);
>  int gfc_implicit_pure (gfc_symbol *);
> diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
> index 14d03c27759..75a50c999b7 100644
> --- a/gcc/fortran/interface.c
> +++ b/gcc/fortran/interface.c
> @@ -5007,6 +5007,9 @@ check_dtio_interface1 (gfc_symbol *derived, gfc_symtree 
> *tb_io_st,
>  gfc_error ("DTIO procedure %qs at %L must be a subroutine",
>  dtio_sub->name, &dtio_sub->declared_at);
>  
> +  if (!dtio_sub->resolved)
> +gfc_resolve_formal_arglist (dtio_sub);
> +
>arg_num = 0;
>for (formal = dti

Re: [PATCH] doc: RISC-V: Update binutils requirement to 2.30

2020-04-02 Thread Maciej W. Rozycki via Gcc-patches
On Thu, 2 Apr 2020, Richard Biener wrote:

> > >  Our installation instructions state binutils 2.28 as the requirement for
> > > all the RISC-V targets, however the change for fmv.x.w/fmv.w.x instruction
> > > support was only added in the binutils 2.30 development cycle.
> >
> >  Here's the resulting change.  Verified with `make info' and `make check'.
> > OK to apply?
> 
> OK.  Can you also update gcc-10/changes.html?

 Change now applied, thank you for your review, and patch posted for 
wwwdocs [I meant to give a link to the message in the archive here, but it 
seems behind by ~2.5 hours; something to look into and fix too, perhaps?  
The old archive was live as messages went through.].

  Maciej


[PATCH] [amdgcn] Add support for unordered floating-point comparisons

2020-04-02 Thread Kwok Cheung Yeung

Hello

This patch adds support for the unordered floating-point comparison operators 
(UNEQ, UNGE, UNGT, UNLE, UNLT), which return true if one of the operands is a 
NaN. These comparisons can be generated by builtins such as __builtin_isgreater.


GCC 10 appears to have a fall-back if the unordered comparisons are not 
available (by generating an unordered comparison first, followed by the main 
comparison), whereas previous versions would simply throw an ICE. Still, it 
should be a little more efficient to have direct support for these operators.


Tested on a GCN3 board with no regressions noted. Okay for trunk?

Kwok
commit ea811ce38ae2127554f0aca9cd34aca6e42f814d
Author: Kwok Cheung Yeung 
Date:   Thu Apr 2 07:47:28 2020 -0700

amdgcn: Support unordered floating-point comparison operators

2020-04-02  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.c (print_operand): Handle unordered comparison
operators.
* config/gcn/predicates.md (gcn_fp_compare_operator): Add unordered
comparison operators.

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 12438cf..38b5b98 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -6007,6 +6007,21 @@ print_operand (FILE *file, rtx x, int code)
  case UNORDERED:
s = "_u_";
break;
+ case UNEQ:
+   s = "_nlg_";
+   break;
+ case UNGE:
+   s = "_nlt_";
+   break;
+ case UNGT:
+   s = "_nle_";
+   break;
+ case UNLE:
+   s = "_ngt_";
+   break;
+ case UNLT:
+   s = "_nge_";
+   break;
  case LTGT:
s = "_lg_";
break;
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
index 7bf763a..91e5ca1 100644
--- a/gcc/config/gcn/predicates.md
+++ b/gcc/config/gcn/predicates.md
@@ -165,7 +165,7 @@
   (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu"))
 
 (define_predicate "gcn_fp_compare_operator"
-  (match_code "eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu,ordered,unordered,ltgt"))
+  (match_code 
"eq,ne,gt,ge,lt,le,gtu,geu,ltu,leu,ordered,unordered,uneq,unge,ungt,unle,unlt,ltgt"))
 
 (define_predicate "unary_operator"
   (match_code "not,popcount"))


[PATCH] Inhibit hoists before graphite pass

2020-04-02 Thread Ananth Jasty via Gcc-patches
Loop-im and PRE can hoist loads out of loops, creating artificial dependencies 
that inhibit graphite's analysis.

do k = 1,4096
do j = 1,4096
do i = 1,4096
c(i,j)= c(i, j) + a(k,j) * b(i, k)
enddo
 enddo
enddo

In the preceding loop body, the a(k,j) load can be hoisted out of the 
inner-most loop, which is a valuable optimization, however also one that 
creates a cross-iteration dependency, inhibiting polynomial transformation of 
the nested-loop. An attempt to tile will fail as graphite will assume a 
dependency between loop iterations and fail.

By inhibiting hoists until after graphite has run, we preserve the loop 
structure, while allowing hoists to be performed by later passes of loop-im 
and/or PRE. If graphite is not enabled on the command line, hoists are 
performed early as normal.

This change gives ~8% improvement on spec2017/fotonik3d on my system. It should 
also help graphite become applicable to further applications that are blocked 
by data dependency.


[PATCH][wwwdocs] GCC 10: Document RISC-V target's requirement for binutils 2.30

2020-04-02 Thread Maciej W. Rozycki via Gcc-patches
Match GCC commit bfe78b08471f ("RISC-V: Using fmv.x.w/fmv.w.x rather 
than fmv.x.s/fmv.s.x") and commit 879bc686a0aa ("doc: RISC-V: Update 
binutils requirement to 2.30").
---
Hi,

 OK to apply?

  Maciej
---
 htdocs/gcc-10/changes.html | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html
index 3d8e0ba9..809bbb4d 100644
--- a/htdocs/gcc-10/changes.html
+++ b/htdocs/gcc-10/changes.html
@@ -725,7 +725,13 @@ a work-in-progress.
   
 
 
-
+RISC-V
+
+  
+The riscv*-*-* targets now require GNU binutils version 2.30
+or later, to support new assembly instructions produced by GCC.
+  
+
 
 
 


[committed][wwwdocs] GCC 10: Reorder S/390 target alphabetically

2020-04-02 Thread Maciej W. Rozycki via Gcc-patches
---
Hi,

 Committed as obvious.

  Maciej
---
 htdocs/gcc-10/changes.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html
index 1e1eaf43..3d8e0ba9 100644
--- a/htdocs/gcc-10/changes.html
+++ b/htdocs/gcc-10/changes.html
@@ -725,12 +725,12 @@ a work-in-progress.
   
 
 
-
-
 
 
 
 
+
+
 
 
 


[PATCH] debug/94450 - remove DW_TAG_imported_unit generated in LTRANS units

2020-04-02 Thread Richard Biener
This removes the DW_TAG_imported_unit we generate for each referenced
early debug unit in LTRANS units.  They do more harm than good,
and the semantics can be read in a way that makes them even wrong.

LTO bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2020-04-02  Richard Biener  

PR debug/94450
* dwarf2out.c (dwarf2out_early_finish): Remove code emitting
DW_TAG_imported_unit.
---
 gcc/dwarf2out.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 378a27394e8..d68367eee3e 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -32040,24 +32040,6 @@ dwarf2out_early_finish (const char *filename)
  sure to adjust the phase after annotating the LTRANS CU DIE.  */
   if (in_lto_p)
 {
-  /* Force DW_TAG_imported_unit to be created now, otherwise
-we might end up without it or ordered after DW_TAG_inlined_subroutine
-referencing DIEs from it.  */
-  if (! flag_wpa && flag_incremental_link != INCREMENTAL_LINK_LTO)
-   {
- unsigned i;
- tree tu;
- if (external_die_map)
-   FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, tu)
- if (sym_off_pair *desc = external_die_map->get (tu))
-   {
- dw_die_ref import = new_die (DW_TAG_imported_unit,
-  comp_unit_die (), NULL_TREE);
- add_AT_external_die_ref (import, DW_AT_import,
-  desc->sym, desc->off);
-   }
-   }
-
   early_dwarf_finished = true;
   if (dump_file)
{
-- 
2.12.3


[committed] mips: Fix up -Wliteral-suffix warning on mti-linux.h

2020-04-02 Thread Jakub Jelinek via Gcc-patches
Hi!

I've noticed while trying to reproduce PR92989 the following warning:
In file included from ./tm.h:42,
 from ../../gcc/backend.h:28,
 from ../../gcc/lra-assigns.c:80:
../../gcc/config/mips/mti-linux.h:31:5: warning: invalid suffix on literal; 
C++11 requires a space between literal and string macro [-Wliteral-suffix]
 "/%{mmicromips:micro}mips%{mel|EL:el}-"MIPS_SYSVERSION_SPEC  \
 ^
This fixes it, string concatenation works just fine even with whitespace
in between.

Committed to trunk as obvious.

2020-04-02  Jakub Jelinek  

* config/mips/mti-linux.h (SYSROOT_SUFFIX_SPEC): Add a space in
between a string literal and MIPS_SYSVERSION_SPEC macro.

--- gcc/config/mips/mti-linux.h.jj  2020-01-12 11:54:36.360414208 +0100
+++ gcc/config/mips/mti-linux.h 2020-04-02 15:22:32.478597778 +0200
@@ -28,7 +28,7 @@ along with GCC; see the file COPYING3.
 
 #undef SYSROOT_SUFFIX_SPEC
 #define SYSROOT_SUFFIX_SPEC\
-"/%{mmicromips:micro}mips%{mel|EL:el}-"MIPS_SYSVERSION_SPEC
\
+"/%{mmicromips:micro}mips%{mel|EL:el}-" MIPS_SYSVERSION_SPEC   \
 "%{msoft-float:-soft;:-hard}"  \
 "%{!mips32r6:%{!mips64r6:%{mnan=2008:-nan2008}}}%{muclibc:-uclibc}"
 


Jakub



[committed] params: Decrease -param=max-find-base-term-values= default [PR92264]

2020-04-02 Thread Jakub Jelinek via Gcc-patches
On Thu, Apr 02, 2020 at 12:44:32PM +0200, Richard Biener wrote:
> > For the PR in question, my proposal would be to also lower
> > -param=max-find-base-term-values=
> > default from 2000 to 200 after this, at least in the above 4
> > bootstraps/regtests there is nothing that would ever result in
> > find_base_term returning non-NULL with more than 200 VALUEs being processed.
> 
> Sounds good to me.

Here is what I've committed after another bootstrap/regtest on x86_64-linux
and i686-linux.

2020-04-02  Jakub Jelinek  

PR rtl-optimization/92264
* params.opt (-param=max-find-base-term-values=): Decrease default
from 2000 to 200.

--- gcc/params.opt.jj   2020-03-21 18:29:59.102158469 +0100
+++ gcc/params.opt  2020-04-02 13:05:14.433729117 +0200
@@ -663,7 +663,7 @@ Common Joined UInteger Var(param_max_var
 Max. size of var tracking hash tables.
 
 -param=max-find-base-term-values=
-Common Joined UInteger Var(param_max_find_base_term_values) Init(2000) Param 
Optimization
+Common Joined UInteger Var(param_max_find_base_term_values) Init(200) Param 
Optimization
 Maximum number of VALUEs handled during a single find_base_term call.
 
 -param=max-vrp-switch-assertions=


Jakub



Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Segher Boessenkool
Hi!

On Thu, Apr 02, 2020 at 10:28:34AM +0200, Jakub Jelinek wrote:
> > +   tree offset = dataref_offset
> > +   ? dataref_offset
> > +   : build_int_cst (ref_type, 0);
> 
> The above is misformatted.  The ? and : shouldn't be indented further than
> the dataref_offset, but usually e.g. for the sake of emacs we add ()s around
> the expression in this case.  So:
>   tree offset = (dataref_offset
>  ? dataref_offset
>  : build_int_cst (ref_type, 0));
> or
>   tree offset
> = (dataref_offset
>? dataref_offset : build_int_cst (ref_type, 0));

Or even just the (less obfuscated imnsho)

tree offset = dataref_offset;
if (!offset)
  offset = build_int_cst (ref_type, 0);

which even is shorter!


Segher


Re: [PATCH] cselib: Reuse VALUEs on sp adjustments [PR92264]

2020-04-02 Thread Richard Biener
On Thu, 2 Apr 2020, Jakub Jelinek wrote:

> Hi!
> 
> As discussed in the PR, if !ACCUMULATE_OUTGOING_ARGS on large functions we
> can have hundreds of thousands of stack pointer adjustments and cselib
> creates a new VALUE after each sp adjustment, which form extremely deep
> VALUE chains, which is very harmful e.g. for find_base_term.
> E.g. if we have
> sp -= 4
> sp -= 4
> sp += 4
> sp += 4
> sp -= 4
> sp += 4
> that means 7 VALUEs, one for the sp at beginning (val1), than val2 = val1 -
> 4, then val3 = val2 - 4, then val4 = val3 + 4, then val5 = val4 + 4, then
> val6 = val5 - 4, then val7 = val6 + 4.
> This patch tweaks cselib, so that it is smarter about sp adjustments.
> When cselib_lookup (stack_pointer_rtx, Pmode, 1, VOIDmode) and we know
> nothing about sp yet (this happens at the start of the function, for
> non-var-tracking also after cselib_reset_table and for var-tracking after
> processing fp_setter insn where we forget about former sp values because
> that is now hfp related while everything after it is sp related), we
> look it up normally, but in addition to what we have been doing before
> we mark the VALUE as SP_DERIVED_VALUE_P.  Further lookups of sp + offset
> are then special cased, so that it is canonicalized to that
> SP_DERIVED_VALUE_P VALUE + CONST_INT (if possible).  So, for the above,
> we get val1 with SP_DERIVED_VALUE_P set, then val2 = val1 - 4, val3 = val1 -
> 8 (note, no longer val2 - 4!), then we get val2 again, val1 again, val2
> again, val1 again.
> In the find_base_term visited_vals.length () > 100 find_base_term
> statistics during combined x86_64-linux and i686-linux bootstrap+regtest
> cycle, without the patch I see:
>   find_base_term > 100
>   returning NULL  returning non-NULL
> 32-bit compilations   4229178 407
> 64-bit compilations   217523  0
> with largest visited_vals.length () when returning non-NULL being 206.
> With the patch the same numbers are:
> 32-bit compilations   1249588 135
> 64-bit compilations   35100
> with largest visited_vals.length () when returning non-NULL being 173.
> This shows significant reduction of the deep VALUE chains.
> On powerpc64{,le}-linux, these stats didn't change at all, we have
>   10080
> for all of -m32, -m64 and little-endian -m64, just the
> gcc.dg/pr85180.c and gcc.dg/pr87985.c testcases which are unrelated to sp.
> 
> My earlier version of the patch, which contained just the rtl.h and cselib.c
> changes, regressed some tests:
> gcc.dg/guality/{pr36728-{1,3},pr68860-{1,2}}.c
> gcc.target/i386/{pr88416,sse-{13,23,24,25,26}}.c
> The problem with the former tests was worse debug info, where with -m32
> where arg7 was passed in a stack slot we though a push later on might have
> invalidated it, when it couldn't.  This is something I've solved with the
> var-tracking.c (vt_initialize) changes.  In those problematic functions, we
> create a cfa_base VALUE (argp) and want to record that at the start of
> the function the argp VALUE is sp + off and also record that current sp
> VALUE is argp's VALUE - off.  The second permanent equivalence didn't make
> it after the patch though, because cselib_add_permanent_equiv will
> cselib_lookup the value of the expression it wants to add as the equivalence
> and if it is the same VALUE as we are calling it on, it doesn't do anything;
> and due to the cselib changes for sp based accesses that is exactly what
> happened.  By reversing the order of the cselib_add_permanent_equiv calls we
> get both equivalences though and thus are able to canonicalize the sp based
> accesses in var-tracking to the cfa_base value + offset.

I think this warrants a comment in the code.

> The i386 FAILs were all ICEs, where we had pushf instruction pushing flags
> and then pop pseudo reading that value again.  With the cselib changes,
> cselib during RTL DSE is able to see through the sp adjustment and wanted
> to replace_read what was done pushf, by moving the flags register into a
> pseudo and replace the memory read in the pop with that pseudo.  That is
> wrong for two reasons: one is that the backend doesn't have an instruction
> to move the flags hard register into some other register, but replace_read
> has been validating just the mem -> pseudo replacement and not the insns
> emitted by copy_to_mode_reg.  And the second issue is that it is obviously
> wrong to replace a stack pop which contains stack post-increment by a copy
> of pseudo into destination.  dse.c has some code to handle RTX_AUTOINC, but
> only uses it when actually removing stores and only when there is REG_INC
> note (stack RTX_AUTOINC does not have those), in check_for_inc_dec* where
> it emits the reg adjustment(s) before the insn that is going to be deleted.
> replace_read doesn't remove the insn, so if it e.g. contained REG_INC note,
> it would be kept there and we might have the RTX_AUTOINC not just in *loc,
> but other spot

[PATCH] Fix PR94443 with gsi_insert_seq_before

2020-04-02 Thread Kewen.Lin via Gcc-patches
on 2020/4/2 上午6:51, H.J. Lu wrote:
> 
> This caused:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94449
> 

Thanks for reporting this.  The attached patch is to fix the stupid
mistake by using gsi_insert_seq_before instead of gsi_insert_before.

BTW, the regression testing on one x86_64 machine from CFarm is 
unable to reveal it (I guess due to native arch sandybridge?), so I
specified additional option -march=znver2 and verified the coverage.

Bootstrapped/regtested on powerpc64le-linux-gnu (P9) and 
x86_64-pc-linux-gnu, also verified the fail cases in related PRs.


BR,
Kewen
---
gcc/ChangeLog

2020-04-02  Kewen Lin  

PR tree-optimization/94443
* tree-vect-loop.c (vectorizable_live_operation): Use
gsi_insert_seq_before to replace gsi_insert_before.

gcc/testsuite/ChangeLog

2020-04-02  Kewen Lin  

PR tree-optimization/94443
* gcc.dg/vect/pr94443.c: New test.

diff --git a/gcc/testsuite/gcc.dg/vect/pr94443.c 
b/gcc/testsuite/gcc.dg/vect/pr94443.c
new file mode 100644
index 000..f8cbaf1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr94443.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=znver2" { target { x86_64-*-* i?86-*-* } } 
} */
+
+/* Check it to be compiled successfully without any ICE.  */
+
+int a;
+unsigned *b;
+
+void foo()
+{
+  for (unsigned i; i <= a; ++i, ++b)
+;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c9b6534..34adf79 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8050,7 +8050,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
   if (stmts)
 {
   gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb);
-  gsi_insert_before (&exit_gsi, stmts, GSI_CONTINUE_LINKING);
+  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
 
   /* Remove existing phi from lhs and create one copy from new_tree.  */
   tree lhs_phi = NULL_TREE;
@@ -8063,7 +8063,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
  remove_phi_node (&gsi, false);
  lhs_phi = gimple_phi_result (phi);
  gimple *copy = gimple_build_assign (lhs_phi, new_tree);
- gsi_insert_after (&exit_gsi, copy, GSI_CONTINUE_LINKING);
+ gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT); 
  break;
}
}


Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Jakub Jelinek via Gcc-patches
On Thu, Apr 02, 2020 at 06:07:55PM +0800, Kewen.Lin wrote:
> > The above is misformatted.  The ? and : shouldn't be indented further than
> > the dataref_offset, but usually e.g. for the sake of emacs we add ()s around
> > the expression in this case.  So:
> > tree offset = (dataref_offset
> >? dataref_offset
> >: build_int_cst (ref_type, 0));
> > or
> > tree offset
> >   = (dataref_offset
> >  ? dataref_offset : build_int_cst (ref_type, 0));
> > 
> 
> Thanks Jakub!  I'll follow this by add () for ternary expression.
> With manual added "()", clang-format can get below:

Note, the () isn't about ternary expressions, if everything fits on one
line, there is no reason to add ()s, so
  tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0);
is just fine that way, on the other side
int whatever = HOST_WIDE_INT_1U
   + foobarbaz (qux);
should have them too, like:
int whatever = (HOST_WIDE_INT_1U
+ foobarbaz (qux));
or
int whatever
  = HOST_WIDE_INT_1U + foobarbaz (qux);
I don't use emacs, but was told that emacs without the ()s would misindent
it like (I think):
int whatever = HOST_WIDE_INT_1U
  + foobarbaz (qux);
which is what we do not want.

> 
>   tree offset
> = (dataref_offset ? dataref_offset
>   : build_int_cst (ref_type, 0));
> 
> contrib/check_GNU_style.sh didn't complain this, I'm not sure whether
> it's possible to add this kind of convention into contrib/clang-format.

clang-format is not our official indentation style; I have no problem with
the above formatting from readability POV, though unsure what emacs will do
with that (but if it moves that : right below the first dataref_offset,
no big deal, that is also fine and probably more appropriate if the
build_int_cst... is long and would need more wrapping).

Jakub



Re: [PATCH] sra/doc: Document param sra-max-propagations

2020-04-02 Thread Richard Biener via Gcc-patches
On Thu, Apr 2, 2020 at 12:21 PM Martin Jambor  wrote:
>
> Hi,
>
> I forgot to document the new param in invoke.texi, does the text below
> look OK?

OK.

> Tested with make info and make pdf.
>
> Thanks,
>
> Martin
>
>
> 2020-04-02  Martin Jambor  
>
> * doc/invoke.texi (Optimize Options): Document sra-max-propagations.
> ---
>  gcc/ChangeLog   | 4 
>  gcc/doc/invoke.texi | 5 +
>  2 files changed, 9 insertions(+)
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 654356c8dc8..a93db199cf9 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-04-02  Martin Jambor  
> +
> +   * doc/invoke.texi (Optimize Options): Document sra-max-propagations.
> +
>  2020-04-01  Jakub Jelinek  
>
> PR middle-end/94423
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 412750c1fc9..e9e1683e9a8 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12785,6 +12785,11 @@ speed
>  (@option{sra-max-scalarization-size-Ospeed}) or size
>  (@option{sra-max-scalarization-size-Osize}) respectively.
>
> +@item sra-max-propagations
> +The maximum number of artificial accesses that Scalar Replacement of
> +Aggregates (SRA) will track, per one local variable, in order to
> +facilitate copy propagation.
> +
>  @item tm-max-aggregate-size
>  When making copies of thread-local variables in a transaction, this
>  parameter specifies the size in bytes after which variables are
> --
> 2.25.1
>


Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Kewen.Lin via Gcc-patches
on 2020/4/2 下午5:21, Richard Biener wrote:
> On Thu, Apr 2, 2020 at 9:15 AM Kewen.Lin  wrote:
>>
>> Hi,
>>
>> The commit r10-7415 brings scalar type consideration
>> to eliminate epilogue peeling for gaps, but it exposed
>> one problem that the current handling doesn't consider
>> the memory access type VMAT_CONTIGUOUS_REVERSE, for
>> which the overrun happens on low address side.  This
>> patch is to make the code take care of it by updating
>> the offset and construction element order accordingly.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P8
>> and aarch64-linux-gnu.
> 
> OK with the formatting changes suggested by Jakub.
> 

Thanks Richi, I'll push the formatted one as attached.

BR,
Kewen
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 12beef6978c..7730e71b94d 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
if (new_vtype != NULL_TREE)
  ltype = half_vtype;
  }
+   tree offset
+ = (dataref_offset ? dataref_offset
+   : build_int_cst (ref_type, 0));
+   if (ltype != vectype
+   && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+   unsigned HOST_WIDE_INT gap
+ = DR_GROUP_GAP (first_stmt_info);
+   gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+   tree gapcst = build_int_cst (ref_type, gap);
+   offset = size_binop (PLUS_EXPR, offset, gapcst);
+ }
data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr,
-dataref_offset
-? dataref_offset
-: build_int_cst (ref_type, 0));
+ = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
if (alignment_support_scheme == dr_aligned)
  ;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9607,16 +9616,27 @@ vectorizable_load (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
  TYPE_ALIGN (elem_type));
if (ltype != vectype)
  {
-   vect_copy_ref_info (data_ref, DR_REF 
(first_dr_info->dr));
+   vect_copy_ref_info (data_ref,
+   DR_REF (first_dr_info->dr));
tree tem = make_ssa_name (ltype);
new_stmt = gimple_build_assign (tem, data_ref);
-   vect_finish_stmt_generation (stmt_info, new_stmt, 
gsi);
+   vect_finish_stmt_generation (stmt_info, new_stmt,
+gsi);
data_ref = NULL;
vec *v;
vec_alloc (v, 2);
-   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-   build_zero_cst (ltype));
+   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+   build_zero_cst (ltype));
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ }
+   else
+ {
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+   build_zero_cst (ltype));
+ }
gcc_assert (new_vtype != NULL_TREE);
if (new_vtype == vectype)
  new_stmt = gimple_build_assign (


[PATCH] cselib: Reuse VALUEs on sp adjustments [PR92264]

2020-04-02 Thread Jakub Jelinek via Gcc-patches
Hi!

As discussed in the PR, if !ACCUMULATE_OUTGOING_ARGS on large functions we
can have hundreds of thousands of stack pointer adjustments and cselib
creates a new VALUE after each sp adjustment, which form extremely deep
VALUE chains, which is very harmful e.g. for find_base_term.
E.g. if we have
sp -= 4
sp -= 4
sp += 4
sp += 4
sp -= 4
sp += 4
that means 7 VALUEs, one for the sp at beginning (val1), then val2 = val1 -
4, then val3 = val2 - 4, then val4 = val3 + 4, then val5 = val4 + 4, then
val6 = val5 - 4, then val7 = val6 + 4.
This patch tweaks cselib, so that it is smarter about sp adjustments.
When cselib_lookup (stack_pointer_rtx, Pmode, 1, VOIDmode) and we know
nothing about sp yet (this happens at the start of the function, for
non-var-tracking also after cselib_reset_table and for var-tracking after
processing fp_setter insn where we forget about former sp values because
that is now hfp related while everything after it is sp related), we
look it up normally, but in addition to what we have been doing before
we mark the VALUE as SP_DERIVED_VALUE_P.  Further lookups of sp + offset
are then special cased, so that it is canonicalized to that
SP_DERIVED_VALUE_P VALUE + CONST_INT (if possible).  So, for the above,
we get val1 with SP_DERIVED_VALUE_P set, then val2 = val1 - 4, val3 = val1 -
8 (note, no longer val2 - 4!), then we get val2 again, val1 again, val2
again, val1 again.
In the find_base_term visited_vals.length () > 100 find_base_term
statistics during combined x86_64-linux and i686-linux bootstrap+regtest
cycle, without the patch I see:
find_base_term > 100
returning NULL  returning non-NULL
32-bit compilations 4229178 407
64-bit compilations 217523  0
with largest visited_vals.length () when returning non-NULL being 206.
With the patch the same numbers are:
32-bit compilations 1249588 135
64-bit compilations 3510    0
with largest visited_vals.length () when returning non-NULL being 173.
This shows significant reduction of the deep VALUE chains.
On powerpc64{,le}-linux, these stats didn't change at all, we have
1008    0
for all of -m32, -m64 and little-endian -m64, just the
gcc.dg/pr85180.c and gcc.dg/pr87985.c testcases which are unrelated to sp.

My earlier version of the patch, which contained just the rtl.h and cselib.c
changes, regressed some tests:
gcc.dg/guality/{pr36728-{1,3},pr68860-{1,2}}.c
gcc.target/i386/{pr88416,sse-{13,23,24,25,26}}.c
The problem with the former tests was worse debug info, where with -m32
where arg7 was passed in a stack slot we thought a push later on might have
invalidated it, when it couldn't.  This is something I've solved with the
var-tracking.c (vt_initialize) changes.  In those problematic functions, we
create a cfa_base VALUE (argp) and want to record that at the start of
the function the argp VALUE is sp + off and also record that current sp
VALUE is argp's VALUE - off.  The second permanent equivalence didn't make
it after the patch though, because cselib_add_permanent_equiv will
cselib_lookup the value of the expression it wants to add as the equivalence
and if it is the same VALUE as we are calling it on, it doesn't do anything;
and due to the cselib changes for sp based accesses that is exactly what
happened.  By reversing the order of the cselib_add_permanent_equiv calls we
get both equivalences though and thus are able to canonicalize the sp based
accesses in var-tracking to the cfa_base value + offset.
The i386 FAILs were all ICEs, where we had pushf instruction pushing flags
and then pop pseudo reading that value again.  With the cselib changes,
cselib during RTL DSE is able to see through the sp adjustment and wanted
to replace_read what was done by pushf, by moving the flags register into a
pseudo and replace the memory read in the pop with that pseudo.  That is
wrong for two reasons: one is that the backend doesn't have an instruction
to move the flags hard register into some other register, but replace_read
has been validating just the mem -> pseudo replacement and not the insns
emitted by copy_to_mode_reg.  And the second issue is that it is obviously
wrong to replace a stack pop which contains stack post-increment by a copy
of pseudo into destination.  dse.c has some code to handle RTX_AUTOINC, but
only uses it when actually removing stores and only when there is REG_INC
note (stack RTX_AUTOINC does not have those), in check_for_inc_dec* where
it emits the reg adjustment(s) before the insn that is going to be deleted.
replace_read doesn't remove the insn, so if it e.g. contained REG_INC note,
it would be kept there and we might have the RTX_AUTOINC not just in *loc,
but other spots.
So, the dse.c changes try to validate the added insns and punt on all
RTX_AUTOINC in *loc.  Furthermore, it seems that with the cselib.c changes
on the gfortran.dg/pr87360.f90 and gcc.target/i386/pr88416.c testcases
check_for_inc_dec{,_

[PATCH] sra/doc: Document param sra-max-propagations

2020-04-02 Thread Martin Jambor
Hi,

I forgot to document the new param in invoke.texi, does the text below
look OK?

Tested with make info and make pdf.

Thanks,

Martin


2020-04-02  Martin Jambor  

* doc/invoke.texi (Optimize Options): Document sra-max-propagations.
---
 gcc/ChangeLog   | 4 
 gcc/doc/invoke.texi | 5 +
 2 files changed, 9 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 654356c8dc8..a93db199cf9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2020-04-02  Martin Jambor  
+
+   * doc/invoke.texi (Optimize Options): Document sra-max-propagations.
+
 2020-04-01  Jakub Jelinek  
 
PR middle-end/94423
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 412750c1fc9..e9e1683e9a8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12785,6 +12785,11 @@ speed
 (@option{sra-max-scalarization-size-Ospeed}) or size
 (@option{sra-max-scalarization-size-Osize}) respectively.
 
+@item sra-max-propagations
+The maximum number of artificial accesses that Scalar Replacement of
+Aggregates (SRA) will track, per one local variable, in order to
+facilitate copy propagation.
+
 @item tm-max-aggregate-size
 When making copies of thread-local variables in a transaction, this
 parameter specifies the size in bytes after which variables are
-- 
2.25.1



Re: [PATCH] Test for sigsetjmp support in analyzer tests requiring that feature.

2020-04-02 Thread Tobias Burnus

Interestingly, only those two testcases use the effective-target check:
gcc.dg/analyzer/sigsetjmp-5.c, gcc.dg/analyzer/sigsetjmp-6.c

Regarding the link test: No header file is included – it just
defines the function ("extern char $funcname();") and calls it.
The check fails as "sigsetjmp" does not exist in glibc –
the symbol is "__sigsetjmp" in glibc.

If one includes '#include <setjmp.h>', it works – even without
explicitly setting _POSIX_C_SOURCE.

BTW: sigsetjmp is also used by the following testcases, but without the
effective-target check:
g++.dg/asan/asan_test.cc, g++.dg/torture/pr57190.C, gcc.dg/pr69167.c,
gcc.dg/torture/float128-exact-underflow.c,
gcc.dg/torture/float128-extendxf-underflow.c,
gcc.dg/torture/pr57147-3.c, gcc.dg/torture/pr81900.c,
gcc.dg/ubsan/pr94423.c, gcc.target/sh/torture/pr30807.c

Cheers,

Tobias

On 4/2/20 11:36 AM, Thomas Schwinge wrote:


Hi!

On 2020-03-22T11:31:31-0600, Sandra Loosemore  wrote:

The new-ish analyzer test cases sigsetjmp-5.c and sigsetjmp-6.c were
failing on nios2-elf and probably other newlib targets due to lack of
support for sigsetjmp.  I didn't see a suitable existing
effective-target test for this, so I added one.
--- a/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-5.c
+++ b/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-5.c
@@ -1,3 +1,5 @@
+/* { dg-require-effective-target sigsetjmp } */
--- a/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-6.c
+++ b/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-6.c
@@ -1,3 +1,5 @@
+/* { dg-require-effective-target sigsetjmp } */
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
+# Returns 1 if "sigsetjmp" is available on the target system.
+
+proc check_effective_target_sigsetjmp {} {
+return [check_function_available "sigsetjmp"]
+}

That got pushed to master branch as commit
adaf4b6c66e789d927684003b9ee05ed04c105ea "Test for sigsetjmp support in
analyzer tests requiring that feature".

On x86_64-pc-linux-gnu I now see these tests regress to:

 UNSUPPORTED: gcc.dg/analyzer/sigsetjmp-5.c
 UNSUPPORTED: gcc.dg/analyzer/sigsetjmp-6.c

..., because of:

 Executing on host: [xgcc] sigsetjmp_available6728.c [...] -fno-builtin  
-lm-o sigsetjmp_available6728.exe(timeout = 300)
 spawn [xgcc] sigsetjmp_available6728.c [...] -fno-builtin -lm -o 
sigsetjmp_available6728.exe
 /tmp/ccKsf87z.o: In function `main':
 sigsetjmp_available6728.c:(.text+0xa): undefined reference to `sigsetjmp'
 collect2: error: ld returned 1 exit status
 [...]

Does that maybe have something to do with feature test macros required
for 'sigsetjmp'?


Grüße
  Thomas
-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Kewen.Lin via Gcc-patches
Hi,

on 2020/4/2 下午4:28, Jakub Jelinek wrote:
> Hi!
> 
> On Thu, Apr 02, 2020 at 03:15:42PM +0800, Kewen.Lin via Gcc-patches wrote:
> 
> Just formatting nits, not commenting on what the actual patch does.
> 
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, 
>> gimple_stmt_iterator *gsi,
>>  if (new_vtype != NULL_TREE)
>>ltype = half_vtype;
>>}
>> +tree offset = dataref_offset
>> +? dataref_offset
>> +: build_int_cst (ref_type, 0);
> 
> The above is misformatted.  The ? and : shouldn't be indented further than
> the dataref_offset, but usually e.g. for the sake of emacs we add ()s around
> the expression in this case.  So:
>   tree offset = (dataref_offset
>  ? dataref_offset
>  : build_int_cst (ref_type, 0));
> or
>   tree offset
> = (dataref_offset
>? dataref_offset : build_int_cst (ref_type, 0));
> 

Thanks Jakub!  I'll follow this by adding () for the ternary expression.
With manually added "()", clang-format produces the below:

tree offset
  = (dataref_offset ? dataref_offset
: build_int_cst (ref_type, 0));

contrib/check_GNU_style.sh didn't complain about this, I'm not sure whether
it's possible to add this kind of convention into contrib/clang-format.

>> +if (ltype != vectype
>> +&& memory_access_type == VMAT_CONTIGUOUS_REVERSE)
>> +  offset = size_binop (
>> +PLUS_EXPR,
>> +build_int_cst (ref_type,
>> +   DR_GROUP_GAP (first_stmt_info)
>> + * tree_to_uhwi (
>> +   TYPE_SIZE_UNIT (elem_type))),
>> +offset);
> 
> Again, no reason to indent * by 2 columns from DR_GROUP_GAP.  But also all
> the (s at the end of line and randomly indented arguments look ugly.
> I'd recommend temporaries, e.g. like (perhaps with different names of
> temporaries, so that they don't shadow anything):
> 
> {
>   unsigned HOST_WIDE_INT gap
> = DR_GROUP_GAP (first_stmt_info);
>   gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
>   tree gapcst = build_int_cst (ref_type, gap);
>   offset = size_binop (PLUS_EXPR, offset, gapcst);
> }
> 

Good suggestion, will update it.

BR,
Kewen



RE: [PATCH][GCC][Arm]: MVE: Fix polymorphism for scalars and constants

2020-04-02 Thread Kyrylo Tkachov


> -Original Message-
> From: Andre Vieira (lists) 
> Sent: 02 April 2020 09:22
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov 
> Subject: [PATCH][GCC][Arm]: MVE: Fix polymorphism for scalars and
> constants
> 
> Hi,
> 
> This patch merges some polymorphic functions that were incorrectly
> separating scalar variants. It also simplifies the way we detect scalars and
> constants in mve_typeid.
> 
> Regression tested for arm-none-eabi.
> 
> Is this OK for trunk?

Ok.
Thanks,
Kyrill

> 
> 2020-04-02  Andre Vieira  
> 
>      * config/arm/arm_mve.h (vsubq_n): Merge with...
>      (vsubq): ... this.
>      (vmulq_n): Merge with...
>      (vmulq): ... this.
>      (__ARM_mve_typeid): Simplify scalar and constant detection.



RE: [GCC][PATCH][ARM]: Fix for MVE ACLE intrinsics with writeback (PR94317).

2020-04-02 Thread Kyrylo Tkachov
Hi Srinath,

> -Original Message-
> From: Srinath Parvathaneni 
> Sent: 31 March 2020 17:13
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov ; Richard Earnshaw
> 
> Subject: [GCC][PATCH][ARM]: Fix for MVE ACLE intrinsics with writeback
> (PR94317).
> 
> Hello,
> 
> Following MVE ACLE intrinsics have an issue with writeback to the base
> address.
> 
> vldrdq_gather_base_wb_s64, vldrdq_gather_base_wb_u64,
> vldrdq_gather_base_wb_z_s64, vldrdq_gather_base_wb_z_u64,
> vldrwq_gather_base_wb_s32, vldrwq_gather_base_wb_u32,
> vldrwq_gather_base_wb_z_s32, vldrwq_gather_base_wb_z_u32,
> vldrwq_gather_base_wb_f32, vldrwq_gather_base_wb_z_f32.
> 
> This patch fixes the bug reported in PR94317 by adding separate builtin calls
> to update the result and writeback to base address for the above intrinsics.
> 
> Please refer to M-profile Vector Extension (MVE) intrinsics [1]  for more
> details.
> [1] https://developer.arm.com/architectures/instruction-sets/simd-
> isas/helium/mve-intrinsics
> 
> Regression tested on arm-none-eabi and found no regressions.
> 
> Ok for trunk?

Thanks, I've pushed this patch to master.
Kyrill

> 
> Thanks,
> Srinath.
> 
> gcc/ChangeLog:
> 
> 2020-03-31  Srinath Parvathaneni  
> 
>   PR target/94317
>   * config/arm/arm-builtins.c (LDRGBWBXU_QUALIFIERS): Define.
>   (LDRGBWBXU_Z_QUALIFIERS): Likewise.
>   * config/arm/arm_mve.h (__arm_vldrdq_gather_base_wb_s64):
> Modify
>   intrinsic definition by adding a new builtin call to writeback into base
>   address.
>   (__arm_vldrdq_gather_base_wb_u64): Likewise.
>   (__arm_vldrdq_gather_base_wb_z_s64): Likewise.
>   (__arm_vldrdq_gather_base_wb_z_u64): Likewise.
>   (__arm_vldrwq_gather_base_wb_s32): Likewise.
>   (__arm_vldrwq_gather_base_wb_u32): Likewise.
>   (__arm_vldrwq_gather_base_wb_z_s32): Likewise.
>   (__arm_vldrwq_gather_base_wb_z_u32): Likewise.
>   (__arm_vldrwq_gather_base_wb_f32): Likewise.
>   (__arm_vldrwq_gather_base_wb_z_f32): Likewise.
>   * config/arm/arm_mve_builtins.def (vldrwq_gather_base_wb_z_u):
> Modify
>   builtin's qualifier.
>   (vldrdq_gather_base_wb_z_u): Likewise.
>   (vldrwq_gather_base_wb_u): Likewise.
>   (vldrdq_gather_base_wb_u): Likewise.
>   (vldrwq_gather_base_wb_z_s): Likewise.
>   (vldrwq_gather_base_wb_z_f): Likewise.
>   (vldrdq_gather_base_wb_z_s): Likewise.
>   (vldrwq_gather_base_wb_s): Likewise.
>   (vldrwq_gather_base_wb_f): Likewise.
>   (vldrdq_gather_base_wb_s): Likewise.
>   (vldrwq_gather_base_nowb_z_u): Define builtin.
>   (vldrdq_gather_base_nowb_z_u): Likewise.
>   (vldrwq_gather_base_nowb_u): Likewise.
>   (vldrdq_gather_base_nowb_u): Likewise.
>   (vldrwq_gather_base_nowb_z_s): Likewise.
>   (vldrwq_gather_base_nowb_z_f): Likewise.
>   (vldrdq_gather_base_nowb_z_s): Likewise.
>   (vldrwq_gather_base_nowb_s): Likewise.
>   (vldrwq_gather_base_nowb_f): Likewise.
>   (vldrdq_gather_base_nowb_s): Likewise.
>   * config/arm/mve.md (mve_vldrwq_gather_base_nowb_v4si):
> Define RTL
>   pattern.
>   (mve_vldrwq_gather_base_wb_v4si): Modify RTL pattern.
>   (mve_vldrwq_gather_base_nowb_z_v4si): Define RTL pattern.
>   (mve_vldrwq_gather_base_wb_z_v4si): Modify RTL pattern.
>   (mve_vldrwq_gather_base_wb_fv4sf): Modify RTL pattern.
>   (mve_vldrwq_gather_base_nowb_fv4sf): Define RTL pattern.
>   (mve_vldrwq_gather_base_wb_z_fv4sf): Modify RTL pattern.
>   (mve_vldrwq_gather_base_nowb_z_fv4sf): Define RTL pattern.
>   (mve_vldrdq_gather_base_nowb_v4di): Define RTL pattern.
>   (mve_vldrdq_gather_base_wb_v4di):  Modify RTL pattern.
>   (mve_vldrdq_gather_base_nowb_z_v4di): Define RTL pattern.
>   (mve_vldrdq_gather_base_wb_z_v4di):  Modify RTL pattern.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-03-31  Srinath Parvathaneni  
> 
>   PR target/94317
>   * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c:
> Modify
>   * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c:
> Likewise.
>   * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c:
> Likewise.
> 
> 
> 
> ### Attachment also inlined for ease of reply
> ###
> 
> 
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index
> 56f0db21ea95dcd738877daba27f1cb60f0d5a32..832b9107424fd9a4a0ee272
> b7

Re: [PATCH]libstdc++-v3/test: Skip "use_service.cc" for aarch64 baremetal

2020-04-02 Thread Andrea Corallo
Jonathan Wakely  writes:

> On 02/04/20 10:44 +0200, Andrea Corallo wrote:
>>Jonathan Wakely  writes:
>>
>>> On 01/04/20 16:56 +0100, Jonathan Wakely wrote:
>>
>>> Does that help, or does it still fail for other reasons?
>>
>>Yes it does thanks!  Updated patch follows.
>>
>>Okay for trunk?
>
> OK, thanks.

Committed as c1effaa209f9.

Thanks

  Andrea


Re: [PATCH] Prevent IPA-SRA from creating calls to local comdats (PR 92676)

2020-04-02 Thread Martin Jambor
Hi,

On Fri, Mar 20 2020, Jan Hubicka wrote:
>

[...]

>
> OK except for ...
>> @@ -3759,8 +3778,12 @@ process_isra_node_results (cgraph_node *node,
>>  = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
>>suffix_counter);
>>suffix_counter++;
>> -  if (node->same_comdat_group)
>> -new_node->add_to_same_comdat_group (node);
>> +  if (node->calls_comdat_local && node->same_comdat_group)
>> +{
>> +  new_node->add_to_same_comdat_group (node);
>> +  for (cgraph_edge *cs = new_node->callers; cs; cs = cs->next_caller)
>> +cs->caller->calls_comdat_local = true;
>
> You need to walk all aliases here.

Oh, right.  I assume thunks too?  (I acknowledge I swapped most of this
out since I fixed this bug and now cannot really say if we can have a
thunk there in the call chain.)  So like this?

Passed bootstrap and testing on x86_64-linux, running LTO bootstrap now.

Thanks,

Martin


2020-04-01  Martin Jambor  

PR ipa/92676
* ipa-sra.c (struct caller_issues): New fields candidate and
call_from_outside_comdat.
(check_for_caller_issues): Check for calls from outside of
candidate's same_comdat_group.
(check_all_callers_for_issues): Set up issues.candidate, check result
of the new check.
(mark_callers_calls_cdlcl): New function.
(process_isra_node_results): Set calls_comdat_local of callers if
appropriate.
---
 gcc/ChangeLog | 13 +
 gcc/ipa-sra.c | 38 --
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 654356c8dc8..e9183f4921a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2020-04-01  Martin Jambor  
+
+   PR ipa/92676
+   * ipa-sra.c (struct caller_issues): New fields candidate and
+   call_from_outside_comdat.
+   (check_for_caller_issues): Check for calls from outside of
+   candidate's same_comdat_group.
+   (check_all_callers_for_issues): Set up issues.candidate, check result
+   of the new check.
+   (mark_callers_calls_cdlcl): New function.
+   (process_isra_node_results): Set calls_comdat_local of callers if
+   appropriate.
+
 2020-04-01  Jakub Jelinek  
 
PR middle-end/94423
diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
index 31de527d111..45ea62522ab 100644
--- a/gcc/ipa-sra.c
+++ b/gcc/ipa-sra.c
@@ -2897,10 +2897,14 @@ ipa_sra_ipa_function_checks (cgraph_node *node)
 
 struct caller_issues
 {
+  /* The candidate being considered.  */
+  cgraph_node *candidate;
   /* There is a thunk among callers.  */
   bool thunk;
   /* Call site with no available information.  */
   bool unknown_callsite;
+  /* Call from outside the the candidate's comdat group.  */
+  bool call_from_outside_comdat;
   /* There is a bit-aligned load into one of non-gimple-typed arguments. */
   bool bit_aligned_aggregate_argument;
 };
@@ -2922,6 +2926,13 @@ check_for_caller_issues (struct cgraph_node *node, void 
*data)
 thunks.  */
  return true;
}
+  if (issues->candidate->calls_comdat_local
+ && issues->candidate->same_comdat_group
+ && !issues->candidate->in_same_comdat_group_p (cs->caller))
+   {
+ issues->call_from_outside_comdat = true;
+ return true;
+   }
 
   isra_call_summary *csum = call_sums->get (cs);
   if (!csum)
@@ -2944,6 +2955,7 @@ check_all_callers_for_issues (cgraph_node *node)
 {
   struct caller_issues issues;
   memset (&issues, 0, sizeof (issues));
+  issues.candidate = node;
 
   node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
   if (issues.unknown_callsite)
@@ -2962,6 +2974,13 @@ check_all_callers_for_issues (cgraph_node *node)
 node->dump_name ());
   return true;
 }
+  if (issues.call_from_outside_comdat)
+{
+  if (dump_file)
+   fprintf (dump_file, "Function would become private comdat called "
+"outside of its comdat group.\n");
+  return true;
+}
 
   if (issues.bit_aligned_aggregate_argument)
 {
@@ -3679,6 +3698,17 @@ push_param_adjustments_for_index (isra_func_summary 
*ifs, unsigned base_index,
 }
 }
 
+/* Worker for all call_for_symbol_thunks_and_aliases.  Set calls_comdat_local
+   flag of all callers of NODE.  */
+
+static bool
+mark_callers_calls_cdlcl (struct cgraph_node *node, void *)
+{
+  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
+cs->caller->calls_comdat_local = true;
+  return false;
+}
+
 
 /* Do final processing of results of IPA propagation regarding NODE, clone it
if appropriate.  */
@@ -3763,8 +3793,12 @@ process_isra_node_results (cgraph_node *node,
 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
  suffix_counter);
   suffix_counter++;
-  if (node->same_comdat_group)
-new_node->add_to_same_comdat_group (node);
+  

Re: [PATCH][RFC] c/94392 - only enable -ffinite-loops for C++

2020-04-02 Thread Richard Biener
On Thu, 2 Apr 2020, Jakub Jelinek wrote:

> On Thu, Apr 02, 2020 at 11:12:48AM +0200, Richard Biener wrote:
> > > "The implementation may assume that any thread will eventually do one of 
> > > the
> > > following:
> > >  — terminate,
> > >  — make a call to a library I/O function,
> > >  — perform an access through a volatile glvalue, or
> > >  — perform a synchronization operation or an atomic operation.
> > > [Note: This is intended to allow compiler transformations such as removal 
> > > of
> > > empty loops, even when termination cannot be proven. — end note]"
> 
> With -ffinite-loops, do we actually not optimize if the loop has volatile 
> accesses
> or atomics or library I/O calls?

We don't remove the loop then, yes.  All of those are considered 
side-effects by GCC and thus they are considered needed and keep
the loop live.  From a technical point finite_loop_p will still
return true for those which is not correct but harmless at the
moment (I hope).  I read the above as

volatile int i;

int __attribute__((const,noinline)) baz(int i) { return i; }
int foo(int a)
{
  do
{
  i;
  if (a != baz(a)) return 1;  
}
  while (1);
}

being a valid endless loop which we indeed preserve.  But we notice
the opportunity to unswitch on the condition and elide _that_ loop:

   [local count: 118111600]:
  _1 = baz (a_4(D));
  if (_1 != a_4(D))
goto ; [11.00%]
  else
goto ; [89.00%]

   [local count: 955630224]:
  vol.0_5 ={v} i;
  goto ; [100.00%]

   [local count: 118111600]:
  vol.0_3 ={v} i;
  return 1;

thus turn it into

  if (a != baz(a)) { i; return 1; }
  do { i; } while (1);

Richard.


Re: [PATCH] aarch64: Fix ICE due to aarch64_gen_compare_reg_maybe_ze [PR94435]

2020-04-02 Thread Richard Sandiford
Jakub Jelinek  writes:
> Hi!
>
> The following testcase ICEs, because aarch64_gen_compare_reg_maybe_ze emits
> invalid RTL.
> For y_mode [QH]Imode it expects y to be of that mode (or CONST_INT that fits
> into that mode) and x being SImode; for non-CONST_INT y it zero extends y
> into SImode and compares that against x, for CONST_INT y it zero extends y
> into SImode.  The problem is that when the zero extended constant isn't
> usable directly, it forces it into a REG, but with y_mode mode, and then
> compares against y.  That is wrong, because it should force it into a SImode
> REG and compare that way.
>
> The following patch fixes that, bootstrapped/regtested on aarch64-linux, ok
> for trunk?
>
> 2020-04-01  Jakub Jelinek  
>
>   PR target/94435
>   * config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): For
>   y_mode E_[QH]Imode and y being a CONST_INT, change y_mode to SImode.
>
>   * gcc.target/aarch64/pr94435.c: New test.

OK, thanks.

Richard

>
> --- gcc/config/aarch64/aarch64.c.jj   2020-03-30 17:02:28.0 +0200
> +++ gcc/config/aarch64/aarch64.c  2020-04-01 11:32:48.877900235 +0200
> @@ -2371,7 +2371,10 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CO
>if (y_mode == E_QImode || y_mode == E_HImode)
>  {
>if (CONST_INT_P (y))
> - y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
> + {
> +   y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
> +   y_mode = SImode;
> + }
>else
>   {
> rtx t, cc_reg;
> --- gcc/testsuite/gcc.target/aarch64/pr94435.c.jj 2020-04-01 
> 11:36:21.172797217 +0200
> +++ gcc/testsuite/gcc.target/aarch64/pr94435.c2020-04-01 
> 11:36:36.493573280 +0200
> @@ -0,0 +1,25 @@
> +/* PR target/94435 */
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-a+nolse -moutline-atomics" } */
> +
> +int b, c, d, e, f, h;
> +short g;
> +int foo (int) __attribute__ ((__const__));
> +
> +void
> +bar (void)
> +{
> +  while (1)
> +{
> +  while (1)
> + {
> +   __atomic_load_n (&e, 0);
> +   if (foo (2))
> + __sync_val_compare_and_swap (&c, 0, f);
> +   b = 1;
> +   if (h == e)
> + break;
> + }
> +  __sync_val_compare_and_swap (&g, -1, f);
> +}
> +}
>
>   Jakub


Re: [PATCH] Test for sigsetjmp support in analyzer tests requiring that feature.

2020-04-02 Thread Thomas Schwinge
Hi!

On 2020-03-22T11:31:31-0600, Sandra Loosemore  wrote:
> The new-ish analyzer test cases sigsetjmp-5.c and sigsetjmp-6.c were
> failing on nios2-elf and probably other newlib targets due to lack of
> support for sigsetjmp.  I didn't see a suitable existing
> effective-target test for this, so I added one.

> --- a/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-5.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-5.c
> @@ -1,3 +1,5 @@
> +/* { dg-require-effective-target sigsetjmp } */

> --- a/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-6.c
> +++ b/gcc/testsuite/gcc.dg/analyzer/sigsetjmp-6.c
> @@ -1,3 +1,5 @@
> +/* { dg-require-effective-target sigsetjmp } */

> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp

> +# Returns 1 if "sigsetjmp" is available on the target system.
> +
> +proc check_effective_target_sigsetjmp {} {
> +return [check_function_available "sigsetjmp"]
> +}

That got pushed to master branch as commit
adaf4b6c66e789d927684003b9ee05ed04c105ea "Test for sigsetjmp support in
analyzer tests requiring that feature".

On x86_64-pc-linux-gnu I now see these tests regress to:

UNSUPPORTED: gcc.dg/analyzer/sigsetjmp-5.c
UNSUPPORTED: gcc.dg/analyzer/sigsetjmp-6.c

..., because of:

Executing on host: [xgcc] sigsetjmp_available6728.c [...] -fno-builtin  -lm 
   -o sigsetjmp_available6728.exe(timeout = 300)
spawn [xgcc] sigsetjmp_available6728.c [...] -fno-builtin -lm -o 
sigsetjmp_available6728.exe
/tmp/ccKsf87z.o: In function `main':
sigsetjmp_available6728.c:(.text+0xa): undefined reference to `sigsetjmp'
collect2: error: ld returned 1 exit status
[...]

Does that maybe have something to do with feature test macros required
for 'sigsetjmp'?


Grüße
 Thomas
-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


[Patch][Fortran] Resolve formal args before checking DTIO (was: Re: [PATCH] deferred-shape vs assumed-shape)

2020-04-02 Thread Tobias Burnus

Hi Steve,

I think your patch is fine - however, I think calling the normal
resolve_formal_arglist looks a bit cleaner to me (as done in the
attached patch). — Additionally, I added the testcase.

A side effect of my variant is that gfc_check_dtio_interfaces will
be called again a bit later. — In this sense, Steve's patch,
which replicates a chunk of resolve_formal_arglist, is better.

Thoughts by anyone?

OK?

Tobias

PS: I was thinking of calling resolve_symbol instead
but this one does not resolve the formal arguments
(via "gfc_resolve (sym->formal_ns)") as sym->attr.contained.

On 4/1/20 10:04 PM, Steve Kargl via Fortran wrote:


See
https://stackoverflow.com/questions/60972134/whats-wrong-with-the-following-fortran-code-gfortran-dtio-dummy-argument-at

Is A(:) a deferred-shape array or an assumed-shape array?  The
answer of course depends on context.

This patch fixes the issue found at the above URL.

Index: gcc/fortran/interface.c
===
--- gcc/fortran/interface.c   (revision 280157)
+++ gcc/fortran/interface.c   (working copy)
@@ -4916,10 +4916,15 @@ check_dtio_arg_TKR_intent (gfc_symbol *fsym, bool type
|| ((type != BT_CLASS) && fsym->attr.dimension)))
  gfc_error ("DTIO dummy argument at %L must be a scalar",
 &fsym->declared_at);
-  else if (rank == 1
-&& (fsym->as == NULL || fsym->as->type != AS_ASSUMED_SHAPE))
-gfc_error ("DTIO dummy argument at %L must be an "
-"ASSUMED SHAPE ARRAY", &fsym->declared_at);
+  else if (rank == 1)
+{
+  if (fsym->as == NULL
+   || !(fsym->as->type == AS_ASSUMED_SHAPE
+ || (fsym->as->type == AS_DEFERRED && fsym->attr.dummy
+ && !fsym->attr.allocatable && !fsym->attr.pointer)))
+ gfc_error ("DTIO dummy argument at %L must be an "
+"ASSUMED-SHAPE ARRAY", &fsym->declared_at);
+}

if (type == BT_CHARACTER && fsym->ts.u.cl->length != NULL)
  gfc_error ("DTIO character argument at %L must have assumed length",


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
[Fortran] Resolve formal args before checking DTIO

	* gfortran.h (gfc_resolve_formal_arglist): Add prototype.
	* interface.c (check_dtio_interface1): Call it.
	* resolve.c (gfc_resolve_formal_arglist): Renamed from
	resolve_formal_arglist, removed static.
	(find_arglists, resolve_types): Update calls.

	* gfortran.dg/dtio_35.f90: New.

 gcc/fortran/gfortran.h|  1 +
 gcc/fortran/interface.c   |  4 ++-
 gcc/fortran/resolve.c | 10 +++
 gcc/testsuite/gfortran.dg/dtio_35.f90 | 50 +++
 4 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 96037629f5f..88e4d9236f3 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3369,6 +3369,7 @@ bool gfc_resolve_expr (gfc_expr *);
 void gfc_resolve (gfc_namespace *);
 void gfc_resolve_code (gfc_code *, gfc_namespace *);
 void gfc_resolve_blocks (gfc_code *, gfc_namespace *);
+void gfc_resolve_formal_arglist (gfc_symbol *);
 int gfc_impure_variable (gfc_symbol *);
 int gfc_pure (gfc_symbol *);
 int gfc_implicit_pure (gfc_symbol *);
diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c
index 14d03c27759..75a50c999b7 100644
--- a/gcc/fortran/interface.c
+++ b/gcc/fortran/interface.c
@@ -5007,6 +5007,9 @@ check_dtio_interface1 (gfc_symbol *derived, gfc_symtree *tb_io_st,
 gfc_error ("DTIO procedure %qs at %L must be a subroutine",
 	   dtio_sub->name, &dtio_sub->declared_at);
 
+  if (!dtio_sub->resolved)
+gfc_resolve_formal_arglist (dtio_sub);
+
   arg_num = 0;
   for (formal = dtio_sub->formal; formal; formal = formal->next)
 arg_num++;
@@ -5025,7 +5028,6 @@ check_dtio_interface1 (gfc_symbol *derived, gfc_symtree *tb_io_st,
   return;
 }
 
-
   /* Now go through the formal arglist.  */
   arg_num = 1;
   for (formal = dtio_sub->formal; formal; formal = formal->next, arg_num++)
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 79b0d724565..97de6ddce84 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -264,8 +264,8 @@ resolve_procedure_interface (gfc_symbol *sym)
Since a dummy argument cannot be a non-dummy procedure, the only
resort left for untyped names are the IMPLICIT types.  */
 
-static void
-resolve_formal_arglist (gfc_symbol *proc)
+void
+gfc_resolve_formal_arglist (gfc_symbol *proc)
 {
   gfc_formal_arglist *f;
   gfc_symbol *sym;
@@ -319,7 +319,7 @@ resolve_formal_arglist (gfc_symbol *proc)
 }
 
   if (sym->attr.if_source != IFSRC_UNKNOWN)
-	resolve_formal_arglist (sym);
+	gfc_resolve_formal_arglist (sym);
 
   if (sym->attr.subroutine || sym->attr.external)
 	{
@@ -547,7 +547,7 @@ find_arglists (gfc_symbol *sym)
   

[PATCH] gcc/config/rs6000: Add link with libc128 with -mlong-double-128 for AIX

2020-04-02 Thread CHIGOT, CLEMENT via Gcc-patches
Description:
 * AIX applications using 128-bit long double must be linked with
   libc128.a, in order to have 128-bit compatible routines.

Tests:
 * AIX 7.2, 7.1: Build/Tests: OK

Changelog:
 * config/rs6000/aix61.h (LIB_SPEC): Add -lc128 with -mlong-double-128.
 * config/rs6000/aix71.h (LIB_SPEC): Likewise.
 * config/rs6000/aix72.h (LIB_SPEC): Likewise.

gcc-8.4.0-gcc-config-rs6000-add-link-with-libc128-with-mlong-d.patch
Description: gcc-8.4.0-gcc-config-rs6000-add-link-with-libc128-with-mlong-d.patch


Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Richard Biener via Gcc-patches
On Thu, Apr 2, 2020 at 9:15 AM Kewen.Lin  wrote:
>
> Hi,
>
> The commit r10-7415 brings scalar type consideration
> to eliminate epilogue peeling for gaps, but it exposed
> one problem that the current handling doesn't consider
> the memory access type VMAT_CONTIGUOUS_REVERSE, for
> which the overrun happens on low address side.  This
> patch is to make the code take care of it by updating
> the offset and construction element order accordingly.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu P8
> and aarch64-linux-gnu.

OK with the formatting changes suggested by Jakub.

Richard.

> BR,
> Kewen
> ---
> gcc/ChangeLog
>
> 2020-04-02  Kewen Lin  
>
> PR tree-optimization/94401
> * tree-vect-loop.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
> access type when loading halves of vector to avoid peeling for gaps.


Re: [PATCH][RFC] c/94392 - only enable -ffinite-loops for C++

2020-04-02 Thread Jakub Jelinek via Gcc-patches
On Thu, Apr 02, 2020 at 11:12:48AM +0200, Richard Biener wrote:
> > "The implementation may assume that any thread will eventually do one of the
> > following:
> >  — terminate,
> >  — make a call to a library I/O function,
> >  — perform an access through a volatile glvalue, or
> >  — perform a synchronization operation or an atomic operation.
> > [Note: This is intended to allow compiler transformations such as removal of
> > empty loops, even when termination cannot be proven. — end note]"

With -ffinite-loops, do we actually not optimize if the loop has volatile 
accesses
or atomics or library I/O calls?

Jakub



Re: [PATCH]libstdc++-v3/test: Skip "use_service.cc" for aarch64 baremetal

2020-04-02 Thread Jonathan Wakely via Gcc-patches

On 02/04/20 10:44 +0200, Andrea Corallo wrote:

Jonathan Wakely  writes:


On 01/04/20 16:56 +0100, Jonathan Wakely wrote:



Does that help, or does it still fail for other reasons?


Yes it does thanks!  Updated patch follows.

Okay for trunk?


OK, thanks.



Re: [PATCH][RFC] c/94392 - only enable -ffinite-loops for C++

2020-04-02 Thread Richard Biener
On Wed, 1 Apr 2020, Jason Merrill wrote:

> On 4/1/20 9:36 AM, Richard Biener wrote:
> > This does away with enabling -ffinite-loops at -O2+ for all languages
> > and instead enables it selectively for C++ only.
> > 
> > Jason, I didn't find a reference as to when the forward progress
> > guarantee was introduced to C++ so I randomly chose C++11, is that
> > correct?
> 
> C++11 says "Implementations should ensure that all unblocked threads
> eventually make progress."
> 
> C++17 adds the "Forward progress" section that says
> 
> "The implementation may assume that any thread will eventually do one of the
> following:
>  — terminate,
>  — make a call to a library I/O function,
>  — perform an access through a volatile glvalue, or
>  — perform a synchronization operation or an atomic operation.
> [Note: This is intended to allow compiler transformations such as removal of
> empty loops, even when termination cannot be proven. — end note]"

OK, so I assume using C++11 as in the patch is fine.

I'm retesting the following currently which factors in Honza's comments
turning -ffinite-loops into a per-loop setting at CFG construction time
(the same place where we'd handle #pragma induced ANNOTATE_EXPRs for
such feature - for GCC11 we might implement the C side this way which has
more restrictive wording where such forward progress can be assumed)

Thanks,
Richard.

[PATCH] c/94392 - only enable -ffinite-loops for C++

This does away with enabling -ffinite-loops at -O2+ for all languages
and instead enables it selectively for C++ only.

It also makes -ffinite-loops loop-private at CFG construction time
fixing correctness issues with inlining.

2020-04-02  Richard Biener  

PR c/94392
* c-opts.c (c_common_post_options): Enable -ffinite-loops
for -O2 and C++11 or newer.

* common.opt (ffinite-loops): Initialize to zero.
* opts.c (default_options_table): Remove OPT_ffinite_loops
entry.
* cfgloop.h (loop::finite_p): New member.
* cfgloopmanip.c (copy_loop_info): Copy finite_p.
* ipa-icf-gimple.c (func_checker::compare_loops): Compare
finite_p.
* lto-streamer-in.c (input_cfg): Stream finite_p.
* lto-streamer-out.c (output_cfg): Likewise.
* tree-cfg.c (replace_loop_annotate): Initialize finite_p
from flag_finite_loops at CFG build time.
* tree-ssa-loop-niter.c (finite_loop_p): Check the loop's
finite_p flag instead of flag_finite_loops.
* doc/invoke.texi (ffinite-loops): Adjust documentation of
default setting.

* gcc.dg/torture/pr94392.c: New testcase.
---
 gcc/c-family/c-opts.c  |  4 
 gcc/cfgloop.h  |  4 
 gcc/cfgloopmanip.c |  1 +
 gcc/common.opt |  2 +-
 gcc/doc/invoke.texi|  3 ++-
 gcc/ipa-icf-gimple.c   |  2 ++
 gcc/lto-streamer-in.c  |  1 +
 gcc/lto-streamer-out.c |  1 +
 gcc/opts.c |  1 -
 gcc/testsuite/gcc.dg/torture/pr94392.c | 22 ++
 gcc/tree-cfg.c |  3 +++
 gcc/tree-ssa-loop-niter.c  |  2 +-
 12 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr94392.c

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 6b6c754ad86..58ba0948e79 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -989,6 +989,10 @@ c_common_post_options (const char **pfilename)
   SET_OPTION_IF_UNSET (&global_options, &global_options_set, flag_new_ttp,
   cxx_dialect >= cxx17);
 
+  /* C++11 guarantees forward progress.  */
+  SET_OPTION_IF_UNSET (&global_options, &global_options_set, flag_finite_loops,
+  optimize >= 2 && cxx_dialect >= cxx11);
+
   if (cxx_dialect >= cxx11)
 {
   /* If we're allowing C++0x constructs, don't warn about C++98
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 1c49a8b8c2d..18b404e292f 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -226,6 +226,10 @@ public:
   /* True if the loop is part of an oacc kernels region.  */
   unsigned in_oacc_kernels_region : 1;
 
+  /* True if the loop is known to be finite.  This is a localized
+ flag_finite_loops or similar pragmas state.  */
+  unsigned finite_p : 1;
+
   /* The number of times to unroll the loop.  0 means no information given,
  just do what we always do.  A value of 1 means do not unroll the loop.
  A value of USHRT_MAX means unroll with no specific unrolling factor.
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index c9375565f62..50c7267ec49 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -1023,6 +1023,7 @@ copy_loop_info (class loop *loop, class loop *target)
   target->dont_vectorize = loop->dont_vectorize;
   target->force_vectorize = loop->force_vectorize;
   target->in_oacc_kernels_region = loop->in_oacc_kernels_r

Re: [Patch][Fortran] Fix error cleanup of select rank (PR93522)

2020-04-02 Thread Paul Richard Thomas via Gcc-patches
Hi Tobias,

I would say that if any patch were obvious, that one is :-) OK.

Thanks

Paul


On Mon, 30 Mar 2020 at 09:16, Tobias Burnus  wrote:

> Early *ping*.
>
> Tobias
>
> On 3/27/20 11:06 AM, Tobias Burnus wrote:
>
> > Hi all,
> >
> > here, the reject_statement cleanup and the freeing of the
> > namespace both remove the symbol. Solution: Remove it first,
> > then clean the namespace – then the reject_statement has no
> > (deleted) statement to cleanup.
> >
> > As select rank is new, that's again a GCC-10 only
> > regression (of invalid code).
> >
> > OK?
> >
> > Tobias
> >
> > PS: valgrind shows
> > ==71237==definitely lost: 0 bytes in 0 blocks
> > ==71237==indirectly lost: 0 bytes in 0 blocks
> > ==71237==  possibly lost: 0 bytes in 0 blocks
> > I did ignore:
> > ==52255==still reachable: 500,682 bytes in 2,181 blocks
> > which is the same also with 'select... end select' commented.
> >
> -
> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München /
> Germany
> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung,
> Alexander Walter
>


-- 
"If you can't explain it simply, you don't understand it well enough" -
Albert Einstein


Re: [PATCH]libstdc++-v3/test: Skip "use_service.cc" for aarch64 baremetal

2020-04-02 Thread Andrea Corallo
Jonathan Wakely  writes:

> On 01/04/20 16:56 +0100, Jonathan Wakely wrote:

> Does that help, or does it still fail for other reasons?

Yes it does thanks!  Updated patch follows.

Okay for trunk?

Thanks

  Andrea

libstdc++-v3/ChangeLog
2020-??-??  Andrea Corallo  

* testsuite/experimental/net/execution_context/use_service.cc:
Require pthread and gthreads.
>From 5aa45ce35b947b88d18ccb9026d10e2c0e2fc22c Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Wed, 1 Apr 2020 10:19:04 +0100
Subject: [PATCH] 89760

---
 .../experimental/net/execution_context/use_service.cc   | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/experimental/net/execution_context/use_service.cc b/libstdc++-v3/testsuite/experimental/net/execution_context/use_service.cc
index 8a2d03fb1bb8..d242d63cb94e 100644
--- a/libstdc++-v3/testsuite/experimental/net/execution_context/use_service.cc
+++ b/libstdc++-v3/testsuite/experimental/net/execution_context/use_service.cc
@@ -15,7 +15,11 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do run { target c++14 } }
+// { dg-do run }
+// { dg-options "-pthread"  }
+// { dg-require-effective-target c++14 }
+// { dg-require-effective-target pthread }
+// { dg-require-gthreads "" }
 
 #include 
 #include 
-- 
2.17.1



Re: issue with behavior change of gcc -r between gcc-8 and gcc-9

2020-04-02 Thread Allan Sandfeld Jensen
On Wednesday, 1 April 2020 19:48:11 CEST Olivier Hainque wrote:
> 
> -r 's business was to arrange for the linker not to
> complain because the closure is incomplete, leaving us
> with complete control of the closure.
> 
> It doesn't seem to me there was a really strong motivation
> to suddenly have -r influence the closure the way it now does.
> 
> Would it be possible to revert to the previous behavior
> and document it ?
> 
> Or maybe allow it to be controllable by the target ports ?
> 
> Or provide something to bring back the flexibility we had
> if we really believe the default should change ? (I'm not
> convinced)

-r is used for relinking. The idea behind the change was to make it directly 
suitable for that. It takes object files and relinks them into a new object 
file. It gives the caller complete control.

It sounds like you are missing some way to add startfiles? A reverse of 
-nostartfiles?

But hopefully you can just use the linker directly? Unless you have LTO 
enabled object files you don't need the compiler to link.

`Allan




Re: [PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Jakub Jelinek via Gcc-patches
Hi!

On Thu, Apr 02, 2020 at 03:15:42PM +0800, Kewen.Lin via Gcc-patches wrote:

Just formatting nits, not commenting on what the actual patch does.

> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, 
> gimple_stmt_iterator *gsi,
>   if (new_vtype != NULL_TREE)
> ltype = half_vtype;
> }
> + tree offset = dataref_offset
> + ? dataref_offset
> + : build_int_cst (ref_type, 0);

The above is misformatted.  The ? and : shouldn't be indented further than
the dataref_offset, but usually e.g. for the sake of emacs we add ()s around
the expression in this case.  So:
tree offset = (dataref_offset
   ? dataref_offset
   : build_int_cst (ref_type, 0));
or
tree offset
  = (dataref_offset
 ? dataref_offset : build_int_cst (ref_type, 0));

> + if (ltype != vectype
> + && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> +   offset = size_binop (
> + PLUS_EXPR,
> + build_int_cst (ref_type,
> +DR_GROUP_GAP (first_stmt_info)
> +  * tree_to_uhwi (
> +TYPE_SIZE_UNIT (elem_type))),
> + offset);

Again, no reason to indent * by 2 columns from DR_GROUP_GAP.  But also all
the (s at the end of line and randomly indented arguments look ugly.
I'd recommend temporaries, e.g. like (perhaps with different names of
temporaries, so that they don't shadow anything):

  {
unsigned HOST_WIDE_INT gap
  = DR_GROUP_GAP (first_stmt_info);
gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
tree gapcst = build_int_cst (ref_type, gap);
offset = size_binop (PLUS_EXPR, offset, gapcst);
  }

Jakub



RE: [PATCH][GCC][Arm]: Do not process rest of MVE header file after unsupported error

2020-04-02 Thread Kyrylo Tkachov


> -Original Message-
> From: Andre Vieira (lists) 
> Sent: 02 April 2020 09:20
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov 
> Subject: [PATCH][GCC][Arm]: Do not process rest of MVE header file after
> unsupported error
> 
> Hi,
> 
> This patch makes sure the rest of the header file is not parsed if MVE is not
> supported.  The user should not be including this file if MVE is not 
> supported,
> nevertheless making sure it doesn't parse the rest of the header file will 
> save
> the user from a huge error output that would be rather useless.
> 
> Is this OK for trunk?

Ok.
Thanks,
Kyrill

> 
> gcc/ChangeLog:
> 2020-04-02  Andre Vieira  
> 
>      * config/arm/arm_mve.h: Condition the header file on
> __ARM_FEATURE_MVE.



[PATCH][GCC][Arm]: Do not process rest of MVE header file after unsupported error

2020-04-02 Thread Andre Vieira (lists)

Hi,

This patch makes sure the rest of the header file is not parsed if MVE 
is not supported.  The user should not be including this file if MVE is 
not supported, nevertheless making sure it doesn't parse the rest of the 
header file will save the user from a huge error output that would be 
rather useless.


Is this OK for trunk?

gcc/ChangeLog:
2020-04-02  Andre Vieira  

    * config/arm/arm_mve.h: Condition the header file on 
__ARM_FEATURE_MVE.


diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 
f1dcdc2153217e796c58526ba0e5be11be642234..1ce55bd2fc4f5c6a171ffe116d7fd9029e11a619
 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -24,11 +24,9 @@
 
 #if __ARM_BIG_ENDIAN
 #error "MVE intrinsics are not supported in Big-Endian mode."
-#endif
-
-#if !__ARM_FEATURE_MVE
+#elif !__ARM_FEATURE_MVE
 #error "MVE feature not supported"
-#endif
+#else
 
 #include 
 #ifndef  __cplusplus
@@ -27554,4 +27552,5 @@ extern void *__ARM_undef;
 }
 #endif
 
+#endif /* __ARM_FEATURE_MVE  */
 #endif /* _GCC_ARM_MVE_H.  */


Re: PING -- [PATCH, fortran] PR 85982 -- ICE in resolve_component

2020-04-02 Thread Tobias Burnus

In principle, I like the patch. However, I think one should
replace

gfc_error ("Attribute at %L is not allowed in a %s definition",
  …, state_name

by something like:

bool is_type = gfc_current_state () == COMP_DERIVED;
gfc_error (is_type ? G_("Attribute at %L is not allowed in a TYPE definition")
  : G_("Attribute at %L is not allowed in a STRUCTURE 
definition"),
  …

Reason: (a) This makes translation simpler; e.g. 'structure' and 'type' have
different gender in several European languages. (Albeit in this case the
gender of 'definition' dominates in the cases I checked.)
(b) For TYPE, the string won't change such that the existing translations
still work – even if the update for STRUCTURE won't make it for the release.

Otherwise it looks good to me, including the test case in your follow-up email.

Cheers,

Tobias

On 4/1/20 7:19 PM, Fritz Reese via Fortran wrote:


This simple patch was submitted some time ago (over 1 year), but got
lost without review. I have lately rebased and tested, and the patch
is still good. Is this OK to commit to trunk and for backport? I'd
like to port as far back as 7.

---
Fritz Reese

gcc/ChangeLog:
2020-04-01  Fritz Reese  

PR fortran/85982
* fortran/decl.c (match_attr_spec): Lump COMP_STRUCTURE/COMP_MAP into
attribute checking used by TYPE.

gcc/testsuite/ChangeLog:
2020-04-01  Fritz Reese  

PR fortran/85982
* gfortran.dg/dec_structure_28.f90: New test.

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


[PATCH] c++: Fix further protected_set_expr_location related -fcompare-debug issues [PR94441]

2020-04-02 Thread Jakub Jelinek via Gcc-patches
Hi!

My recent protected_set_expr_location changes work well when
that function is called unconditionally, but as the testcase shows, the C++
FE has a few spots that do:
  if (!EXPR_HAS_LOCATION (stmt))
protected_set_expr_location (stmt, locus);
or similar.  Now, if we have for -g0 stmt of some expression that can
have location and has != UNKNOWN_LOCATION, while -g instead has
a STATEMENT_LIST containing some DEBUG_BEGIN_STMTs + that expression with
that location, we don't call protected_set_expr_location in the -g0 case,
but do call it in the -g case, because on the STATEMENT_LIST
!EXPR_HAS_LOCATION.
The following patch introduces a helper function which digs up the single
expression of a STATEMENT_LIST and uses that expression in the
EXPR_HAS_LOCATION check (plus changes protected_set_expr_location to
also use that helper).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Or do we want a further wrapper, perhaps C++ FE only, that would do this
protected_set_expr_location_if_unset (stmt, locus)?

2020-04-02  Jakub Jelinek  

PR debug/94441
* tree-iterator.h (expr_single): Declare.
* tree-iterator.c (expr_single): New function.
* tree.c (protected_set_expr_location): Use it.

* parser.c (cp_parser_omp_for_loop): Use expr_single.
* cp-gimplify.c (genericize_if_stmt, genericize_cp_loop): Likewise.

* g++.dg/opt/pr94441.C: New test.

--- gcc/tree-iterator.h.jj  2020-01-12 11:54:38.497381967 +0100
+++ gcc/tree-iterator.h 2020-04-01 16:58:00.034347283 +0200
@@ -119,5 +119,6 @@ extern void append_to_statement_list (tr
 extern void append_to_statement_list_force (tree, tree *);
 extern tree expr_first (tree);
 extern tree expr_last (tree);
+extern tree expr_single (tree);
 
 #endif /* GCC_TREE_ITERATOR_H  */
--- gcc/tree-iterator.c.jj  2020-01-12 11:54:38.497381967 +0100
+++ gcc/tree-iterator.c 2020-04-01 17:05:23.388920122 +0200
@@ -354,4 +354,45 @@ expr_last (tree expr)
   return expr;
 }
 
+/* If EXPR is a STATEMENT_LIST containing just DEBUG_BEGIN_STMTs and
+   a single other stmt, return that other stmt (recursively).
+   If it is a STATEMENT_LIST containing no non-DEBUG_BEGIN_STMTs or
+   multiple, return NULL_TREE.
+   Otherwise return EXPR.  */
+
+tree
+expr_single (tree expr)
+{
+  if (expr == NULL_TREE)
+return expr;
+
+  if (TREE_CODE (expr) == STATEMENT_LIST)
+{
+  /* With -gstatement-frontiers we could have a STATEMENT_LIST with
+DEBUG_BEGIN_STMT(s) and only a single other stmt, which with
+-g wouldn't be present and we'd have that single other stmt
+directly instead.  */
+  struct tree_statement_list_node *n = STATEMENT_LIST_HEAD (expr);
+  if (!n)
+   return NULL_TREE;
+  while (TREE_CODE (n->stmt) == DEBUG_BEGIN_STMT)
+   {
+ n = n->next;
+ if (!n)
+   return NULL_TREE;
+   }
+  expr = n->stmt;
+  do
+   {
+ n = n->next;
+ if (!n)
+   return expr_single (expr);
+   }
+  while (TREE_CODE (n->stmt) == DEBUG_BEGIN_STMT);
+  return NULL_TREE;
+}
+
+  return expr;
+}
+
 #include "gt-tree-iterator.h"
--- gcc/tree.c.jj   2020-03-26 10:35:29.984667559 +0100
+++ gcc/tree.c  2020-04-01 16:59:01.689453490 +0200
@@ -5148,30 +5148,9 @@ protected_set_expr_location (tree t, loc
 SET_EXPR_LOCATION (t, loc);
   else if (t && TREE_CODE (t) == STATEMENT_LIST)
 {
-  /* With -gstatement-frontiers we could have a STATEMENT_LIST with
-DEBUG_BEGIN_STMT(s) and only a single other stmt, which with
--g wouldn't be present and we'd have that single other stmt
-directly instead.  */
-  struct tree_statement_list_node *n = STATEMENT_LIST_HEAD (t);
-  if (!n)
-   return;
-  while (TREE_CODE (n->stmt) == DEBUG_BEGIN_STMT)
-   {
- n = n->next;
- if (!n)
-   return;
-   }
-  tree t2 = n->stmt;
-  do
-   {
- n = n->next;
- if (!n)
-   {
- protected_set_expr_location (t2, loc);
- return;
-   }
-   }
-  while (TREE_CODE (n->stmt) == DEBUG_BEGIN_STMT);
+  t = expr_single (t);
+  if (t && CAN_HAVE_LOCATION_P (t))
+   SET_EXPR_LOCATION (t, loc);
 }
 }
 
--- gcc/cp/parser.c.jj  2020-03-29 11:14:41.087556278 +0200
+++ gcc/cp/parser.c 2020-04-01 17:03:00.730988186 +0200
@@ -39149,8 +39149,9 @@ cp_parser_omp_for_loop (cp_parser *parse
incr = cp_parser_omp_for_incr (parser, real_decl);
  else
incr = cp_parser_expression (parser);
- if (!EXPR_HAS_LOCATION (incr))
-   protected_set_expr_location (incr, input_location);
+ if (tree incrl = expr_single (incr))
+   if (!EXPR_HAS_LOCATION (incrl))
+ protected_set_expr_location (incrl, input_location);
}
 
 parse_close_paren:
--- gcc/cp/cp-gimplify.c.jj 2020-03-29 11:14:41.057556738 +0200
+++ gcc/cp/cp-gimp

Re: [AArch64][SVE][IPA] ICE caused by incompatibility of SRA and svst builtin-function

2020-04-02 Thread Richard Biener via Gcc-patches
On Thu, Apr 2, 2020 at 5:36 AM bule  wrote:
>
> Hello,
>
> An Internal Compiler Error (ICE) is found in the ipa-sra optimization pass when it
> handles the argument of the internal call svst3 for SVE.
>
> The problem comes from
> gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c in the test suite,
> which can be reduced to the following code:
>
> #include 
> #include
> void st2_bf16_base (svbfloat16x3_t z1, svbool_t p0, bfloat16_t *x0, intptr_t 
> x1) {
> svst3 (p0, x0, z1);
> }
>
> Compiled with -march=armv8.2-a+sve -msve-vector-bits=256 -O2, it results
> in a segmentation fault in IPA-SRA:
>
> > [bule@localhost gcc10_fail]$ gcc st2_bf16.i -o st2_bf16.s -S 
> > -march=armv8.2-a+sve -msve-vector-bits=256 -O2
> > during IPA pass: sra
> > st2_bf16.c: In function ‘st2_bf16_base’:
> > st2_bf16.c:10:1: internal compiler error: Segmentation fault
> >   .. /* omit some stack info here.  */ ..
> > 0xa34f68 call_summary::get_create(cgraph_edge*)
> > ../.././gcc/symbol-summary.h:642
> > 0xa34f68 record_nonregister_call_use
> > ../.././gcc/ipa-sra.c:1613
> > 0xa34f68 scan_expr_access
> > ../.././gcc/ipa-sra.c:1781
> >   .. /* omit some stack info here.  */ ..
> > Please submit a full bug report,
> > with preprocessed source if appropriate.
> > Please include the complete backtrace with any bug report.
>
> Details can be found in PR 94398.
> Similar problems can be found in svst2, svst4 and other functions of this kind.
>
> This problem is caused by the "record_nonregister_call_use" function trying to
> access the call graph edge of an internal call, .MASK_STORE_LANE, which is a
> NULL pointer.
>
> The reason of stepping into "record_nonregister_call_use" function is that 
> the upper level function "scan_expr_access" considered the "svbfloat16x3_t z1"
> argument as a valid candidate for further optimization.
>
> A simple solution here is to disqualify the candidate at the "scan_expr_access"
> level when the call graph edge is null, which indicates the call is either an
> internal call or a call with no references. In both cases, further
> optimization should stop before it dereferences a NULL pointer.
>
> A proposed patch is attached.
>
> Any suggestions?

I think internal calls should be handled like asms, which means, looking
at the source a bit,
passing ISRA_CTX_LOAD instead of ISRA_CTX_ARG to scan_expr_access.

Martin?

Richard.

>
> Thanks,
> Bu Le


Re: [PATCH] doc: RISC-V: Update binutils requirement to 2.30

2020-04-02 Thread Richard Biener via Gcc-patches
On Wed, Apr 1, 2020 at 10:34 PM Maciej W. Rozycki via Gcc-patches
 wrote:
>
> Complement commit bfe78b08471f ("RISC-V: Using fmv.x.w/fmv.w.x rather
> than fmv.x.s/fmv.s.x") and document a binutils 2.30 requirement in the
> installation manual, matching the addition of fmv.x.w/fmv.w.x mnemonics
> to GAS.
>
> gcc/
> * doc/install.texi (Specific) 
> : Update binutils requirement to
> 2.30.
> ---
> On Wed, 18 Mar 2020, Maciej W. Rozycki wrote:
>
> > > >  At the very least I think we ought to document the minimum version of
> > > > binutils now required by GCC for RISC-V support.
> > >
> > > The new opcodes were added to gas in 2017-09-27, and I can't recommend
> > > using any binutils or gcc release that predates 2018-01-01 because
> > > they are all known to be buggy, or incompatible with the current ISA
> > > definition.  So I don't see any need for a configure test for this
> > > change.  Anyone missing the new instructions in gas has bigger
> > > problems to worry about.
> [...]
> >  Our installation instructions state binutils 2.28 as the requirement for
> > all the RISC-V targets, however the change for fmv.x.w/fmv.w.x instruction
> > support was only added in the binutils 2.30 development cycle.
>
>  Here's the resulting change.  Verified with `make info' and `make check'.
> OK to apply?

OK.  Can you also update gcc-10/changes.html?

Thanks,
Richard.

>   Maciej
> ---
>  gcc/doc/install.texi |   12 
>  1 file changed, 4 insertions(+), 8 deletions(-)
>
> gcc-riscv-binutils-version.diff
> Index: gcc/gcc/doc/install.texi
> ===
> --- gcc.orig/gcc/doc/install.texi
> +++ gcc/gcc/doc/install.texi
> @@ -4545,8 +4545,7 @@ This configuration is intended for embed
>  @heading riscv32-*-elf
>  The RISC-V RV32 instruction set.
>  This configuration is intended for embedded systems.
> -This (and all other RISC-V) targets are supported upstream as of the
> -binutils 2.28 release.
> +This (and all other RISC-V) targets require the binutils 2.30 release.
>
>  @html
>  
> @@ -4554,8 +4553,7 @@ binutils 2.28 release.
>  @anchor{riscv32-x-linux}
>  @heading riscv32-*-linux
>  The RISC-V RV32 instruction set running GNU/Linux.
> -This (and all other RISC-V) targets are supported upstream as of the
> -binutils 2.28 release.
> +This (and all other RISC-V) targets require the binutils 2.30 release.
>
>  @html
>  
> @@ -4564,8 +4562,7 @@ binutils 2.28 release.
>  @heading riscv64-*-elf
>  The RISC-V RV64 instruction set.
>  This configuration is intended for embedded systems.
> -This (and all other RISC-V) targets are supported upstream as of the
> -binutils 2.28 release.
> +This (and all other RISC-V) targets require the binutils 2.30 release.
>
>  @html
>  
> @@ -4573,8 +4570,7 @@ binutils 2.28 release.
>  @anchor{riscv64-x-linux}
>  @heading riscv64-*-linux
>  The RISC-V RV64 instruction set running GNU/Linux.
> -This (and all other RISC-V) targets are supported upstream as of the
> -binutils 2.28 release.
> +This (and all other RISC-V) targets require the binutils 2.30 release.
>
>  @html
>  


Re: [AArch64] Backporting -moutline-atomics to gcc 9.x and 8.x

2020-04-02 Thread Christophe Lyon via Gcc-patches
On Thu, 2 Apr 2020 at 04:34, Pop, Sebastian  wrote:
>
> I have also seen this error in tsan.
> The fix is 
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ea376dd471a3b006bc48945c1d9a29408ab17a04
> "Fix shrinkwrapping interactions with atomics (PR92692)".
> This fix got committed as the last patch in the series.
>

Indeed, it's now OK, thanks!

> Sebastian
>
> On 4/1/20, 5:13 PM, "Christophe Lyon"  wrote:
>
> CAUTION: This email originated from outside of the organization. Do not 
> click links or open attachments unless you can confirm the sender and know 
> the content is safe.
>
>
>
> On Wed, 25 Mar 2020 at 01:24, Pop, Sebastian via Gcc-patches
>  wrote:
> >
> > Hi Kyrill,
> >
> > Thanks for pointing out the two missing bug fixes.
> > Please see attached all the back-ported patches.
> > All the patches from trunk applied cleanly with no conflicts (except 
> for the ChangeLog files) to the gcc-9 branch.
> > An up to date gcc-9 branch on which I applied the attached patches has 
> passed bootstrap on aarch64-linux (Graviton2 with 64 N1 cores) and make check 
> with no extra fails.
> > Kyrill, could you please commit the attached patches to the gcc-9 
> branch?
> >
>
> Hi,
>
> I'm seeing a GCC build failure after "aarch64: Implement TImode
> compare-and-swap"
> was backported to gcc-9 (commit 53c1356515ac1357c341b594326967ac4677d891)
>
> The build log has:
> 0x14a1660 gen_split_100(rtx_insn*, rtx_def**)
> 
> /tmp/6477245_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/aarch64/atomics.md:110
> 0xa81076 try_split(rtx_def*, rtx_insn*, int)
> 
> /tmp/6477245_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/emit-rtl.c:3851
> 0xda2b0d split_insn
> 
> /tmp/6477245_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/recog.c:2901
> 0xda7057 split_all_insns()
> 
> /tmp/6477245_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/recog.c:3005
> 0xda7118 execute
> 
> /tmp/6477245_1.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/recog.c:3957
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See  for instructions.
> make[4]: *** [Makefile:659: tsan_interface_atomic.lo] Error 1
>
> Maybe that problem is fixed by a patch later in the series? (I have
> validations running after every patch on the release branches, so it
> may take a while until I have the results for the end of the series)
>
> Thanks,
>
> Christophe
>
> > As we still don't have a copyright assignment on file, would it be 
> possible for ARM to finish the backport to the gcc-8 branch of these patches 
> and the atomics cleanup patches mentioned below?
> >
> > I did a `git log config/aarch64/atomics.md` and there is a follow-up 
> patch to the atomics cleanup patches:
> >
> > commit e21679a8bb17aac603b8704891e60ac502200629
> > Author: Jakub Jelinek 
> > Date:   Wed Nov 21 17:41:03 2018 +0100
> >
> > re PR target/87839 (ICE in final_scan_insn_1, at final.c:3070)
> >
> > PR target/87839
> > * config/aarch64/atomics.md 
> (@aarch64_compare_and_swap): Use
> > rIJ constraint for aarch64_plus_operand rather than rn.
> >
> > * gcc.target/aarch64/pr87839.c: New test.
> >
> > From-SVN: r266346
> >
> > That is fixing code modified in this cleanup patch:
> >
> > commit d400fda3a8c3330f77eb9d51874f5482d3819a9f
> > Author: Richard Henderson 
> > Date:   Wed Oct 31 09:42:39 2018 +
> >
> > aarch64: Improve cas generation
> >
> >
> > Thanks,
> > Sebastian
> >
> >
> > On 3/11/20, 5:11 AM, "Kyrill Tkachov"  
> wrote:
> >
> > CAUTION: This email originated from outside of the organization. Do 
> not click links or open attachments unless you can confirm the sender and 
> know the content is safe.
> >
> >
> >
> > Hi Sebastian,
> >
> > On 3/9/20 9:47 PM, Pop, Sebastian wrote:
> > > Hi,
> > >
> > > Please see attached the patches to add -moutline-atomics to the 
> gcc-9 branch.
> > > Tested on graviton2 aarch64-linux with bootstrap and
> > > `make check` passes with no new fails.
> > > Tested `make check` on glibc built with gcc-9 with and without 
> "-moutline-atomics"
> > > and CFLAGS=" -O2 -g -fno-stack-protector -U_FORTIFY_SOURCE".
> > >
> > > Ok to commit to gcc-9 branch?
> >
> > Since this feature enables backwards-compatible deployment of LSE
> > atomics, I'd support that.
> >
> > That is okay with me in principle after GCC 9.3 is released (the 
> branch
> > is currently frozen).
> >
> > However, there have been a few foll

[Committed] S/390: Remove superfluous commutative constraint modifiers

2020-04-02 Thread Andreas Krebbel via Gcc-patches
For operands with an identical set of alternatives there is no point
in marking them commutative.  This patch removes the superfluous
constraint modifiers in vector.md and vx-builtins.md since it might
slow down reload without buying us anything.

There were even two patterns where the constraint modifier was plain
wrong: "sub3" and "ior_not3". Fortunately it never had any effect.

Bootstrapped and regression tested on s390x.

SPEC binaries built with and without the patch are identical.

gcc/ChangeLog:

2020-04-02  Andreas Krebbel  

* config/s390/vector.md ("add3", "mul3")
("and3", "notand3", "ior3", "ior_not3")
("xor3", "notxor3", "smin3", "smax3")
("umin3", "umax3", "vec_widen_smult_even_")
("vec_widen_umult_even_", "vec_widen_smult_odd_")
("vec_widen_umult_odd_", "add3", "sub3")
("mul3", "fma4", "fms4", "neg_fma4")
("neg_fms4", "*smax3_vxe", "*smaxv2df3_vx")
("*smin3_vxe", "*sminv2df3_vx"): Remove % constraint
modifier.
("vec_widen_umult_lo_", "vec_widen_umult_hi_")
("vec_widen_smult_lo_", "vec_widen_smult_hi_"):
Remove constraints from expander.
* config/s390/vx-builtins.md ("vacc_", "vacq")
("vacccq", "vec_avg", "vec_avgu", "vec_vmal")
("vec_vmah", "vec_vmalh", "vec_vmae")
("vec_vmale", "vec_vmao", "vec_vmalo")
("vec_smulh", "vec_umulh", "vec_nor3")
("vfmin", "vfmax"): Remove % constraint modifier.
---
 gcc/ChangeLog  | 22 ++
 gcc/config/s390/vector.md  | 80 +-
 gcc/config/s390/vx-builtins.md | 78 -
 3 files changed, 101 insertions(+), 79 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66a118b6f5f..5a8a2c525c9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2020-04-02  Andreas Krebbel  
+
+   * config/s390/vector.md ("add3", "mul3")
+   ("and3", "notand3", "ior3", "ior_not3")
+   ("xor3", "notxor3", "smin3", "smax3")
+   ("umin3", "umax3", "vec_widen_smult_even_")
+   ("vec_widen_umult_even_", "vec_widen_smult_odd_")
+   ("vec_widen_umult_odd_", "add3", "sub3")
+   ("mul3", "fma4", "fms4", "neg_fma4")
+   ("neg_fms4", "*smax3_vxe", "*smaxv2df3_vx")
+   ("*smin3_vxe", "*sminv2df3_vx"): Remove % constraint
+   modifier.
+   ("vec_widen_umult_lo_", "vec_widen_umult_hi_")
+   ("vec_widen_smult_lo_", "vec_widen_smult_hi_"):
+   Remove constraints from expander.
+   * config/s390/vx-builtins.md ("vacc_", "vacq")
+   ("vacccq", "vec_avg", "vec_avgu", "vec_vmal")
+   ("vec_vmah", "vec_vmalh", "vec_vmae")
+   ("vec_vmale", "vec_vmao", "vec_vmalo")
+   ("vec_smulh", "vec_umulh", "vec_nor3")
+   ("vfmin", "vfmax"): Remove % constraint modifier.
+
 2020-04-01  Peter Bergner  
 
PR rtl-optimization/94123
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 378c9e3c7fb..9d3df9813c9 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -694,7 +694,7 @@
 ; operation into two DImode ADDs.
 (define_insn "add3"
   [(set (match_operand:VIT   0 "nonimmediate_operand" "=v")
-   (plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "%v")
+   (plus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
  (match_operand:VIT 2 "general_operand"   "v")))]
   "TARGET_VX"
   "va\t%v0,%v1,%v2"
@@ -704,7 +704,7 @@
 (define_insn "sub3"
   [(set (match_operand:VIT0 "nonimmediate_operand" "=v")
(minus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
-  (match_operand:VIT 2 "general_operand"  "v")))]
+  (match_operand:VIT 2 "general_operand"   "v")))]
   "TARGET_VX"
   "vs\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
@@ -712,7 +712,7 @@
 ; vmlb, vmlhw, vmlf
 (define_insn "mul3"
   [(set (match_operand:VI_QHS  0 "register_operand" "=v")
-   (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "%v")
+   (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand"  "v")
 (match_operand:VI_QHS 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vml\t%v0,%v1,%v2"
@@ -767,7 +767,7 @@
 
 (define_insn "and3"
   [(set (match_operand:VT 0 "register_operand" "=v")
-   (and:VT (match_operand:VT 1 "register_operand" "%v")
+   (and:VT (match_operand:VT 1 "register_operand"  "v")
(match_operand:VT 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vn\t%v0,%v1,%v2"
@@ -777,7 +777,7 @@
 
 (define_insn "notand3"
   [(set (match_operand:VT 0 "register_operand" "=v")
-   (ior:VT (not:VT (match_operand:VT 1 "register_operand" "%v"))
+   (ior:VT (not:VT (match_operand:VT 1 "register_operand"  "v"))
(not:VT (match_operand:VT 2 "register_operand"  "v"]
   "TARGET_VXE"
   "vnn\t%v0,%v1,%v2"
@@ -787,7 +787,7 @@
 
 (define_insn "ior3"
   [(set (match_operand:VT

[PATCH] Fix PR94401 by considering reverse overrun

2020-04-02 Thread Kewen.Lin via Gcc-patches
Hi, 

The commit r10-7415 brings scalar type consideration
to eliminate epilogue peeling for gaps, but it exposed
a problem: the current handling doesn't consider
the memory access type VMAT_CONTIGUOUS_REVERSE, for
which the overrun happens on the low address side.  This
patch makes the code take care of it by updating
the offset and construction element order accordingly.

Bootstrapped/regtested on powerpc64le-linux-gnu P8
and aarch64-linux-gnu.

BR,
Kewen
---
gcc/ChangeLog

2020-04-02  Kewen Lin  

PR tree-optimization/94401
* tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
access type when loading halves of vector to avoid peeling for gaps.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 12beef6978c..3d27f59ba22 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9590,11 +9590,20 @@ vectorizable_load (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
if (new_vtype != NULL_TREE)
  ltype = half_vtype;
  }
+   tree offset = dataref_offset
+   ? dataref_offset
+   : build_int_cst (ref_type, 0);
+   if (ltype != vectype
+   && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ offset = size_binop (
+   PLUS_EXPR,
+   build_int_cst (ref_type,
+  DR_GROUP_GAP (first_stmt_info)
+* tree_to_uhwi (
+  TYPE_SIZE_UNIT (elem_type))),
+   offset);
data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr,
-dataref_offset
-? dataref_offset
-: build_int_cst (ref_type, 0));
+ = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
if (alignment_support_scheme == dr_aligned)
  ;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9607,16 +9616,27 @@ vectorizable_load (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
  TYPE_ALIGN (elem_type));
if (ltype != vectype)
  {
-   vect_copy_ref_info (data_ref, DR_REF 
(first_dr_info->dr));
+   vect_copy_ref_info (data_ref,
+   DR_REF (first_dr_info->dr));
tree tem = make_ssa_name (ltype);
new_stmt = gimple_build_assign (tem, data_ref);
-   vect_finish_stmt_generation (stmt_info, new_stmt, 
gsi);
+   vect_finish_stmt_generation (stmt_info, new_stmt,
+gsi);
data_ref = NULL;
vec *v;
vec_alloc (v, 2);
-   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-   build_zero_cst (ltype));
+   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+   build_zero_cst (ltype));
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ }
+   else
+ {
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+   build_zero_cst (ltype));
+ }
gcc_assert (new_vtype != NULL_TREE);
if (new_vtype == vectype)
  new_stmt = gimple_build_assign (


**ping** Re: [Patch][Fortran] Fix error cleanup of select rank (PR93522)

2020-04-02 Thread Tobias Burnus

On 3/27/20 11:06 AM, Tobias Burnus wrote:

Hi all,

here, the reject_statement cleanup and the freeing of the
namespace both remove the symbol. Solution: Remove it first,
then clean the namespace – then the reject_statement has no
(deleted) statement to clean up.

As select rank is new, that's again a GCC-10 only
regression (of invalid code).

OK?

Tobias

PS: valgrind shows
==71237==definitely lost: 0 bytes in 0 blocks
==71237==indirectly lost: 0 bytes in 0 blocks
==71237==  possibly lost: 0 bytes in 0 blocks
I did ignore:
==52255==still reachable: 500,682 bytes in 2,181 blocks
which is the same also with 'select... end select' commented.


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter