[PATCH] testsuite/rs6000: Add option to ignore vect cost model

2020-07-15 Thread Kewen.Lin via Gcc-patches
Hi,

In my testing of the cost tweaking for vector with length, I found
that the two cases below didn't get the expected output.  Since the
expected instructions rely on vectorization taking place, we don't
want vectorization to be disabled by the cost model.

To make the tests less fragile, the fix is to run them without the
vector cost model (-fno-vect-cost-model).

Is it ok for trunk?

BR,
Kewen
---
gcc/testsuite/ChangeLog:

* gcc.target/powerpc/conv-vectorize-1.c: Add option
-fno-vect-cost-model.
* gcc.target/powerpc/conv-vectorize-2.c: Likewise.

--
diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c
index d96db146864..8b8c88befd5 100644
--- a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-1.c
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -ftree-vectorize -mvsx" } */
+/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model" } */

 /* Test vectorizer can exploit vector conversion instructions to convert
unsigned/signed long long to float.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c 
b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c
index 5dd5deabdd3..06ae4e2d5ce 100644
--- a/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/conv-vectorize-2.c
@@ -1,5 +1,5 @@
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -ftree-vectorize -mvsx" } */
+/* { dg-options "-O2 -ftree-vectorize -mvsx -fno-vect-cost-model" } */

 /* Test vectorizer can exploit vector conversion instructions to convert
float to unsigned/signed long long.  */


[PATCH] c++: Get rid of convert_like* macros.

2020-07-15 Thread Marek Polacek via Gcc-patches
The convert_like* macros were introduced in

2000-03-05  Nathan Sidwell  

* call.c (convert_like): Macrofy.
(convert_like_with_context): New macro.

but now we can use overloading so we can do away with the macros.
I've also taken this chance to rename _real to _internal to make it
clear that it should not be called directly.

No functional change intended.
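
To make the shape of the change concrete, here is a self-contained sketch
(with hypothetical stand-in types, and the complain argument omitted for
brevity; this is not the actual call.c code) of the macro-to-overload
pattern:

struct conversion {};          // stand-in for GCC's conversion
using tree = const char *;     // stand-in for GCC's tree

// Full-argument worker (in the patch this is the renamed convert_like_real).
tree
convert_like (conversion *, tree expr, tree /*fn*/, int /*argnum*/,
              bool /*issue_conversion_warnings*/, bool /*c_cast_p*/)
{
  return expr;
}

// Overload replacing the old convert_like(CONV, EXPR, COMPLAIN) macro.
tree
convert_like (conversion *convs, tree expr)
{
  return convert_like (convs, expr, /*fn=*/nullptr, /*argnum=*/0,
                       /*issue_conversion_warnings=*/true,
                       /*c_cast_p=*/false);
}

// Wrapper replacing the old convert_like_with_context macro.
tree
convert_like_with_context (conversion *convs, tree expr, tree fn, int argnum)
{
  return convert_like (convs, expr, fn, argnum,
                       /*issue_conversion_warnings=*/true,
                       /*c_cast_p=*/false);
}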

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

* call.c (convert_like): Remove macro and introduce a new
wrapper instead.
(convert_like_with_context): Likewise.
(convert_like_real): Rename to convert_like.
(convert_like_real_1): Rename to convert_like_internal.  Call
convert_like instead of convert_like_real therein.
(perform_direct_initialization_if_possible): Call convert_like
instead of convert_like_real.
---
 gcc/cp/call.c | 159 +++---
 1 file changed, 86 insertions(+), 73 deletions(-)

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 6d5d5e801a5..c136cf2882e 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -161,18 +161,9 @@ static int compare_ics (conversion *, conversion *);
 static void maybe_warn_class_memaccess (location_t, tree,
const vec *);
 static tree build_over_call (struct z_candidate *, int, tsubst_flags_t);
-#define convert_like(CONV, EXPR, COMPLAIN) \
-  convert_like_real ((CONV), (EXPR), NULL_TREE, 0, \
-/*issue_conversion_warnings=*/true,\
-/*c_cast_p=*/false, (COMPLAIN))
-#define convert_like_with_context(CONV, EXPR, FN, ARGNO, COMPLAIN )\
-  convert_like_real ((CONV), (EXPR), (FN), (ARGNO),\
-/*issue_conversion_warnings=*/true,\
-/*c_cast_p=*/false, (COMPLAIN))
-static tree convert_like_real (conversion *, tree, tree, int, bool,
-  bool, tsubst_flags_t);
-static tree convert_like_real_1 (conversion *, tree, tree, int, bool,
-bool, tsubst_flags_t);
+static tree convert_like (conversion *, tree, tsubst_flags_t);
+static tree convert_like_with_context (conversion *, tree, tree, int,
+  tsubst_flags_t);
 static void op_error (const op_location_t &, enum tree_code, enum tree_code,
  tree, tree, tree, bool);
 static struct z_candidate *build_user_type_conversion_1 (tree, tree, int,
@@ -1235,7 +1226,7 @@ standard_conversion (tree to, tree from, tree expr, bool 
c_cast_p,
 {
   from = type_decays_to (from);
   fcode = TREE_CODE (from);
-  /* Tell convert_like_real that we're using the address.  */
+  /* Tell convert_like that we're using the address.  */
   conv->rvaluedness_matches_p = true;
   conv = build_conv (ck_lvalue, from, conv);
 }
@@ -1256,7 +1247,7 @@ standard_conversion (tree to, tree from, tree expr, bool 
c_cast_p,
}
   conv = build_conv (ck_rvalue, from, conv);
   if (flags & LOOKUP_PREFER_RVALUE)
-   /* Tell convert_like_real to set LOOKUP_PREFER_RVALUE.  */
+   /* Tell convert_like to set LOOKUP_PREFER_RVALUE.  */
conv->rvaluedness_matches_p = true;
   /* If we're performing copy-initialization, remember to skip
 explicit constructors.  */
@@ -1536,7 +1527,7 @@ standard_conversion (tree to, tree from, tree expr, bool 
c_cast_p,
 the conversion unless we're binding directly to a reference.  */
   conv->need_temporary_p = !(flags & LOOKUP_NO_TEMP_BIND);
   if (flags & LOOKUP_PREFER_RVALUE)
-   /* Tell convert_like_real to set LOOKUP_PREFER_RVALUE.  */
+   /* Tell convert_like to set LOOKUP_PREFER_RVALUE.  */
conv->rvaluedness_matches_p = true;
   /* If we're performing copy-initialization, remember to skip
 explicit constructors.  */
@@ -2499,7 +2490,7 @@ add_conv_candidate (struct z_candidate **candidates, tree 
fn, tree obj,
  t = build_identity_conv (argtype, NULL_TREE);
  t = build_conv (ck_user, totype, t);
  /* Leave the 'cand' field null; we'll figure out the conversion in
-convert_like_real if this candidate is chosen.  */
+convert_like if this candidate is chosen.  */
  convert_type = totype;
}
   else if (parmnode == void_list_node)
@@ -7283,38 +7274,9 @@ maybe_warn_array_conv (location_t loc, conversion *c, 
tree expr)
 "are only available with %<-std=c++20%> or %<-std=gnu++20%>");
 }
 
-/* Wrapper for convert_like_real_1 that handles creating IMPLICIT_CONV_EXPR.  
*/
-
-static tree
-convert_like_real (conversion *convs, tree expr, tree fn, int argnum,
-  bool issue_conversion_warnings,
-  bool c_cast_p, tsubst_flags_t complain)
-{
-  /* Creating &TARGET_EXPR<> in a template breaks when substituting,
- and creatin

Re: [PATCH, rs6000, gcc-8 ] Improve handling of built-in initialization. [PR95952]

2020-07-15 Thread Segher Boessenkool
Hi!

On Tue, Jul 14, 2020 at 12:15:01PM -0500, will schmidt wrote:
>   We've got a scenario with a combination of old hardware, gcc-8 and
> binutils where gcc will ICE during its selftest.  This ICE was exposed
> when the builtin processing for better #pragma support was added, where
> we no longer skip builtin initialization based on the current mask.

> OK for gcc-8 ?

Yes, but some formatting nits:

> +   /* PR95952:  Gracefully skip builtins that do not have the icode 
> properly
> +   set, but do have the builtin mask set.  This has occurred in older gcc
> +   builds with older binutils support when binutils refuses code 
> generation
> +   for instructions that it does not support.  This was exposed by 
> changes
> +   allowing all builtins being initialized for better #pragma support.  
> */

Nice useful comment :-)

> +   if (d->icode == CODE_FOR_nothing && d->mask) {
> +  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

The { goes on the next line:

  if (d->icode == CODE_FOR_nothing && d->mask)
{
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

(two spaces indent, twice).

  if (TARGET_DEBUG_BUILTIN)
{
  fprintf (stderr, "altivec_init_builtins, altivec predicate 
builtin %s", d->name);
  fprintf (stderr, " was skipped.  icode:%d, mask: %lx, 
builtin_mask: 0x%lx",
   d->icode, d->mask, builtin_mask);

(those lines are much too long, but debug code, I can't say I care much).

}

  continue;
}

So: { always goes on a line of its own, two columns extra indent both
before and after it; } always aligns exactly with the {.
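
In other words, the general GNU shape is (a generic example, not the patch
code):

  if (condition)
    {
      do_something ();
    }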

Okay for GCC 8 with that cleaned up.  Thank you!


Segher


[committed, OG10] amdgcn: Tune default OpenMP/OpenACC GPU utilization

2020-07-15 Thread Andrew Stubbs
This patch tunes the default GPU thread count for OpenMP and OpenACC on 
AMD GCN devices. It chooses a sensible default if no attributes are 
given at all, increases the number of OpenACC gangs if only one worker 
per gang is specified, and increases the number of workers otherwise. 
The tuning is still a work in progress as we fix issues that limit 
occupancy.


The patch is not suitable for mainline until the multiple-worker support 
is merged there, so for now this will exist on devel/omp/gcc-10 only.


Andrew
amdgcn: Tune default OpenMP/OpenACC GPU utilization

	libgomp/
	* plugin/plugin-gcn.c (parse_target_attributes): Automatically set
	the number of teams and threads if necessary.
	(gcn_exec): Automatically set the number of gangs and workers if
	necessary.

Co-Authored-By: Andrew Stubbs  

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 9b9e1981f9a..e93424072e6 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,11 @@
+2020-07-15  Kwok Cheung Yeung  
+	Andrew Stubbs   
+
+	* plugin/plugin-gcn.c (parse_target_attributes): Automatically set
+	the number of teams and threads if necessary.
+	(gcn_exec): Automatically set the number of gangs and workers if
+	necessary.
+
 2020-07-15  Julian Brown  
 
 	* config/gcn/gcn.c (gimple.h): Include.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 7f2b27c7b31..24504a032c6 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1222,24 +1222,55 @@ parse_target_attributes (void **input,
 
   if (gcn_dims_found)
 {
+  bool gfx900_workaround_p = false;
+
   if (agent->device_isa == EF_AMDGPU_MACH_AMDGCN_GFX900
 	  && gcn_threads == 0 && override_z_dim == 0)
 	{
-	  gcn_threads = 4;
+	  gfx900_workaround_p = true;
 	  GCN_WARNING ("VEGA BUG WORKAROUND: reducing default number of "
-		   "threads to 4 per team.\n");
+		   "threads to at most 4 per team.\n");
 	  GCN_WARNING (" - If this is not a Vega 10 device, please use "
 		   "GCN_NUM_THREADS=16\n");
 	}
 
+  /* Ideally, when a dimension isn't explicitly specified, we should
+	 tune it to run 40 (or 32?) threads per CU with no threads getting queued.
+	 In practice, we tune for peak performance on BabelStream, which
+	 for OpenACC is currently 32 threads per CU.  */
   def->ndim = 3;
-  /* Fiji has 64 CUs, but Vega20 has 60.  */
-  def->gdims[0] = (gcn_teams > 0) ? gcn_teams : get_cu_count (agent);
-  /* Each thread is 64 work items wide.  */
-  def->gdims[1] = 64;
-  /* A work group can have 16 wavefronts.  */
-  def->gdims[2] = (gcn_threads > 0) ? gcn_threads : 16;
-  def->wdims[0] = 1; /* Single team per work-group.  */
+  if (gcn_teams <= 0 && gcn_threads <= 0)
+	{
+	  /* Set up a reasonable number of teams and threads.  */
+	  gcn_threads = gfx900_workaround_p ? 4 : 16; // 8;
+	  def->gdims[0] = get_cu_count (agent); // * (40 / gcn_threads);
+	  def->gdims[2] = gcn_threads;
+	}
+  else if (gcn_teams <= 0 && gcn_threads > 0)
+	{
+	  /* Auto-scale the number of teams with the number of threads.  */
+	  def->gdims[0] = get_cu_count (agent); // * (40 / gcn_threads);
+	  def->gdims[2] = gcn_threads;
+	}
+  else if (gcn_teams > 0 && gcn_threads <= 0)
+	{
+	  int max_threads = gfx900_workaround_p ? 4 : 16;
+
+	  /* Auto-scale the number of threads with the number of teams.  */
+	  def->gdims[0] = gcn_teams;
+	  def->gdims[2] = 16; // get_cu_count (agent) * 40 / gcn_teams;
+	  if (def->gdims[2] == 0)
+	def->gdims[2] = 1;
+	  else if (def->gdims[2] > max_threads)
+	def->gdims[2] = max_threads;
+	}
+  else
+	{
+	  def->gdims[0] = gcn_teams;
+	  def->gdims[2] = gcn_threads;
+	}
+  def->gdims[1] = 64; /* Each thread is 64 work items wide.  */
+  def->wdims[0] = 1;  /* Single team per work-group.  */
   def->wdims[1] = 64;
   def->wdims[2] = 16;
   *result = def;
@@ -3032,13 +3063,34 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
   if (hsa_kernel_desc->oacc_dims[2] > 0)
 dims[2] = hsa_kernel_desc->oacc_dims[2];
 
-  /* If any of the OpenACC dimensions remain 0 then we get to pick a number.
- There isn't really a correct answer for this without a clue about the
- problem size, so let's do a reasonable number of single-worker gangs.
- 64 gangs matches a typical Fiji device.  */
+  /* Ideally, when a dimension isn't explicitly specified, we should
+ tune it to run 40 (or 32?) threads per CU with no threads getting queued.
+ In practice, we tune for peak performance on BabelStream, which
+ for OpenACC is currently 32 threads per CU.  */
+  if (dims[0] == 0 && dims[1] == 0)
+{
+  /* If any of the OpenACC dimensions remain 0 then we get to pick a
+	 number.  There isn't really a correct answer for this without a clue
+	 about the problem size, so let's do a reasonable number of workers
+	 and gangs.  */
 
-  if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs.  */
-  if 

Re: [PATCH 0/6 ver 4] ] Permute Class Operations

2020-07-15 Thread Segher Boessenkool
Hi!

On Wed, Jul 08, 2020 at 12:59:29PM -0700, Carl Love wrote:
> [PATCH 5/6] rs6000, Add vector splat builtin support

> +(define_insn "xxspltiw_v4si"
> +  [(set (match_operand:V4SI 0 "register_operand" "=wa")
> + (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
> +  UNSPEC_XXSPLTIW))]
> + "TARGET_POWER10"
> + "xxspltiw %x0,%1"
> + [(set_attr "type" "vecsimple")])

Hrm, from the instruction description (in the ISA) this should be an
unsigned integer, instead?  (GNU as doesn't care: it takes the low 32
bits of any integer; it doesn't have to be either an s32 or a u32,
apparently.)

> +(define_insn "xxspltiw_v4sf_inst"
> +  [(set (match_operand:V4SF 0 "register_operand" "=wa")
> + (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
> +  UNSPEC_XXSPLTIW))]
> + "TARGET_POWER10"
> + "xxspltiw %x0,%c1"
> + [(set_attr "type" "vecsimple")])

This will do exactly the same as just "%1"?  Or not?  (I.e. call
output_addr_const for that arg).  (We don't use %c anywhere else in the
port AFAICS, so let's not start that if there is no reason to).

> +(define_expand "xxspltidp_v2df"
> +  [(set (match_operand:V2DF 0 "register_operand" )
> + (unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
> +  UNSPEC_XXSPLTID))]
> + "TARGET_POWER10"
> +{
> +  long value = rs6000_const_f32_to_i32 (operands[1]);
> +  emit_insn (gen_xxspltidp_v2df_inst (operands[0], GEN_INT (value)));
> +  DONE;
> +})
> +
> +(define_insn "xxspltidp_v2df_inst"
> +  [(set (match_operand:V2DF 0 "register_operand" "=wa")
> + (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
> +  UNSPEC_XXSPLTID))]
> +  "TARGET_POWER10"
> +{
> +  /* Note, the xxspltidp gives undefined results if the operand is a single
> + precision subnormal number. */
> +  int value = INTVAL (operands[1]);
> +
> +  if (((value & 0x7F80) == 0) && ((value & 0x7F) != 0))
> +/* value is subnormal */
> +fprintf (stderr, "WARNING: Result for the xxspltidp instruction is 
> undefined for subnormal input values.\n");
> +
> +  return "xxspltidp %x0,%c1";
> +}
> +  [(set_attr "type" "vecsimple")])

There are utility functions to print warnings.  But we shouldn't warn at
all here.  Instead, the insn shouldn't match at all with bad inputs, or give
an actual error maybe (although it is nicer if the builtin handling code
does that).

> +(define_insn "xxsplti32dx_v4sf_inst"
> +  [(set (match_operand:V4SF 0 "register_operand" "=wa")
> + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
> +   (match_operand:QI 2 "u1bit_cint_operand" "n")
> +   (match_operand:SI 3 "s32bit_cint_operand" "n")]
> +  UNSPEC_XXSPLTI32DX))]
> +  "TARGET_POWER10"
> +  "xxsplti32dx %x0,%2,%3"
> +   [(set_attr "type" "vecsimple")])

(a space too much indent here)

> +;; Return 1 if op is a unsigned 1-bit constant integer.
> +(define_predicate "u1bit_cint_operand"

"an unsigned"

> +long long
> +rs6000_const_f32_to_i32 (rtx operand)
> +{
> +  long long value;
> +  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
> +
> +  gcc_assert (GET_MODE (operand) == SFmode);
> +  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
> +  return value;
> +}

Can this just return "int"?  (Or "unsigned int"?)


The rest of the patch looks good.


Segher


i386: Introduce peephole2 to use flags from CMPXCHG more [PR96189]

2020-07-15 Thread Uros Bizjak via Gcc-patches
The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared
and the value from the destination operand is loaded into EAX.  The
following assembly:

	movl	%esi, %eax
	lock cmpxchgl	%edx, (%rdi)
	cmpl	%esi, %eax
	sete	%al

can be optimized by removing the unneeded comparison, since a set ZF flag
signals that no update to EAX happened.
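
For reference, a C-level sketch of the pattern this targets (hypothetical
function, modelled loosely on the new test in the patch):

/* The success of the compare-and-swap is already recorded in ZF by
   lock cmpxchg, so the later cmp/sete on the old value is redundant.  */
bool
update_if_expected (unsigned int *p, unsigned int expected,
                    unsigned int desired)
{
  unsigned int old = expected;
  __atomic_compare_exchange_n (p, &old, desired, false,
                               __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  /* With the peephole, this test can reuse the ZF set by lock cmpxchg
     instead of emitting cmp + sete again.  */
  return old == expected;
}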

2020-07-15  Uroš Bizjak  

gcc/ChangeLog:
PR target/96189
* config/i386/sync.md
(peephole2 to remove unneeded compare after CMPXCHG): New pattern.

gcc/testsuite/ChangeLog:
PR target/96189
* gcc.target/i386/pr96189.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 9ab5456b227..d203e9d1ecb 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -594,6 +594,41 @@
   "TARGET_CMPXCHG"
   "lock{%;} %K4cmpxchg{}\t{%3, %1|%1, %3}")
 
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+   (match_operand:SWI 1 "general_operand"))
+   (parallel [(set (match_dup 0)
+  (unspec_volatile:SWI
+[(match_operand:SWI 2 "memory_operand")
+ (match_dup 0)
+ (match_operand:SWI 3 "register_operand")
+ (match_operand:SI 4 "const_int_operand")]
+UNSPECV_CMPXCHG))
+ (set (match_dup 2)
+  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+   (set (reg:CCZ FLAGS_REG)
+   (compare:CCZ (match_operand:SWI 5 "register_operand")
+(match_operand:SWI 6 "general_operand")))]
+  "(rtx_equal_p (operands[0], operands[5])
+&& rtx_equal_p (operands[1], operands[6]))
+   || (rtx_equal_p (operands[0], operands[6])
+   && rtx_equal_p (operands[1], operands[5]))"
+  [(set (match_dup 0)
+   (match_dup 1))
+   (parallel [(set (match_dup 0)
+  (unspec_volatile:SWI
+[(match_dup 2)
+ (match_dup 0)
+ (match_dup 3)
+ (match_dup 4)]
+UNSPECV_CMPXCHG))
+ (set (match_dup 2)
+  (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+  (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189.c 
b/gcc/testsuite/gcc.target/i386/pr96189.c
new file mode 100644
index 000..1505e483b94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr96189.c
@@ -0,0 +1,12 @@
+/* PR target/96176 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (unsigned char *x, unsigned char y, unsigned char z)
+{
+  unsigned char y_old = y;
+  __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, 
__ATOMIC_RELAXED);
+  return y == y_old;
+}


Re: [PATCH] nvptx: Add support for subword compare-and-swap

2020-07-15 Thread Kwok Cheung Yeung

On 01/07/2020 3:28 pm, Tom de Vries wrote:

I looked at the implementation, and it looks ok to me, though I think we
need to make explicit in a comment what the assumptions are:
- that we have read and write access to the entire word, and


Is there a situation on this architecture where an 8/16-bit portion of memory
is R/W but the 32-bit word containing it is not?  Something like memory-mapped
I/O, perhaps?



- that the word is not volatile.


I don't think that non-volatility matters in this case - indeed, the whole point 
of using an atomic primitive is that the memory being accessed can change at any 
time :-). There is an initial read of the word before the loop, then one read 
per iteration of the loop using __sync_val_compare_and_swap_4 (which by 
definition will always access the memory). If any part of the initially read 
value changes by the time it gets to __sync_val_compare_and_swap_4, then the 
compare will fail and the loop will continue onto the next iteration.
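
For the record, here is a rough, hypothetical sketch (not the actual nvptx
libgcc code) of how a 16-bit compare-and-swap can be emulated on top of the
32-bit primitive; this is also where the whole-word read/write assumption
comes from:

#include <cstdint>

// Assumes ptr is 2-byte aligned, the containing 32-bit word may be read
// and written in full, and the layout is little-endian.
uint16_t
cas_u16 (uint16_t *ptr, uint16_t expected, uint16_t desired)
{
  uint32_t *wordp = (uint32_t *) ((uintptr_t) ptr & ~(uintptr_t) 3);
  unsigned shift = ((uintptr_t) ptr & 2) * 8;
  uint32_t mask = (uint32_t) 0xffff << shift;

  uint32_t old_word = *wordp;                 // initial read before the loop
  for (;;)
    {
      uint16_t cur = (uint16_t) ((old_word & mask) >> shift);
      if (cur != expected)
        return cur;                           // report the value we saw

      uint32_t new_word = (old_word & ~mask) | ((uint32_t) desired << shift);
      uint32_t prev = __sync_val_compare_and_swap (wordp, old_word, new_word);
      if (prev == old_word)
        return expected;                      // swap succeeded
      old_word = prev;                        // something changed; retry
    }
}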


Thanks

Kwok


Re: [PATCH 3/4] libstdc++: Add floating-point std::to_chars implementation

2020-07-15 Thread Patrick Palka via Gcc-patches
On Tue, 14 Jul 2020, Patrick Palka wrote:

> This implements the floating-point std::to_chars overloads for float,
> double and long double.  We use the Ryu library to compute the shortest
> round-trippable fixed and scientific forms of a number for float, double
> and long double.  We also use Ryu for performing fixed and scientific
> formatting of float and double. For formatting long double with an
> explicit precision argument we use a printf fallback.  Hexadecimal
> formatting for float, double and long double is implemented from
> scratch.
> 
> The supported long double binary formats are float64 (same as double),
> float80 (x86 extended precision), float128 and ibm128.
> 
> Much of the complexity of the implementation is in computing the exact
> output length before handing it off to Ryu (which doesn't do bounds
> checking).  In some cases it's hard to compute the output length before
> the fact, so in these cases we instead compute an upper bound on the
> output length and use a sufficiently-sized intermediate buffer (if the
> output range is smaller than the upper bound).
> 
> Another source of complexity is in the general-with-precision formatting
> mode, where we need to do zero-trimming of the string returned by Ryu, and
> where we also take care to avoid having to format the string a second
> time when the general formatting mode resolves to fixed.
> 
> Tested on x86_64-pc-linux-gnu, aarch64-unknown-linux-gnu,
> s390x-ibm-linux-gnu, and powerpc64-unknown-linux-gnu.
> 
> libstdc++-v3/ChangeLog:
> 
>   * acinclude.m4 (libtool_VERSION): Bump to 6:29:0.
>   * config/abi/pre/gnu.ver: Add new exports.
>   * configure: Regenerate.
>   * include/std/charconv (to_chars): Declare the floating-point
>   overloads for float, double and long double.
>   * src/c++17/Makefile.am (sources): Add floating_to_chars.cc.
>   * src/c++17/Makefile.in: Regenerate.
>   * src/c++17/floating_to_chars.cc: New file.
>   * testsuite/20_util/to_chars/long_double.cc: New test.
>   * testsuite/util/testsuite_abi.cc: Add new symbol version.

Here is v2 of this patch, which fixes a build failure on i386 due to
__int128 being unavailable, by refactoring the long double binary format
selection to avoid referring to __int128 when it doesn't exist.  The
patch also makes the hex formatting for 80-bit long double use uint64_t
instead of __int128 since the mantissa has exactly 64 bits in this case.

-- >8 --

Subject: [PATCH 3/4] libstdc++: Add floating-point std::to_chars
 implementation

This implements the floating-point std::to_chars overloads for float,
double and long double.  We use the Ryu library to compute the shortest
round-trippable fixed and scientific forms of a number for float, double
and long double.  We also use Ryu for performing fixed and scientific
formatting of float and double. For formatting long double with an
explicit precision argument we use a printf fallback.  Hexadecimal
formatting for float, double and long double is implemented from
scratch.

The supported long double binary formats are binary64, binary80 (x86
80-bit extended precision), binary128 and ibm128.

Much of the complexity of the implementation is in computing the exact
output length before handing it off to Ryu (which doesn't do bounds
checking).  In some cases it's hard to compute the output length before
the fact, so in these cases we instead compute an upper bound on the
output length and use a sufficiently-sized intermediate buffer (if the
output range is smaller than the upper bound).

Another source of complexity is in the general-with-precision formatting
mode, where we need to do zero-trimming of the string returned by Ryu, and
where we also take care to avoid having to format the string a second
time when the general formatting mode resolves to fixed.
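
To illustrate usage (nothing implementation-specific, just the standard
C++17 interface these overloads provide):

#include <charconv>
#include <cstdio>

int
main ()
{
  char buf[64];

  // Shortest round-trippable form of a long double.
  auto r1 = std::to_chars (buf, buf + sizeof buf, 0.1L);
  std::fwrite (buf, 1, r1.ptr - buf, stdout);
  std::putchar ('\n');

  // Scientific formatting of a double with an explicit precision.
  auto r2 = std::to_chars (buf, buf + sizeof buf, 3.14159,
                           std::chars_format::scientific, 3);
  std::fwrite (buf, 1, r2.ptr - buf, stdout);
  std::putchar ('\n');
}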

This implementation is non-conforming in a couple of ways:

1. For the shortest hexadecimal formatting, we currently follow the
   Microsoft implementation's approach of being consistent with the
   output of printf's '%a' specifier at the expense of sometimes not
   printing the shortest representation.  For example, the shortest hex
   form of 1.08p+0 is 2.1p-1, but we output the former instead of the
   latter, as does printf.

2. The Ryu routines for doing shortest formatting on types larger than
   binary64 use the __int128 type, and some targets (e.g.  i386) have a
   large long double type but lack __int128.  For such targets we make
   the long double to_chars overloads go through the double overloads,
   which means we lose precision in the output.  (The mantissa of long
   double is 64 bits on i386, so I think we could potentially fix this
   by writing a specialized version of the generic Ryu formatting
   routine which works with uint64_t instead of __int128.)

3. The __ibm128 shortest formatting routines don't guarantee
   round-trippability if the exponent difference between the low-order
   and high-order parts is too large.  This is 

Re: [PATCH 2/4] libstdc++: Apply modifications to our local copy of Ryu

2020-07-15 Thread Patrick Palka via Gcc-patches
On Tue, 14 Jul 2020, Patrick Palka wrote:

> This performs the following modifications to our local copy of Ryu in
> order to make it more easily usable for our std::to_chars implementation:
> 
>   * Remove all #includes
>   * Remove copy_special_str routines
>   * Adjust the exponent formatting to match printf
>   * Remove some functions we're not going to use
>   * Add an out-parameter to d2exp_buffered_n for the scientific exponent
>   * Store the sign bit inside struct floating_decimal_[32|64]
>   * Rename [df]2s_buffered_n and change their return type

Here is v2 of this patch, which applies another modification to Ryu that 
v2 of the third patch in the series depends on:

-- >8 --

Subject: [PATCH 2/4] libstdc++: Apply modifications to our local copy of Ryu

This performs the following modifications to our local copy of Ryu in
order to make it more easily usable in our std::to_chars implementation:

  * Remove all #includes
  * Remove copy_special_str routines
  * Adjust the exponent formatting to match printf
  * Remove some functions we're not going to use
  * Add an out-parameter to d2exp_buffered_n for the scientific exponent
  * Store the sign bit inside struct floating_decimal_[32|64]
  * Rename [df]2s_buffered_n and change their return type
  * Make generic_binary_to_decimal take the bit representation in parts

libstdc++-v3/ChangeLog:

* src/c++17/ryu/common.h, src/c++17/ryu/d2fixed.c,
src/c++17/ryu/d2fixed_full_table.h, src/c++17/ryu/d2s.c,
src/c++17/ryu/d2s_intrinsics.h, src/c++17/ryu/f2s.c,
src/c++17/ryu/f2s_intrinsics.h: Apply local modifications.
---
 libstdc++-v3/src/c++17/ryu/common.h   | 19 
 libstdc++-v3/src/c++17/ryu/d2fixed.c  | 98 ++-
 .../src/c++17/ryu/d2fixed_full_table.h|  1 -
 libstdc++-v3/src/c++17/ryu/d2s.c  | 56 +++
 libstdc++-v3/src/c++17/ryu/d2s_intrinsics.h   |  4 -
 libstdc++-v3/src/c++17/ryu/f2s.c  | 52 +++---
 libstdc++-v3/src/c++17/ryu/f2s_intrinsics.h   |  4 -
 libstdc++-v3/src/c++17/ryu/generic_128.c  | 47 ++---
 8 files changed, 45 insertions(+), 236 deletions(-)

diff --git a/libstdc++-v3/src/c++17/ryu/common.h 
b/libstdc++-v3/src/c++17/ryu/common.h
index 7dc130947ac..f8ee147db04 100644
--- a/libstdc++-v3/src/c++17/ryu/common.h
+++ b/libstdc++-v3/src/c++17/ryu/common.h
@@ -17,9 +17,6 @@
 #ifndef RYU_COMMON_H
 #define RYU_COMMON_H
 
-#include 
-#include 
-#include 
 
 #if defined(_M_IX86) || defined(_M_ARM)
 #define RYU_32_BIT_PLATFORM
@@ -83,22 +80,6 @@ static inline uint32_t log10Pow5(const int32_t e) {
   return (((uint32_t) e) * 732923) >> 20;
 }
 
-static inline int copy_special_str(char * const result, const bool sign, const 
bool exponent, const bool mantissa) {
-  if (mantissa) {
-memcpy(result, "NaN", 3);
-return 3;
-  }
-  if (sign) {
-result[0] = '-';
-  }
-  if (exponent) {
-memcpy(result + sign, "Infinity", 8);
-return sign + 8;
-  }
-  memcpy(result + sign, "0E0", 3);
-  return sign + 3;
-}
-
 static inline uint32_t float_to_bits(const float f) {
   uint32_t bits = 0;
   memcpy(&bits, &f, sizeof(float));
diff --git a/libstdc++-v3/src/c++17/ryu/d2fixed.c 
b/libstdc++-v3/src/c++17/ryu/d2fixed.c
index 5f479abb91b..642a29d3010 100644
--- a/libstdc++-v3/src/c++17/ryu/d2fixed.c
+++ b/libstdc++-v3/src/c++17/ryu/d2fixed.c
@@ -23,23 +23,11 @@
 //
 // -DRYU_AVOID_UINT128 Avoid using uint128_t. Slower, depending on your 
compiler.
 
-#include "ryu/ryu.h"
 
-#include 
-#include 
-#include 
-#include 
-#include 
 
 #ifdef RYU_DEBUG
-#include 
-#include 
 #endif
 
-#include "ryu/common.h"
-#include "ryu/digit_table.h"
-#include "ryu/d2fixed_full_table.h"
-#include "ryu/d2s_intrinsics.h"
 
 #define DOUBLE_MANTISSA_BITS 52
 #define DOUBLE_EXPONENT_BITS 11
@@ -328,33 +316,6 @@ static inline uint32_t lengthForIndex(const uint32_t idx) {
   return (log10Pow2(16 * (int32_t) idx) + 1 + 16 + 8) / 9;
 }
 
-static inline int copy_special_str_printf(char* const result, const bool sign, 
const uint64_t mantissa) {
-#if defined(_MSC_VER)
-  // TODO: Check that -nan is expected output on Windows.
-  if (sign) {
-result[0] = '-';
-  }
-  if (mantissa) {
-if (mantissa < (1ull << (DOUBLE_MANTISSA_BITS - 1))) {
-  memcpy(result + sign, "nan(snan)", 9);
-  return sign + 9;
-}
-memcpy(result + sign, "nan", 3);
-return sign + 3;
-  }
-#else
-  if (mantissa) {
-memcpy(result, "nan", 3);
-return 3;
-  }
-  if (sign) {
-result[0] = '-';
-  }
-#endif
-  memcpy(result + sign, "Infinity", 8);
-  return sign + 8;
-}
-
 int d2fixed_buffered_n(double d, uint32_t precision, char* result) {
   const uint64_t bits = double_to_bits(d);
 #ifdef RYU_DEBUG
@@ -372,20 +333,10 @@ int d2fixed_buffered_n(double d, uint32_t precision, 
char* result) {
 
   // Case distinction; exit early for the easy cases.
   if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
-return copy_special_str_printf(result, ieeeSign, ieeeMan

[PATCH] x86: Inline strncmp only with -minline-all-stringops

2020-07-15 Thread H.J. Lu via Gcc-patches
Expand strncmp to "repz cmpsb" only with -minline-all-stringops, since
"repz cmpsb" can be much slower than a strncmp function implemented with
vector instructions; see

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052

gcc/

PR target/95458
* config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem):
Return false for -mno-inline-all-stringops.

gcc/testsuite/

PR target/95458
* gcc.target/i386/pr95458-1.c: New test.
* gcc.target/i386/pr95458-2.c: Likewise.
---
 gcc/config/i386/i386-expand.c | 19 +++
 gcc/testsuite/gcc.target/i386/pr95458-1.c | 11 +++
 gcc/testsuite/gcc.target/i386/pr95458-2.c |  7 +++
 3 files changed, 25 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95458-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95458-2.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index a3a62e341b4..04f8c6ef394 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -7650,7 +7650,13 @@ bool
 ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2,
   rtx length, rtx align, bool is_cmpstrn)
 {
-  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+  /* Expand strncmp and memcmp only with -minline-all-stringops since
+ "repz cmpsb" can be much slower than strncmp and memcmp functions
+ implemented with vector instructions, see
+
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+   */
+  if (!TARGET_INLINE_ALL_STRINGOPS)
 return false;
 
   /* Can't use this if the user has appropriated ecx, esi or edi.  */
@@ -7677,17 +7683,6 @@ ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx 
src2,
== STRING_CST
return false;
 }
-  else
-{
-  /* Expand memcmp to "repz cmpsb" only for -minline-all-stringops
-since "repz cmpsb" can be much slower than memcmp function
-implemented with vector instructions, see
-
-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
-   */
-  if (!TARGET_INLINE_ALL_STRINGOPS)
-   return false;
-}
 
   rtx addr1 = copy_addr_to_reg (XEXP (src1, 0));
   rtx addr2 = copy_addr_to_reg (XEXP (src2, 0));
diff --git a/gcc/testsuite/gcc.target/i386/pr95458-1.c 
b/gcc/testsuite/gcc.target/i386/pr95458-1.c
new file mode 100644
index 000..231a4787dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95458-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+int
+func (char *d, unsigned int l)
+{
+  return __builtin_strncmp (d, "foo", l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?strncmp" } } */
+/* { dg-final { scan-assembler "cmpsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95458-2.c 
b/gcc/testsuite/gcc.target/i386/pr95458-2.c
new file mode 100644
index 000..1a620444770
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95458-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-inline-all-stringops" } */
+
+#include "pr95458-1.c"
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?strncmp" } } */
+/* { dg-final { scan-assembler-not "cmpsb" } } */
-- 
2.26.2



Re: [PATCH] Require CET support only for the final GCC build

2020-07-15 Thread Joseph Myers
On Wed, 15 Jul 2020, Richard Biener wrote:

> But note one of the issues is that when not cross-compiling we're
> using a single libiberty for target and host objects (likewise

There shouldn't be a target libiberty, since commit 
8499116aa30a46993deff5acf73985df6b16fb8b (re PR regression/47836 (Some 
Cross Compiler can't build target-libiberty or target-zlib), Wed Jun 22 
19:40:45 2011 +).  If something is causing target libiberty to be 
built, that's a bug that should be fixed.

> That said, giving configury an idea whether it configures for
> the host, the target or the build would be required here - Joseph,
> is there an existing mechanism for example libiberty can use
> here?

Makefile.def has some settings specific to host or build, e.g.

build_modules= { module= libcpp;
 extra_configure_flags='--disable-nls am_cv_func_iconv=no';};

or

host_modules= { module= libiberty; bootstrap=true;

extra_configure_flags='@extra_host_libiberty_configure_flags@';};

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH 2/2] S/390: Emit vector alignment hints for z13 if AS accepts them

2020-07-15 Thread Stefan Schulze Frielinghaus via Gcc-patches
gcc/ChangeLog:

* config.in: Regenerate.
* config/s390/s390.c (print_operand): Emit vector alignment hints
for target z13, if AS accepts them.  For other targets the logic
stays the same.
* config/s390/s390.h (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS): Define
macro.
* configure: Regenerate.
* configure.ac: Check HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13.

gcc/testsuite/ChangeLog:

* gcc.target/s390/vector/align-1.c: Change target architecture
to z13.
* gcc.target/s390/vector/align-2.c: Change target architecture
to z13.

(cherry picked from commit 929fd91ba975eebf9e57f7f092041271dcaf0c34)
(squashed with commit 87cb9423add08743d8bb3368f0af61ddc9572837)
---
 gcc/config.in |  7 +
 gcc/config/s390/s390.c|  4 +--
 gcc/config/s390/s390.h|  7 +
 gcc/configure | 31 +++
 gcc/configure.ac  |  5 +++
 .../gcc.target/s390/vector/align-1.c  |  2 +-
 .../gcc.target/s390/vector/align-2.c  |  2 +-
 7 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index 4924b8a0c32..051e6afb097 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -724,6 +724,13 @@
 #endif
 
 
+/* Define if your assembler supports vl/vst/vlm/vstm with an optional
+   alignment hint argument on z13. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13
+#endif
+
+
 /* Define if your assembler supports VSX instructions. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_VSX
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 5aff2084e1b..9057154be07 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -7737,15 +7737,13 @@ print_operand (FILE *file, rtx x, int code)
   switch (code)
 {
 case 'A':
-#ifdef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
-  if (TARGET_ARCH12 && MEM_P (x))
+  if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
{
  if (MEM_ALIGN (x) >= 128)
fprintf (file, ",4");
  else if (MEM_ALIGN (x) == 64)
fprintf (file, ",3");
}
-#endif
   return;
 case 'C':
   fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 71a12b8c92e..c5307755aa1 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -154,6 +154,13 @@ enum processor_flags
(TARGET_VX && TARGET_CPU_VXE)
 #define TARGET_VXE_P(opts) \
(TARGET_VX_P (opts) && TARGET_CPU_VXE_P (opts))
+#if defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13)
+#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_Z13
+#elif defined(HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS)
+#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS TARGET_ARCH12
+#else
+#define TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS 0
+#endif
 
 #ifdef HAVE_AS_MACHINE_MACHINEMODE
 #define S390_USE_TARGET_ATTRIBUTE 1
diff --git a/gcc/configure b/gcc/configure
index 4dd81d24241..aa37763d6d4 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27786,6 +27786,37 @@ if test 
$gcc_cv_as_s390_vector_loadstore_alignment_hints = yes; then
 
 $as_echo "#define HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS 1" >>confdefs.h
 
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for vector 
load/store alignment hints on z13" >&5
+$as_echo_n "checking assembler for vector load/store alignment hints on z13... 
" >&6; }
+if ${gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13=no
+  if test x$gcc_cv_as != x; then
+$as_echo ' vl %v24,0(%r15),3 ' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mzarch -march=z13 -o conftest.o 
conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13=yes
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13" >&5
+$as_echo "$gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13" >&6; }
+if test $gcc_cv_as_s390_vector_loadstore_alignment_hints_on_z13 = yes; then
+
+$as_echo "#define HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS_ON_Z13 1" 
>>confdefs.h
+
 fi
 
 
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 6173a1c4f23..a3211db36c0 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4883,6 +4883,11 @@ pointers into PC-relative form.])
   [vl %v24,0(%r15),3 ],,
   [AC

[PATCH 1/2] S/390: Support vector load/store alignment hints

2020-07-15 Thread Stefan Schulze Frielinghaus via Gcc-patches
From: Andreas Krebbel 

The IBM z14 POP adds an optional alignment operand to the vl, vst,
vlm, and vstm instructions (vector loads and stores).  Vectors residing
on 8- or 16-byte boundaries might get loaded or stored faster on some
models, provided the instruction uses the proper hint operand.  A wrong
hint will hurt performance though.
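
To make the effect concrete, here is a hypothetical sketch (not from the
patch) of what changes for a sufficiently aligned vector object; the
register numbers are illustrative only:

typedef int v4si __attribute__ ((vector_size (16)));

v4si
copy_vec (const v4si *src, v4si *dst)
{
  v4si tmp = *src;   // without hints, roughly:  vl  %v24,0(%r2)
                     // with hints, roughly:     vl  %v24,0(%r2),4
  *dst = tmp;        // without hints, roughly:  vst %v24,0(%r3)
                     // with hints, roughly:     vst %v24,0(%r3),4
  return tmp;
}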

The attached testcase align-1 currently fails due to:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88085

gcc/ChangeLog:

2018-11-21  Andreas Krebbel  

* configure.ac: Add check for Binutils to determine whether vector
load/store alignment hints are supported.
* config.in: Regenerate.
* configure: Regenerate.
* config/s390/s390.c (print_operand): Support new output
modifier A.
* config/s390/s390.md ("movti"): Append alignment hint output
using the new output modifier 'A'.
* config/s390/vector.md ("mov", "*vec_tf_to_v1tf")
("*vec_ti_to_v1ti"): Likewise.

gcc/testsuite/ChangeLog:

2018-11-21  Andreas Krebbel  

* gcc.target/s390/vector/align-1.c: New test.
* gcc.target/s390/vector/align-2.c: New test.

From-SVN: r266336

(cherry picked from commit b8923037ef1b229326b7f238580d9176b8ff)
---
 gcc/config.in |  7 
 gcc/config/s390/s390.c| 13 +++
 gcc/config/s390/s390.md   |  4 +--
 gcc/config/s390/vector.md | 12 +++
 gcc/configure | 36 +++
 gcc/configure.ac  |  6 
 .../gcc.target/s390/vector/align-1.c  | 30 
 .../gcc.target/s390/vector/align-2.c  | 29 +++
 8 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-2.c

diff --git a/gcc/config.in b/gcc/config.in
index 5bccb408016..4924b8a0c32 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -717,6 +717,13 @@
 #endif
 
 
+/* Define if your assembler supports vl/vst/vlm/vstm with an optional
+   alignment hint argument. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
+#endif
+
+
 /* Define if your assembler supports VSX instructions. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_VSX
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 9e7fd2b04dd..5aff2084e1b 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -7697,6 +7697,8 @@ print_operand_address (FILE *file, rtx addr)
CODE specified the format flag.  The following format flags
are recognized:
 
+'A': On z14 or higher: If operand is a mem print the alignment
+hint usable with vl/vst prefixed by a comma.
 'C': print opcode suffix for branch condition.
 'D': print opcode suffix for inverse branch condition.
 'E': print opcode suffix for branch on index instruction.
@@ -7734,6 +7736,17 @@ print_operand (FILE *file, rtx x, int code)
 
   switch (code)
 {
+case 'A':
+#ifdef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
+  if (TARGET_ARCH12 && MEM_P (x))
+   {
+ if (MEM_ALIGN (x) >= 128)
+   fprintf (file, ",4");
+ else if (MEM_ALIGN (x) == 64)
+   fprintf (file, ",3");
+   }
+#endif
+  return;
 case 'C':
   fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
   return;
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 93b1ff0b8db..7114609b676 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1516,8 +1516,8 @@
vone\t%v0
vlvgp\t%v0,%1,%N1
#
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
#
#"
   [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*")
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 24b200e0c57..0fcb8d2e158 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -198,8 +198,8 @@
   ""
   "@
vlr\t%v0,%v1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
vzero\t%v0
vone\t%v0
vgbm\t%v0,%t1
@@ -549,8 +549,8 @@
   "TARGET_VX"
   "@
vmrhg\t%v0,%1,%N1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
vzero\t%v0
vlvgp\t%v0,%1,%N1"
   [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRR")])
@@ -561,8 +561,8 @@
   "TARGET_VX"
   "@
vlr\t%v0,%v1
-   vl\t%v0,%1
-   vst\t%v1,%0
+   vl\t%v0,%1%A1
+   vst\t%v1,%0%A0
vzero\t%v0
vone\t%v0
vlvgp\t%v0,%1,%N1"
diff --git a/gcc/configure b/gcc/configure
index 97ba7d7d69c..4dd81d24241 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27753,6 +27753,42 @@ $as_echo "#define HAVE_AS_ARCHITECTURE_MODIFIERS 1" 
>>confdefs.h
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for vector 
load/store alignment hints" >&5
+$as_echo_n "checking assembler for vector load/store alignment hints... " >&6; 
}
+if ${gcc_cv_as_s390_vector_

[PATCH 0/2][BACKPORT GCC8] S/390: Support vector load/store alignment hints

2020-07-15 Thread Stefan Schulze Frielinghaus via Gcc-patches
Backport for supporting vector load/store alignment hints for GCC8.

Bootstrapped and regtested on s390x with and without patched gas. Ok for
releases/gcc-8 branch?

Andreas Krebbel (1):
  S/390: Support vector load/store alignment hints

Stefan Schulze Frielinghaus (1):
  S/390: Emit vector alignment hints for z13 if AS accepts them

 gcc/config.in | 14 
 gcc/config/s390/s390.c| 11 +++
 gcc/config/s390/s390.h|  7 ++
 gcc/config/s390/s390.md   |  4 +-
 gcc/config/s390/vector.md | 12 ++--
 gcc/configure | 67 +++
 gcc/configure.ac  | 11 +++
 .../gcc.target/s390/vector/align-1.c  | 30 +
 .../gcc.target/s390/vector/align-2.c  | 29 
 9 files changed, 177 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/align-2.c

-- 
2.26.2



[PATCH] non-power-of-2 group size can be vectorized for 2-element vectors case (PR96208)

2020-07-15 Thread Dmitrij Pochepko
Hi,

here is an enhancement to GCC which allows load/store groups whose size is
not a power of 2 to be vectorized.
The current implementation uses interleaving permutations to transform
load/store groups, which is where the power-of-2 requirement comes from.
For N-element vectors the simplest approach is to use N single-element
insertions for any required vector permutation, and for 2-element vectors
that is a reasonable number of insertions.
Using this approach allows vectorization of cases which were not supported
before.
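
For illustration (a hypothetical snippet, not part of the patch), the four
2-element permutations the new check requires are just the low/high lane
combinations of the two input vectors:

typedef double v2df __attribute__ ((vector_size (16)));
typedef long long v2di __attribute__ ((vector_size (16)));

static const v2di sel_ll = {0, 2}, sel_lh = {0, 3};
static const v2di sel_hl = {1, 2}, sel_hh = {1, 3};

v2df ll (v2df a, v2df b) { return __builtin_shuffle (a, b, sel_ll); }
v2df lh (v2df a, v2df b) { return __builtin_shuffle (a, b, sel_lh); }
v2df hl (v2df a, v2df b) { return __builtin_shuffle (a, b, sel_hl); }
v2df hh (v2df a, v2df b) { return __builtin_shuffle (a, b, sel_hh); }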

bootstrapped and tested on x86_64-pc-linux-gnu and aarch64-linux-gnu.

Thanks,
Dmitrij
>From acf12c34f4bebbb5c6000a87bf9aaa58e48418bb Mon Sep 17 00:00:00 2001
From: Dmitrij Pochepko 
Date: Wed, 15 Jul 2020 18:07:26 +0300
Subject: [PATCH] non-power-of-2 group size can be vectorized for 2-element
 vectors case (PR96208)

Support for non-power-of-2 group size in vectorizer for 2-element vectors.

gcc/ChangeLog:

2020-07-15  Dmitrij Pochepko 

PR gcc/96208

* gcc/tree-vect-data-refs.c
	(vect_all_2element_permutations_supported): New function.
	(vect_permute_load_chain): Add new branch implementing the new algorithm.
	(vect_permute_store_chain): Likewise.
	(vect_grouped_load_supported): Adjust logic for the new algorithm.
	(vect_grouped_store_supported): Likewise.
	(vect_transform_grouped_load): Likewise.

gcc/testsuite/ChangeLog:

2020-07-15  Dmitrij Pochepko 

	PR gcc/96208

	* gcc.dg/vect/vect-non-pow2-group.c: New test.
---
 gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c |  25 +++
 gcc/tree-vect-data-refs.c   | 212 +---
 2 files changed, 218 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c b/gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c
new file mode 100644
index 000..7a22739
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_perm } */
+/* { dg-additional-options "-fdump-tree-vect-details -fno-vect-cost-model -Ofast" } */
+
+typedef struct {
+double m1, m2, m3, m4, m5;
+} the_struct_t;
+
+double bar1 (the_struct_t*);
+
+double foo (double* k, unsigned int n, the_struct_t* the_struct)
+{
+unsigned int u;
+the_struct_t result;
+for (u=0; u < n; u++, k--) {
+	result.m1 += (*k)*the_struct[u].m1;
+	result.m2 += (*k)*the_struct[u].m2;
+	result.m3 += (*k)*the_struct[u].m3;
+	result.m4 += (*k)*the_struct[u].m4;
+}
+return bar1 (&result);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index e35a215..caf4555 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -5027,6 +5027,37 @@ vect_create_destination_var (tree scalar_dest, tree vectype)
   return vec_dest;
 }
 
+/* Function vect_all_2element_permutations_supported
+
+   Returns TRUE if all possible permutations for 2-element
+   vectors are supported for requested mode.  */
+
+bool
+vect_all_2element_permutations_supported (machine_mode mode)
+{
+  // check all possible permutations for 2-element vectors
+  // for 2 vectors it'll be all low and high combinations:
+  // ll={0, 2}, lh={0, 3}, hl={1,2}, hh={1,3}
+  poly_uint64 nelt = GET_MODE_NUNITS (mode);
+  if (!known_eq (nelt, 2ULL))
+return false;
+  vec_perm_builder sel (nelt, 2, 2);
+  sel.quick_grow (2);
+  sel[0] = 0;
+  sel[1] = 2;
+  vec_perm_indices ll (sel, 2, 2);
+  sel[1] = 3;
+  vec_perm_indices lh (sel, 2, 2);
+  sel[0] = 1;
+  vec_perm_indices hh (sel, 2, 2);
+  sel[1] = 2;
+  vec_perm_indices hl (sel, 2, 2);
+  return can_vec_perm_const_p (mode, ll)
+  && can_vec_perm_const_p (mode, lh)
+  && can_vec_perm_const_p (mode, hl)
+  && can_vec_perm_const_p (mode, hh);
+}
+
 /* Function vect_grouped_store_supported.
 
Returns TRUE if interleave high and interleave low permutations
@@ -5038,13 +5069,15 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
   machine_mode mode = TYPE_MODE (vectype);
 
   /* vect_permute_store_chain requires the group size to be equal to 3 or
- be a power of two.  */
-  if (count != 3 && exact_log2 (count) == -1)
+ be a power of two or 2-element vectors to be used.  */
+  if (count != 3 && exact_log2 (count) == -1
+   && !known_eq (GET_MODE_NUNITS (mode), 2ULL))
 {
   if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			 "the size of the group of accesses"
-			 " is not a power of 2 or not eqaul to 3\n");
+			 " is not a power of 2 or not eqaul to 3"
+			 " and vector element number is not 2\n");
   return false;
 }
 
@@ -5113,9 +5146,14 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 	}
 	  return true;
 	}
+  else if (known_eq (GET_MODE_NUNITS (mode), 2ULL))
+	{
+	  ret

Re: [Patch] libgomp: Add Fortran routine support for allocators

2020-07-15 Thread Tobias Burnus

It turned out that using -fdefault-integer-8 on 32-bit systems didn't
work with omp_alloc, as that gave a 64-bit / kind=8 integer for the
(c_)size_t argument.  [As reported by Jakub on #gcc.]

Fixed (and committed to mainline + OG10) as attached; the most trivial
version of the patch did not work because for fixed-form input it then
exceeds the 72-character line limit; hence the additional continuation
lines.

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit 51542d9254426c54363a42451885a77b44ebbeaf
Author: Tobias Burnus 
Date:   Wed Jul 15 17:23:04 2020 +0200

libgomp.fortran/alloc-1.F90: Fix testcase for 32bit size_t

libgomp/ChangeLog:

* testsuite/libgomp.fortran/alloc-1.F90: Use c_size_t to
avoid conversion on 32bit systems from 32bit to 64bit due
to -fdefault-integer-8.

diff --git a/libgomp/testsuite/libgomp.fortran/alloc-1.F90 b/libgomp/testsuite/libgomp.fortran/alloc-1.F90
index e19077a78d0..8ecb4c41246 100644
--- a/libgomp/testsuite/libgomp.fortran/alloc-1.F90
+++ b/libgomp/testsuite/libgomp.fortran/alloc-1.F90
@@ -1,8 +1,8 @@
 ! { dg-additional-options "-Wall -Wextra -Wno-maybe-uninitialized" }
 #ifdef DEFAULT_INTEGER_8
-#define ONEoFIVE 105_c_size_t*8
+#define ONEoFIVE 105_c_size_t*8_c_size_t
 #else
-#define ONEoFIVE 105_c_size_t*4
+#define ONEoFIVE 105_c_size_t*4_c_size_t
 #endif
   program main
 use iso_c_binding
@@ -58,7 +58,8 @@
 integer, pointer, volatile :: p(:), p0, q(:), r(:)
 integer (omp_allocator_handle_kind) :: a, a2
 
-cp = omp_alloc (3 * c_sizeof (i), omp_default_mem_alloc)
+cp = omp_alloc (3_c_size_t * c_sizeof (i),  &
+ &  omp_default_mem_alloc)
 if (mod (transfer (cp, intptr), 4_c_intptr_t) /= 0) stop 1
 call c_f_pointer (cp, p, [3])
 p(1) = 1
@@ -66,7 +67,8 @@
 p(3) = 3
 call omp_free (cp, omp_default_mem_alloc)
 
-cp = omp_alloc (2 * c_sizeof (i), omp_default_mem_alloc)
+cp = omp_alloc (2_c_size_t * c_sizeof (i),  &
+ &  omp_default_mem_alloc)
 if (mod (transfer (cp, intptr), 4_c_intptr_t) /= 0) stop 2
 call c_f_pointer (cp, p, [2])
 p(1) = 1
@@ -121,7 +123,8 @@
 if (a2 == omp_null_allocator) stop 10
 cp = omp_alloc (ONEoFIVE, a2)
 if (mod (transfer (cp, intptr), 32_c_intptr_t) /= 0) stop 11
-call c_f_pointer (cp, p, [ONEoFIVE / c_sizeof (i)])
+call c_f_pointer (cp, p, [ONEoFIVE  &
+ &/ c_sizeof (i)])
 p(1) = 5
 p(ONEoFIVE / c_sizeof (i)) = 6
 cq = omp_alloc (768_c_size_t, a2)
@@ -149,9 +152,11 @@
  &   size (traits5), traits5)
 if (a2 == omp_null_allocator) stop 16
 call omp_set_default_allocator (a2)
-cp = omp_alloc (ONEoFIVE, omp_null_allocator)
+cp = omp_alloc (ONEoFIVE,   &
+ &  omp_null_allocator)
 if (mod (transfer (cp, intptr), 32_c_intptr_t) /= 0) stop 17
-call c_f_pointer (cq, q, [ONEoFIVE / c_sizeof (i)])
+call c_f_pointer (cq, q, [ONEoFIVE  &
+ &/ c_sizeof (i)])
 p(1) = 5
 p(ONEoFIVE / c_sizeof (i)) = 6
 cq = omp_alloc (768_c_size_t, omp_null_allocator)


Re: [PATCH] Require CET support only for the final GCC build

2020-07-15 Thread H.J. Lu via Gcc-patches
On Wed, Jul 15, 2020 at 8:20 AM Richard Biener  wrote:
>
> On Wed, 15 Jul 2020, H.J. Lu wrote:
>
> > With --enable-cet, require CET support only for the final GCC build.
> > Don't enable CET without CET support in stage1 nor for build support.
>
> I wonder if we want to do sth less fragile than testing
> for ../stage_current - for example the toplevel make could
> export sth special through POSTSTAGE1_FLAGS_TO_PASS?

That would be nice.

> But note one of the issues is that when not cross-compiling we're
> using a single libiberty for target and host objects (likewise

It shouldn't be a problem.

> for libstdc++ I guess).  When cross-compiling say powerpc -> x86_64
> we should already build stage1 libiberty used for target objects
> by the stage1 compiler and thus CET enabled.
>
> Note that for PR96202 it's host objects (gen*) that we get complaints
> for - the host objects should never get CET enabled.

We must enable CET in lto-plugin on CET-enabled hosts even if
CET isn't enabled in the GCC run-time.  Otherwise, ld can't dlopen
lto-plugin.

> That said, giving configury an idea whether it configures for
> the host, the target or the build would be required here - Joseph,
> is there an existing mechanism for example libiberty can use
> here?
>
> Thanks,
> Richard.
>
> > config/
> >
> >   PR bootstrap/96202
> >   * cet.m4 (GCC_CET_HOST_FLAGS): Don't enable CET without CET
> >   support in stage1 nor for build support.
> >
> > gcc/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > libbacktrace/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > libcc1/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > libcpp/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > libdecnumber/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > libiberty/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> >
> > lto-plugin/
> >
> >   PR bootstrap/96202
> >   * configure: Regenerated.
> > ---
> >  config/cet.m4  | 22 +-
> >  gcc/configure  | 30 ++
> >  libbacktrace/configure | 22 +-
> >  libcc1/configure   | 20 
> >  libcpp/configure   | 20 
> >  libdecnumber/configure | 20 
> >  libiberty/configure| 20 
> >  lto-plugin/configure   | 24 ++--
> >  8 files changed, 138 insertions(+), 40 deletions(-)
> >
> > diff --git a/config/cet.m4 b/config/cet.m4
> > index 911fbd46475..265756e4c81 100644
> > --- a/config/cet.m4
> > +++ b/config/cet.m4
> > @@ -13,7 +13,7 @@ case "$host" in
> >  case "$enable_cet" in
> >auto)
> >   # Check if target supports multi-byte NOPs
> > - # and if assembler supports CET insn.
> > + # and if compiler and assembler support CET insn.
> >   cet_save_CFLAGS="$CFLAGS"
> >   CFLAGS="$CFLAGS -fcf-protection"
> >   AC_COMPILE_IFELSE(
> > @@ -70,7 +70,7 @@ case "$host" in
> >  case "$enable_cet" in
> >auto)
> >   # Check if target supports multi-byte NOPs
> > - # and if assembler supports CET insn.
> > + # and if compiler and assembler support CET.
> >   AC_COMPILE_IFELSE(
> >[AC_LANG_PROGRAM(
> > [],
> > @@ -85,13 +85,25 @@ asm ("setssbsy");
> >[enable_cet=no])
> >   ;;
> >yes)
> > - # Check if assembler supports CET.
> > + # Check if compiler and assembler support CET.
> >   AC_COMPILE_IFELSE(
> >[AC_LANG_PROGRAM(
> > [],
> > [asm ("setssbsy");])],
> > -  [],
> > -  [AC_MSG_ERROR([assembler with CET support is required for 
> > --enable-cet])])
> > +  [support_cet=yes],
> > +  [support_cet=no])
> > + if test $support_cet = "no"; then
> > +   if test -z "${with_build_subdir}" \
> > +  && (test ! -f ../stage_current \
> > +  || test `cat ../stage_current` != "stage1"); then
> > + # Require CET support only for the final GCC build.
> > + AC_MSG_ERROR([compiler and assembler with CET support are 
> > required for --enable-cet])
> > +   else
> > + # Don't enable CET without CET support in stage1 nor for
> > + # build support.
> > + enable_cet=no
> > +   fi
> > + fi
> >   ;;
> >  esac
> >  CFLAGS="$cet_save_CFLAGS"
> > diff --git a/gcc/configure b/gcc/configure
> > index f224679ed3e..6d06220eb2c 100755
> > --- a/gcc/configure
> > +++ b/gcc/configure
> > @@ -785,10 +785,10 @@ manext
> >  LIBICONV_DEP
> >  LTLIBICONV
> >  LIBICONV
> > -ZSTD_LIB
> > -ZSTD_INCLUDE
> >  ZSTD_LDFLAGS
> >  ZSTD_CPPFLAGS
> > +ZSTD_LIB
> > +ZSTD_INCLUDE
> >  DL_LIB
> >  LDEXP_LIB
> >  EXTRA_GCC_LIBS
> > @@ -9978,6 +9978,8 @@ ZSTD_LIB=
> >  ZSTD_CPPFLAGS=
> >  ZSTD_LDFLAGS=
> >
> > +
> > +
> >  # Check whether --with-zstd was given.
> >  if test "${

Re: [PATCH] Require CET support only for the final GCC build

2020-07-15 Thread Richard Biener
On Wed, 15 Jul 2020, H.J. Lu wrote:

> With --enable-cet, require CET support only for the final GCC build.
> Don't enable CET without CET support in stage1 nor for build support.

I wonder if we want to do sth less fragile than testing
for ../stage_current - for example the toplevel make could
export sth special through POSTSTAGE1_FLAGS_TO_PASS?

But note one of the issues is that when not cross-compiling we're
using a single libiberty for target and host objects (likewise
for libstdc++ I guess).  When cross-compiling say powerpc -> x86_64
we should already build stage1 libiberty used for target objects
by the stage1 compiler and thus CET enabled.

Note that for PR96202 it's host objects (gen*) that we get complaints
for - the host objects should never get CET enabled.

That said, giving configury an idea whether it configures for
the host, the target or the build would be required here - Joseph,
is there an existing mechanism for example libiberty can use
here?

Thanks,
Richard.

> config/
> 
>   PR bootstrap/96202
>   * cet.m4 (GCC_CET_HOST_FLAGS): Don't enable CET without CET
>   support in stage1 nor for build support.
> 
> gcc/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> libbacktrace/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> libcc1/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> libcpp/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> libdecnumber/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> libiberty/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> 
> lto-plugin/
> 
>   PR bootstrap/96202
>   * configure: Regenerated.
> ---
>  config/cet.m4  | 22 +-
>  gcc/configure  | 30 ++
>  libbacktrace/configure | 22 +-
>  libcc1/configure   | 20 
>  libcpp/configure   | 20 
>  libdecnumber/configure | 20 
>  libiberty/configure| 20 
>  lto-plugin/configure   | 24 ++--
>  8 files changed, 138 insertions(+), 40 deletions(-)
> 
> diff --git a/config/cet.m4 b/config/cet.m4
> index 911fbd46475..265756e4c81 100644
> --- a/config/cet.m4
> +++ b/config/cet.m4
> @@ -13,7 +13,7 @@ case "$host" in
>  case "$enable_cet" in
>auto)
>   # Check if target supports multi-byte NOPs
> - # and if assembler supports CET insn.
> + # and if compiler and assembler support CET insn.
>   cet_save_CFLAGS="$CFLAGS"
>   CFLAGS="$CFLAGS -fcf-protection"
>   AC_COMPILE_IFELSE(
> @@ -70,7 +70,7 @@ case "$host" in
>  case "$enable_cet" in
>auto)
>   # Check if target supports multi-byte NOPs
> - # and if assembler supports CET insn.
> + # and if compiler and assembler support CET.
>   AC_COMPILE_IFELSE(
>[AC_LANG_PROGRAM(
> [],
> @@ -85,13 +85,25 @@ asm ("setssbsy");
>[enable_cet=no])
>   ;;
>yes)
> - # Check if assembler supports CET.
> + # Check if compiler and assembler support CET.
>   AC_COMPILE_IFELSE(
>[AC_LANG_PROGRAM(
> [],
> [asm ("setssbsy");])],
> -  [],
> -  [AC_MSG_ERROR([assembler with CET support is required for 
> --enable-cet])])
> +  [support_cet=yes],
> +  [support_cet=no])
> + if test $support_cet = "no"; then
> +   if test -z "${with_build_subdir}" \
> +  && (test ! -f ../stage_current \
> +  || test `cat ../stage_current` != "stage1"); then
> + # Require CET support only for the final GCC build.
> + AC_MSG_ERROR([compiler and assembler with CET support are required 
> for --enable-cet])
> +   else
> + # Don't enable CET without CET support in stage1 nor for
> + # build support.
> + enable_cet=no
> +   fi
> + fi
>   ;;
>  esac
>  CFLAGS="$cet_save_CFLAGS"
> diff --git a/gcc/configure b/gcc/configure
> index f224679ed3e..6d06220eb2c 100755
> --- a/gcc/configure
> +++ b/gcc/configure
> @@ -785,10 +785,10 @@ manext
>  LIBICONV_DEP
>  LTLIBICONV
>  LIBICONV
> -ZSTD_LIB
> -ZSTD_INCLUDE
>  ZSTD_LDFLAGS
>  ZSTD_CPPFLAGS
> +ZSTD_LIB
> +ZSTD_INCLUDE
>  DL_LIB
>  LDEXP_LIB
>  EXTRA_GCC_LIBS
> @@ -9978,6 +9978,8 @@ ZSTD_LIB=
>  ZSTD_CPPFLAGS=
>  ZSTD_LDFLAGS=
>  
> +
> +
>  # Check whether --with-zstd was given.
>  if test "${with_zstd+set}" = set; then :
>withval=$with_zstd;
> @@ -19021,7 +19023,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 19022 "configure"
> +#line 19026 "configure"
>  #include "confdefs.h"
>  
>  #if HAVE_DLFCN_H
> @@ -19127,7 +19129,7 @@ else
>lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>lt_status=$lt_dlunknown
>cat > conftest.$ac_ext <<_LT_EOF
> -#line 19128 "configure"
> +#line 19132 "configure"
>  #i

[committed] openmp: Fix up loop-21.c [PR96198]

2020-07-15 Thread Jakub Jelinek via Gcc-patches
Hi!

I've missed
+FAIL: libgomp.c/loop-21.c execution test
during testing of the recent patch.  The problem is the following: for the
number of iterations computation it doesn't matter whether we compute
min_inner_iterations as
(m2 * first + n2 + (adjusted step) + m1 * first + n1) / step
or as
(m2 * last + n2 + (adjusted step) + m1 * last + n1) / step,
provided that in the second case we use (m1 - m2) * ostep / step as the factor
rather than (m2 - m1) * ostep / step.  For the computation of actual iterator
values from logical ones it does matter, though.  In my hand-written C
implementations of all the cases (outer vs. inner loop, with increasing
vs. decreasing iterator) I'm using the same computation throughout, and it
worked well for all the pseudo-random iterator testing I did.

It also means min_inner_iterations is misnamed, because it is not really the
minimum number of inner iterations: whether the first or the last outer
iteration results in the smaller or larger value can (sometimes) only be
determined at runtime.
So this patch also renames it to first_inner_iterations.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2020-07-15  Jakub Jelinek  

PR libgomp/96198
* omp-general.h (struct omp_for_data): Rename min_inner_iterations
member to first_inner_iterations, adjust comment.
* omp-general.c (omp_extract_for_data): Adjust for the above change.
Always use n1first and n2first to compute it, rather than depending
on single_nonrect_cond_code.  Similarly, always compute factor
as (m2 - m1) * outer_step / inner_step rather than sometimes m1 - m2
depending on single_nonrect_cond_code.
* omp-expand.c (expand_omp_for_init_vars): Rename min_inner_iterations
to first_inner_iterations and min_inner_iterationsd to
first_inner_iterationsd.

--- gcc/omp-general.h.jj2020-07-14 12:20:01.520110629 +0200
+++ gcc/omp-general.h   2020-07-15 12:54:02.660265973 +0200
@@ -80,10 +80,9 @@ struct omp_for_data
   struct omp_for_data_loop *loops;
   /* The following are relevant only for non-rectangular loops
  where only a single loop depends on an outer loop iterator.  */
-  tree min_inner_iterations; /* Number of iterations of the inner
-   loop with either the first or last
-   outer iterator, depending on which
-   results in fewer iterations.  */
+  tree first_inner_iterations; /* Number of iterations of the inner
+ loop with the first outer iterator
+ (or adjn1, if that is non-NULL).  */
   tree factor; /* (m2 - m1) * outer_step / inner_step.  */
   /* Adjusted n1 of the outer loop in such loop nests (if needed).  */
   tree adjn1;
--- gcc/omp-general.c.jj2020-07-14 12:20:01.520110629 +0200
+++ gcc/omp-general.c   2020-07-15 12:54:38.793740074 +0200
@@ -212,7 +212,7 @@ omp_extract_for_data (gomp_for *for_stmt
   fd->sched_modifiers = 0;
   fd->chunk_size = NULL_TREE;
   fd->simd_schedule = false;
-  fd->min_inner_iterations = NULL_TREE;
+  fd->first_inner_iterations = NULL_TREE;
   fd->factor = NULL_TREE;
   fd->adjn1 = NULL_TREE;
   collapse_iter = NULL;
@@ -726,16 +726,8 @@ omp_extract_for_data (gomp_for *for_stmt
  if (loop->m1 || loop->m2)
{
  gcc_assert (single_nonrect != -1);
- if (single_nonrect_cond_code == LT_EXPR)
-   {
- n1 = n1first;
- n2 = n2first;
-   }
- else
-   {
- n1 = n1last;
- n2 = n2last;
-   }
+ n1 = n1first;
+ n2 = n2first;
}
  t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
@@ -754,8 +746,6 @@ omp_extract_for_data (gomp_for *for_stmt
 or last value of the outer iterator (the one with fewer
 iterations).
 Compute t2 = ((m2 - m1) * ostep) / step
-(for single_nonrect_cond_code GT_EXPR
- t2 = ((m1 - m2) * ostep) / step instead)
 and niters = outer_count * t
  + t2 * ((outer_count - 1) * outer_count / 2)
   */
@@ -763,11 +753,7 @@ omp_extract_for_data (gomp_for *for_stmt
  tree m2 = loop->m2 ? loop->m2 : integer_zero_node;
  m1 = fold_convert (itype, m1);
  m2 = fold_convert (itype, m2);
- tree t2;
- if (single_nonrect_cond_code == LT_EXPR)
-   t2 = fold_build2 (MINUS_EXPR, itype, m2, m1);
- else
-   t2 = fold_build2 (MINUS_EXPR, itype, m1, m2);
+ tree t2 = fold_build2 (MINUS_EXPR,

[PATCH] Require CET support only for the final GCC build

2020-07-15 Thread H.J. Lu via Gcc-patches
With --enable-cet, require CET support only for the final GCC build.
Don't enable CET without CET support in stage1 nor for build support.

config/

PR bootstrap/96202
* cet.m4 (GCC_CET_HOST_FLAGS): Don't enable CET without CET
support in stage1 nor for build support.

gcc/

PR bootstrap/96202
* configure: Regenerated.

libbacktrace/

PR bootstrap/96202
* configure: Regenerated.

libcc1/

PR bootstrap/96202
* configure: Regenerated.

libcpp/

PR bootstrap/96202
* configure: Regenerated.

libdecnumber/

PR bootstrap/96202
* configure: Regenerated.

libiberty/

PR bootstrap/96202
* configure: Regenerated.

lto-plugin/

PR bootstrap/96202
* configure: Regenerated.
---
 config/cet.m4  | 22 +-
 gcc/configure  | 30 ++
 libbacktrace/configure | 22 +-
 libcc1/configure   | 20 
 libcpp/configure   | 20 
 libdecnumber/configure | 20 
 libiberty/configure| 20 
 lto-plugin/configure   | 24 ++--
 8 files changed, 138 insertions(+), 40 deletions(-)

diff --git a/config/cet.m4 b/config/cet.m4
index 911fbd46475..265756e4c81 100644
--- a/config/cet.m4
+++ b/config/cet.m4
@@ -13,7 +13,7 @@ case "$host" in
 case "$enable_cet" in
   auto)
# Check if target supports multi-byte NOPs
-   # and if assembler supports CET insn.
+   # and if compiler and assembler support CET insn.
cet_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -fcf-protection"
AC_COMPILE_IFELSE(
@@ -70,7 +70,7 @@ case "$host" in
 case "$enable_cet" in
   auto)
# Check if target supports multi-byte NOPs
-   # and if assembler supports CET insn.
+   # and if compiler and assembler support CET.
AC_COMPILE_IFELSE(
 [AC_LANG_PROGRAM(
  [],
@@ -85,13 +85,25 @@ asm ("setssbsy");
 [enable_cet=no])
;;
   yes)
-   # Check if assembler supports CET.
+   # Check if compiler and assembler support CET.
AC_COMPILE_IFELSE(
 [AC_LANG_PROGRAM(
  [],
  [asm ("setssbsy");])],
-[],
-[AC_MSG_ERROR([assembler with CET support is required for 
--enable-cet])])
+[support_cet=yes],
+[support_cet=no])
+   if test $support_cet = "no"; then
+ if test -z "${with_build_subdir}" \
+&& (test ! -f ../stage_current \
+|| test `cat ../stage_current` != "stage1"); then
+   # Require CET support only for the final GCC build.
+   AC_MSG_ERROR([compiler and assembler with CET support are required 
for --enable-cet])
+ else
+   # Don't enable CET without CET support in stage1 nor for
+   # build support.
+   enable_cet=no
+ fi
+   fi
;;
 esac
 CFLAGS="$cet_save_CFLAGS"
diff --git a/gcc/configure b/gcc/configure
index f224679ed3e..6d06220eb2c 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -785,10 +785,10 @@ manext
 LIBICONV_DEP
 LTLIBICONV
 LIBICONV
-ZSTD_LIB
-ZSTD_INCLUDE
 ZSTD_LDFLAGS
 ZSTD_CPPFLAGS
+ZSTD_LIB
+ZSTD_INCLUDE
 DL_LIB
 LDEXP_LIB
 EXTRA_GCC_LIBS
@@ -9978,6 +9978,8 @@ ZSTD_LIB=
 ZSTD_CPPFLAGS=
 ZSTD_LDFLAGS=
 
+
+
 # Check whether --with-zstd was given.
 if test "${with_zstd+set}" = set; then :
   withval=$with_zstd;
@@ -19021,7 +19023,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19022 "configure"
+#line 19026 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19127,7 +19129,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19128 "configure"
+#line 19132 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -30792,7 +30794,7 @@ case "$host" in
 case "$enable_cet" in
   auto)
# Check if target supports multi-byte NOPs
-   # and if assembler supports CET insn.
+   # and if compiler and assembler support CET.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -30818,7 +30820,7 @@ fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
;;
   yes)
-   # Check if assembler supports CET.
+   # Check if compiler and assembler support CET.
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
@@ -30831,11 +30833,23 @@ asm ("setssbsy");
 }
 _ACEOF
 if ac_fn_cxx_try_compile "$LINENO"; then :
-
+  support_cet=yes
 else
-  as_fn_error $? "assembler with CET support is required for --enable-cet" 
"$LINENO" 5
+  support_cet=no
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   if test $support_cet = "no"; then
+ if test -z "${with_build_subdir}" \
+&& (t

c++: refactor some parser code

2020-07-15 Thread Nathan Sidwell
cp_parser_declaration copies tokens to local variables, before 
inspecting (some of) their fields.  There's no need.  Just point at them 
in the token buffer -- they don't move.  Also, we never look at the 
second token if the first is EOF, so no need for some kind of dummy 
value in that case.


gcc/cp/
* parser.c (cp_parser_declaration): Avoid copying tokens.
(cp_parser_block_declaration): RAII token pointer.

pushed

--
Nathan Sidwell
diff --git i/gcc/cp/parser.c w/gcc/cp/parser.c
index 1532431378e..11db02418bc 100644
--- i/gcc/cp/parser.c
+++ w/gcc/cp/parser.c
@@ -13402,11 +13402,7 @@ cp_parser_declaration_seq_opt (cp_parser* parser)
 static void
 cp_parser_declaration (cp_parser* parser)
 {
-  cp_token token1;
-  cp_token token2;
   int saved_pedantic;
-  void *p;
-  tree attributes = NULL_TREE;
 
   /* Check for the `__extension__' keyword.  */
   if (cp_parser_extension_opt (parser, &saved_pedantic))
@@ -13420,35 +13416,33 @@ cp_parser_declaration (cp_parser* parser)
 }
 
   /* Try to figure out what kind of declaration is present.  */
-  token1 = *cp_lexer_peek_token (parser->lexer);
+  cp_token *token1 = cp_lexer_peek_token (parser->lexer);
+  cp_token *token2 = NULL;
 
-  if (token1.type != CPP_EOF)
-token2 = *cp_lexer_peek_nth_token (parser->lexer, 2);
-  else
-{
-  token2.type = CPP_EOF;
-  token2.keyword = RID_MAX;
-}
+  if (token1->type != CPP_EOF)
+token2 = cp_lexer_peek_nth_token (parser->lexer, 2);
 
   /* Get the high-water mark for the DECLARATOR_OBSTACK.  */
-  p = obstack_alloc (&declarator_obstack, 0);
+  void *p = obstack_alloc (&declarator_obstack, 0);
+
+  tree attributes = NULL_TREE;
 
   /* If the next token is `extern' and the following token is a string
  literal, then we have a linkage specification.  */
-  if (token1.keyword == RID_EXTERN
-  && cp_parser_is_pure_string_literal (&token2))
+  if (token1->keyword == RID_EXTERN
+  && cp_parser_is_pure_string_literal (token2))
 cp_parser_linkage_specification (parser);
   /* If the next token is `template', then we have either a template
  declaration, an explicit instantiation, or an explicit
  specialization.  */
-  else if (token1.keyword == RID_TEMPLATE)
+  else if (token1->keyword == RID_TEMPLATE)
 {
   /* `template <>' indicates a template specialization.  */
-  if (token2.type == CPP_LESS
+  if (token2->type == CPP_LESS
 	  && cp_lexer_peek_nth_token (parser->lexer, 3)->type == CPP_GREATER)
 	cp_parser_explicit_specialization (parser);
   /* `template <' indicates a template declaration.  */
-  else if (token2.type == CPP_LESS)
+  else if (token2->type == CPP_LESS)
 	cp_parser_template_declaration (parser, /*member_p=*/false);
   /* Anything else must be an explicit instantiation.  */
   else
@@ -13456,40 +13450,40 @@ cp_parser_declaration (cp_parser* parser)
 }
   /* If the next token is `export', then we have a template
  declaration.  */
-  else if (token1.keyword == RID_EXPORT)
+  else if (token1->keyword == RID_EXPORT)
 cp_parser_template_declaration (parser, /*member_p=*/false);
   /* If the next token is `extern', 'static' or 'inline' and the one
  after that is `template', we have a GNU extended explicit
  instantiation directive.  */
   else if (cp_parser_allow_gnu_extensions_p (parser)
-	   && (token1.keyword == RID_EXTERN
-	   || token1.keyword == RID_STATIC
-	   || token1.keyword == RID_INLINE)
-	   && token2.keyword == RID_TEMPLATE)
+	   && token2->keyword == RID_TEMPLATE
+	   && (token1->keyword == RID_EXTERN
+	   || token1->keyword == RID_STATIC
+	   || token1->keyword == RID_INLINE))
 cp_parser_explicit_instantiation (parser);
   /* If the next token is `namespace', check for a named or unnamed
  namespace definition.  */
-  else if (token1.keyword == RID_NAMESPACE
+  else if (token1->keyword == RID_NAMESPACE
 	   && (/* A named namespace definition.  */
-	   (token2.type == CPP_NAME
+	   (token2->type == CPP_NAME
 		&& (cp_lexer_peek_nth_token (parser->lexer, 3)->type
 		!= CPP_EQ))
-   || (token2.type == CPP_OPEN_SQUARE
+   || (token2->type == CPP_OPEN_SQUARE
&& cp_lexer_peek_nth_token (parser->lexer, 3)->type
== CPP_OPEN_SQUARE)
 	   /* An unnamed namespace definition.  */
-	   || token2.type == CPP_OPEN_BRACE
-	   || token2.keyword == RID_ATTRIBUTE))
+	   || token2->type == CPP_OPEN_BRACE
+	   || token2->keyword == RID_ATTRIBUTE))
 cp_parser_namespace_definition (parser);
   /* An inline (associated) namespace definition.  */
-  else if (token1.keyword == RID_INLINE
-	   && token2.keyword == RID_NAMESPACE)
+  else if (token2->keyword == RID_NAMESPACE
+	   && token1->keyword == RID_INLINE)
 cp_parser_namespace_definition (parser);
   /* Objective-C++ declaration/definition.  */
-  else if (c_dialect_objc () && OBJC_IS_AT_KEYWORD (token1.k

[Ada] Cleanup in Convert_To_Positional after previous work

2020-07-15 Thread Pierre-Marie de Rodat
This adjusts the description of Flatten, removes an obsolete comment
and uses Compile_Time_Known_Value as now done in Is_Static_Element.

No functional changes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Flatten): Adjust description.
(Convert_To_Positional): Remove obsolete ??? comment and use
Compile_Time_Known_Value in the final test.
diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -4969,9 +4969,8 @@ package body Exp_Aggr is
  Dims : Nat;
  Ix   : Node_Id;
  Ixb  : Node_Id) return Boolean;
-  --  Convert the aggregate into a purely positional form if possible. On
-  --  entry the bounds of all dimensions are known to be static, and the
-  --  total number of components is safe enough to expand.
+  --  Convert the aggregate into a purely positional form if possible after
+  --  checking that the bounds of all dimensions are known to be static.
 
   function Is_Flat (N : Node_Id; Dims : Nat) return Boolean;
   --  Return True if the aggregate N is flat (which is not trivial in the
@@ -5476,10 +5475,6 @@ package body Exp_Aggr is
   --  compatible with the upper bound of the type, and therefore it is
   --  worth flattening such aggregates as well.
 
-  --  For now the back-end expands these aggregates into individual
-  --  assignments to the target anyway, but it is conceivable that
-  --  it will eventually be able to treat such aggregates statically???
-
   if Aggr_Size_OK (N, Typ)
 and then
   Flatten (N, Dims, First_Index (Typ), First_Index (Base_Type (Typ)))
@@ -5506,14 +5501,7 @@ package body Exp_Aggr is
 if Nkind (N) = N_Aggregate and then Present (Expressions (N)) then
Expr := First (Expressions (N));
while Present (Expr) loop
-  if Nkind_In (Expr, N_Integer_Literal, N_Real_Literal)
-or else
-  (Is_Entity_Name (Expr)
-and then Ekind (Entity (Expr)) = E_Enumeration_Literal)
-  then
- null;
-
-  else
+  if not Compile_Time_Known_Value (Expr) then
  Error_Msg_N
("non-static object requires elaboration code??", N);
  exit;




[Ada] Spurious error on Predicate_Failure aspect

2020-07-15 Thread Pierre-Marie de Rodat
GNAT would in some cases not resolve a Predicate_Failure aspect properly
and generate spurious errors of the form:

  cannot find unique type for raise expression
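
As a minimal sketch (type and message invented), a declaration of the kind
whose Predicate_Failure aspect could previously trip that error:

   subtype Even is Integer
     with Dynamic_Predicate => Even mod 2 = 0,
          Predicate_Failure => (raise Constraint_Error with "odd value");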

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Check_Aspect_At_End_Of_Declarations): Add proper
handling of Aspect_Predicate_Failure, consistent with
Check_Aspect_At_Freeze_Point.
diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -10519,6 +10519,8 @@ package body Sem_Ch13 is
 Preanalyze_Spec_Expression (End_Decl_Expr, T);
 Pop_Type (Ent);
 
+ elsif A_Id = Aspect_Predicate_Failure then
+Preanalyze_Spec_Expression (End_Decl_Expr, Standard_String);
  else
 Preanalyze_Spec_Expression (End_Decl_Expr, T);
  end if;




[Ada] Ongoing work for AI12-0212: container aggregates

2020-07-15 Thread Pierre-Marie de Rodat
This patch modifies the parser to recognize
iterated_element_associations, which may include a key_expression to be
used in a named aggregate such as a map. The new syntactic node
N_Iterated_Element_Association is recognized throughout the compiler.
The patch also extends the analysis and expansion of positional and
named aggregates that include iterated_element_associations (for now
without key_expressions).
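
As a hypothetical sketch (the container type My_Map and its Aggregate aspect
are assumed, not shown; this needed -gnatX at the time), the association form
the parser now accepts, with the key_expression after "use":

   M : constant My_Map := (for K in 1 .. 5 use K * 10 => K * K);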

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* par-ch4.adb (P_Iterated_Component_Association): Extended to
recognize the similar Iterated_Element_Association. This node
is only generated when an explicit Key_Expression is given.
Otherwise the distinction between the two iterated forms is done
during semantic analysis.
* sinfo.ads: New node N_Iterated_Element_Association, for
Ada 202X container aggregates.  New field Key_Expression.
* sinfo.adb: Subprograms for new node and new field.
* sem_aggr.adb (Resolve_Iterated_Component_Association): Handle
the case where the Iteration_Scheme is an
Iterator_Specification.
* exp_aggr.adb (Expand_Iterated_Component): Handle a component
with an Iterated_Component_Association, generate proper loop
using given Iterator_Specification.
* exp_util.adb (Insert_Actions): Handle new node as other
aggregate components.
* sem.adb, sprint.adb: Handle new node.
* tbuild.adb (Make_Implicit_Loop_Statement): Handle properly a
loop with an Iterator_Specification.
diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -6914,13 +6914,20 @@ package body Exp_Aggr is
  Stats  : List_Id;
 
   begin
- L_Range := Relocate_Node (First (Discrete_Choices (Comp)));
- L_Iteration_Scheme :=
-   Make_Iteration_Scheme (Loc,
- Loop_Parameter_Specification =>
-   Make_Loop_Parameter_Specification (Loc,
- Defining_Identifier => Loop_Id,
- Discrete_Subtype_Definition => L_Range));
+ if Present (Iterator_Specification (Comp)) then
+L_Iteration_Scheme :=
+  Make_Iteration_Scheme (Loc,
+Iterator_Specification => Iterator_Specification (Comp));
+
+ else
+L_Range := Relocate_Node (First (Discrete_Choices (Comp)));
+L_Iteration_Scheme :=
+  Make_Iteration_Scheme (Loc,
+Loop_Parameter_Specification =>
+  Make_Loop_Parameter_Specification (Loc,
+Defining_Identifier => Loop_Id,
+Discrete_Subtype_Definition => L_Range));
+ end if;
 
  --  Build insertion statement. For a positional aggregate, only the
  --  expression is needed. For a named aggregate, the loop variable,


diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -7346,6 +7346,7 @@ package body Exp_Util is
 
 when N_Component_Association
| N_Iterated_Component_Association
+   | N_Iterated_Element_Association
 =>
if Nkind (Parent (P)) = N_Aggregate
  and then Present (Loop_Actions (P))


diff --git a/gcc/ada/par-ch4.adb b/gcc/ada/par-ch4.adb
--- a/gcc/ada/par-ch4.adb
+++ b/gcc/ada/par-ch4.adb
@@ -3407,6 +3407,8 @@ package body Ch4 is
function P_Iterated_Component_Association return Node_Id is
   Assoc_Node : Node_Id;
   Id : Node_Id;
+  Iter_Spec  : Node_Id;
+  Loop_Spec  : Node_Id;
   State  : Saved_Scan_State;
 
--  Start of processing for P_Iterated_Component_Association
@@ -3423,6 +3425,9 @@ package body Ch4 is
   --  if E is a subtype indication this is a loop parameter spec,
   --  while if E a name it is an iterator_specification, and the
   --  disambiguation takes place during semantic analysis.
+  --  In addition, if "use" is present after the specification,
+  --  this is an Iterated_Element_Association that carries a
+  --  key_expression, and we generate the appropriate node.
 
   Id := P_Defining_Identifier;
   Assoc_Node :=
@@ -3432,6 +3437,22 @@ package body Ch4 is
  Set_Defining_Identifier (Assoc_Node, Id);
  T_In;
  Set_Discrete_Choices (Assoc_Node, P_Discrete_Choice_List);
+
+ if Token = Tok_Use then
+
+--  Key-expression is present, rewrite node as an
+--  Iterated_Element_Association.
+
+Scan;  --  past USE
+Loop_Spec :=
+  New_Node (N_Loop_Parameter_Specification, Prev_Token_Ptr);
+Set_Defining_Identifier (Loop_Spec, Id);
+Set_Discrete_Subtype_Definition (Loop_Spec,
+   First (Discrete_Choices (Assoc_Node)));
+Set_Loop_Parameter_Specification (Assoc_Node, Loop_Spec);

[Ada] Guard against access to wrong fields in Is_Renaming

2020-07-15 Thread Pierre-Marie de Rodat
Renamed_Entity is only valid for a few entities; using it on any entity
passed to Is_Renaming can result in crashes.  Fixing this requires
making sure that Is_Renaming only uses Renamed_Entity on entities where
this is allowed and uses Is_Renaming_Of_Object everywhere else.
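
A small sketch of the two renaming forms involved (surrounding context
assumed): for object renamings the meaningful field is Renamed_Object, for
unit renamings it is Renamed_Entity, hence the new Ekind checks.

   X : Integer := 0;
   Y : Integer renames X;                --  E_Variable: Renamed_Object
   package Long_IO renames Ada.Text_IO;  --  E_Package: Renamed_Entity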

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.adb (Is_Renaming): Add ekind checks.
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -6422,8 +6422,28 @@ package body Sem_Util is
 
   function Is_Renaming (N : Node_Id) return Boolean is
   begin
- return
-   Is_Entity_Name (N) and then Present (Renamed_Entity (Entity (N)));
+ if not Is_Entity_Name (N) then
+return False;
+ end if;
+
+ case Ekind (Entity (N)) is
+when E_Variable | E_Constant =>
+   return Present (Renamed_Object (Entity (N)));
+
+when E_Exception
+   | E_Function
+   | E_Generic_Function
+   | E_Generic_Package
+   | E_Generic_Procedure
+   | E_Operator
+   | E_Package
+   | E_Procedure
+=>
+   return Present (Renamed_Entity (Entity (N)));
+
+when others =>
+   return False;
+ end case;
   end Is_Renaming;
 
   ---




[Ada] Fix logic in Allocate_Any_Controlled

2020-07-15 Thread Pierre-Marie de Rodat
If an exception is raised early in Allocate_Any_Controlled, no lock has been
taken yet, but Unlock was nevertheless being called on that untaken lock.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-stposu.adb (Allocate_Any_Controlled): Fix logic in
lock/unlock.
diff --git a/gcc/ada/libgnat/s-stposu.adb b/gcc/ada/libgnat/s-stposu.adb
--- a/gcc/ada/libgnat/s-stposu.adb
+++ b/gcc/ada/libgnat/s-stposu.adb
@@ -117,11 +117,12 @@ package body System.Storage_Pools.Subpools is
   Is_Subpool_Allocation : constant Boolean :=
 Pool in Root_Storage_Pool_With_Subpools'Class;
 
-  Master  : Finalization_Master_Ptr := null;
-  N_Addr  : Address;
-  N_Ptr   : FM_Node_Ptr;
-  N_Size  : Storage_Count;
-  Subpool : Subpool_Handle := null;
+  Master : Finalization_Master_Ptr := null;
+  N_Addr : Address;
+  N_Ptr  : FM_Node_Ptr;
+  N_Size : Storage_Count;
+  Subpool: Subpool_Handle := null;
+  Lock_Taken : Boolean := False;
 
   Header_And_Padding : Storage_Offset;
   --  This offset includes the size of a FM_Node plus any additional
@@ -205,6 +206,7 @@ package body System.Storage_Pools.Subpools is
  --Read  - allocation, finalization
  --Write - finalization
 
+ Lock_Taken := True;
  Lock_Task.all;
 
  --  Do not allow the allocation of controlled objects while the
@@ -322,6 +324,7 @@ package body System.Storage_Pools.Subpools is
  end if;
 
  Unlock_Task.all;
+ Lock_Taken := False;
 
   --  Non-controlled allocation
 
@@ -335,7 +338,7 @@ package body System.Storage_Pools.Subpools is
  --  Unlock the task in case the allocation step failed and reraise the
  --  exception.
 
- if Is_Controlled then
+ if Lock_Taken then
 Unlock_Task.all;
  end if;
 




[Ada] Do not generate elaboration code for alignment aspect

2020-07-15 Thread Pierre-Marie de Rodat
This prevents the compiler from generating elaboration code for a record
declared with an initial value and an alignment aspect.  The expression
of an alignment aspect must be static so, in practice, there is no need
to defer the elaboration of the object just because of it.
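
A minimal sketch of the case this covers (names invented): an object with an
initial value and a static Alignment aspect that can now be elaborated
statically, without elaboration code.

   type Rec is record
      A, B : Integer;
   end record;

   Obj : constant Rec := (A => 1, B => 2) with Alignment => 16;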

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* einfo.ads (Delayed Freezing and Elaboration): Adjust description.
* freeze.adb (Freeze_Object_Declaration): Likewise.
* sem_ch3.adb (Delayed_Aspect_Present): Likewise.  Do not return
true for Alignment.
* sem_ch13.adb (Analyze_Aspect_Specifications): Do not always delay
for Alignment.  Moreover, for Alignment and various Size aspects,
do not delay if the expression is an attribute whose prefix is the
Standard package.
diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -281,8 +281,7 @@ package Einfo is
 --  For object declarations, the flag is set when an address clause for the
 --  object is encountered. Legality checks on the address expression only take
 --  place at the freeze point of the object. In Ada 2012, the flag is also set
---  when an address or an alignment aspect for the object is encountered (note
---  the discrepancy with the non-aspect case).
+--  when an address aspect for the object is encountered.
 
 --  Most types have an explicit freeze node, because they cannot be elaborated
 --  until all representation and operational items that apply to them have been


diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb
--- a/gcc/ada/freeze.adb
+++ b/gcc/ada/freeze.adb
@@ -3431,7 +3431,7 @@ package body Freeze is
  Check_Address_Clause (E);
 
  --  Similar processing is needed for aspects that may affect object
- --  layout, like Alignment, if there is an initialization expression.
+ --  layout, like Address, if there is an initialization expression.
  --  We don't do this if there is a pragma Linker_Section, because it
  --  would prevent the back end from statically initializing the
  --  object; we don't want elaboration code in that case.


diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -2861,17 +2861,30 @@ package body Sem_Ch13 is
   if A_Id in Boolean_Aspects and then No (Expr) then
  Delay_Required := False;
 
-  --  For non-Boolean aspects, don't delay if integer literal,
-  --  unless the aspect is Alignment, which affects the
-  --  freezing of an initialized object.
+  --  For non-Boolean aspects, don't delay if integer literal
 
   elsif A_Id not in Boolean_Aspects
-and then A_Id /= Aspect_Alignment
 and then Present (Expr)
 and then Nkind (Expr) = N_Integer_Literal
   then
  Delay_Required := False;
 
+  --  For Alignment and various Size aspects, don't delay for
+  --  an attribute reference whose prefix is Standard, for
+  --  example Standard'Maximum_Alignment or Standard'Word_Size.
+
+  elsif (A_Id = Aspect_Alignment
+  or else A_Id = Aspect_Component_Size
+  or else A_Id = Aspect_Object_Size
+  or else A_Id = Aspect_Size
+  or else A_Id = Aspect_Value_Size)
+and then Present (Expr)
+and then Nkind (Expr) = N_Attribute_Reference
+and then Nkind (Prefix (Expr)) = N_Identifier
+and then Chars (Prefix (Expr)) = Name_Standard
+  then
+ Delay_Required := False;
+
   --  All other cases are delayed
 
   else


diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -3668,7 +3668,7 @@ package body Sem_Ch3 is
   --  has aspects that require delayed analysis, the resolution of the
   --  aggregate must be deferred to the freeze point of the object. This
   --  special processing was created for address clauses, but it must
-  --  also apply to Alignment. This must be done before the aspect
+  --  also apply to address aspects. This must be done before the aspect
   --  specifications are analyzed because we must handle the aggregate
   --  before the analysis of the object declaration is complete.
 
@@ -3896,7 +3896,7 @@ package body Sem_Ch3 is
 while Present (A) loop
A_Id := Get_Aspect_Id (Chars (Identifier (A)));
 
-   if A_Id = Aspect_Alignment or else A_Id = Aspect_Address then
+   if A_Id = Aspect_Address then
 
   --  Set flag on object entity, for later proc

[Ada] Cleanup code related to object overlays

2020-07-15 Thread Pierre-Marie de Rodat
Clean up frontend code before the routine Find_Overlaid_Entity is reused
in the GNATprove backend.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.adb (Find_Overlaid_Entity): Fix style in comment.
(Note_Possible_Modification): Simplify repeated calls to Ekind.
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -8691,7 +8691,7 @@ package body Sem_Util is
Expr := Prefix (Expr);
exit;
 
-   --  Check for Const where Const is a constant entity
+--  Check for Const where Const is a constant entity
 
 elsif Is_Entity_Name (Expr)
   and then Ekind (Entity (Expr)) = E_Constant
@@ -23841,7 +23841,7 @@ package body Sem_Util is
 
--  Follow renaming chain
 
-   if (Ekind (Ent) = E_Variable or else Ekind (Ent) = E_Constant)
+   if Ekind_In (Ent, E_Variable, E_Constant)
  and then Present (Renamed_Object (Ent))
then
   Exp := Renamed_Object (Ent);




[Ada] Target name is an object reference

2020-07-15 Thread Pierre-Marie de Rodat
Target name (i.e. "@"), which was introduced to Ada 202X in AI12-0125,
denotes a constant object (RM 3.3(21.2/5)), even though target_name
itself is not an object (RM 3.3(2)).

This patch allows @ to appear as a prefix for the Address attribute
(which requires an object). Also, it enables constructs like @'Last,
where @ represents a constrained array object with a static bound, to be
constant-folded in GNATprove mode (where @ is not expanded), just like
it is constant-folded in GNAT mode (but only after @ has been expanded).
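
A short sketch of the constructs concerned (names invented; @ requires the
Ada 202X target-name feature):

   A : array (1 .. 10) of Integer := (others => 0);
   ...
   A := (1 .. @'Last => 1);   --  @ denotes A; @'Last can be constant-folded
   A (3) := @ + 1;            --  @ denotes A (3); @'Address is also accepted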

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.adb (Is_Object_Reference): Return True on
N_Target_Name.
diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -17217,6 +17217,11 @@ package body Sem_Util is
return Is_Rewrite_Substitution (N)
  and then Is_Object_Reference (Original_Node (N));
 
+--  AI12-0125: Target name represents a constant object
+
+when N_Target_Name =>
+   return True;
+
 when others =>
return False;
  end case;




[Ada] Missing error on operator call

2020-07-15 Thread Pierre-Marie de Rodat
In some cases where a procedure call is expected but a function is
provided, such as "Interfaces.C."=" (x, y);", GNAT would not generate any
error message.
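
A minimal sketch of the now-diagnosed construct (procedure name invented):

   with Interfaces.C; use Interfaces.C;
   procedure Demo (X, Y : int) is
   begin
      Interfaces.C."=" (X, Y);  --  a function where a procedure call is
                                --  expected: now rejected with an error
   end Demo;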

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Analyze_Procedure_Call): Detect use of operators
in a procedure call.
* sem_util.adb: Minor edit.
diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -2014,6 +2014,10 @@ package body Sem_Ch6 is
and then Comes_From_Source (N)
  then
 Error_Msg_N ("missing explicit dereference in call", N);
+
+ elsif Ekind (Entity (P)) = E_Operator then
+Error_Msg_Name_1 := Chars (P);
+Error_Msg_N ("operator % cannot be used as a procedure", N);
  end if;
 
  Analyze_Call_And_Resolve;


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -28605,12 +28605,12 @@ package body Sem_Util is
   then
  return;
 
-  --  In  an instance, there is an ongoing problem with completion of
+  --  In an instance, there is an ongoing problem with completion of
   --  types derived from private types. Their structure is what Gigi
-  --  expects, but the  Etype is the parent type rather than the
-  --  derived private type itself. Do not flag error in this case. The
-  --  private completion is an entity without a parent, like an Itype.
-  --  Similarly, full and partial views may be incorrect in the instance.
+  --  expects, but the Etype is the parent type rather than the derived
+  --  private type itself. Do not flag error in this case. The private
+  --  completion is an entity without a parent, like an Itype. Similarly,
+  --  full and partial views may be incorrect in the instance.
   --  There is no simple way to insure that it is consistent ???
 
   --  A similar view discrepancy can happen in an inlined body, for the




[Ada] Fix for possibly null ranges in 'Update and delta_aggregate

2020-07-15 Thread Pierre-Marie de Rodat
In an expression like "(Arr with delta Low .. High => New_Component_Value)"
the bounds Low .. High might denote a null range. In this case both Low and
High can be any values from the base type of the array's index type;
they don't need to belong to the array's index type itself.
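
A sketch of the situation (names invented; delta aggregates are an Ada 202X
feature): with Low > High no component is updated, so the bounds only have to
belong to Integer, the base type of Index.

   subtype Index is Integer range 1 .. 10;
   type Arr_T is array (Index) of Integer;

   function Clear (A : Arr_T; Low, High : Integer) return Arr_T is
     (A with delta Low .. High => 0);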

This patch removes unnecessary range checks in GNATprove mode.
Compilation is not affected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_spark.adb (Expand_SPARK_Delta_Or_Update): Apply scalar
range checks against the base type of an index type, not against
the index type itself.
diff --git a/gcc/ada/exp_spark.adb b/gcc/ada/exp_spark.adb
--- a/gcc/ada/exp_spark.adb
+++ b/gcc/ada/exp_spark.adb
@@ -227,9 +227,9 @@ package body Exp_SPARK is
 
   if Nkind (Index) = N_Range then
  Apply_Scalar_Range_Check
-   (Low_Bound  (Index), Etype (Index_Typ));
+   (Low_Bound  (Index), Base_Type (Etype (Index_Typ)));
  Apply_Scalar_Range_Check
-   (High_Bound (Index), Etype (Index_Typ));
+   (High_Bound (Index), Base_Type (Etype (Index_Typ)));
 
   --  Otherwise the index denotes a single element
 




[Ada] Mark standard containers as not in SPARK

2020-07-15 Thread Pierre-Marie de Rodat
Use aspect SPARK_Mode with value Off in the spec and body of standard
containers, bounded and unbounded versions, so that it is clearer that
they cannot be used in SPARK code. Formal containers should be used
instead.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/a-cbdlli.adb, libgnat/a-cbdlli.ads,
libgnat/a-cbhama.adb, libgnat/a-cbhama.ads,
libgnat/a-cbhase.adb, libgnat/a-cbhase.ads,
libgnat/a-cbmutr.adb, libgnat/a-cbmutr.ads,
libgnat/a-cborma.adb, libgnat/a-cborma.ads,
libgnat/a-cborse.adb, libgnat/a-cborse.ads,
libgnat/a-cbprqu.adb, libgnat/a-cbprqu.ads,
libgnat/a-cbsyqu.adb, libgnat/a-cbsyqu.ads,
libgnat/a-cdlili.adb, libgnat/a-cdlili.ads,
libgnat/a-cidlli.adb, libgnat/a-cidlli.ads,
libgnat/a-cihama.adb, libgnat/a-cihama.ads,
libgnat/a-cihase.adb, libgnat/a-cihase.ads,
libgnat/a-cimutr.adb, libgnat/a-cimutr.ads,
libgnat/a-ciorma.adb, libgnat/a-ciorma.ads,
libgnat/a-ciormu.adb, libgnat/a-ciormu.ads,
libgnat/a-ciorse.adb, libgnat/a-ciorse.ads,
libgnat/a-cohama.adb, libgnat/a-cohama.ads,
libgnat/a-cohase.adb, libgnat/a-cohase.ads,
libgnat/a-coinve.adb, libgnat/a-coinve.ads,
libgnat/a-comutr.adb, libgnat/a-comutr.ads,
libgnat/a-convec.adb, libgnat/a-convec.ads,
libgnat/a-coorma.adb, libgnat/a-coorma.ads,
libgnat/a-coormu.adb, libgnat/a-coormu.ads,
libgnat/a-coorse.adb, libgnat/a-coorse.ads: Add SPARK_Mode =>
Off.
diff --git a/gcc/ada/libgnat/a-cbdlli.adb b/gcc/ada/libgnat/a-cbdlli.adb
--- a/gcc/ada/libgnat/a-cbdlli.adb
+++ b/gcc/ada/libgnat/a-cbdlli.adb
@@ -29,7 +29,9 @@
 
 with System; use type System.Address;
 
-package body Ada.Containers.Bounded_Doubly_Linked_Lists is
+package body Ada.Containers.Bounded_Doubly_Linked_Lists with
+  SPARK_Mode => Off
+is
 
pragma Warnings (Off, "variable ""Busy*"" is not referenced");
pragma Warnings (Off, "variable ""Lock*"" is not referenced");


diff --git a/gcc/ada/libgnat/a-cbdlli.ads b/gcc/ada/libgnat/a-cbdlli.ads
--- a/gcc/ada/libgnat/a-cbdlli.ads
+++ b/gcc/ada/libgnat/a-cbdlli.ads
@@ -43,7 +43,9 @@ generic
with function "=" (Left, Right : Element_Type)
   return Boolean is <>;
 
-package Ada.Containers.Bounded_Doubly_Linked_Lists is
+package Ada.Containers.Bounded_Doubly_Linked_Lists with
+  SPARK_Mode => Off
+is
pragma Annotate (CodePeer, Skip_Analysis);
pragma Pure;
pragma Remote_Types;


diff --git a/gcc/ada/libgnat/a-cbhama.adb b/gcc/ada/libgnat/a-cbhama.adb
--- a/gcc/ada/libgnat/a-cbhama.adb
+++ b/gcc/ada/libgnat/a-cbhama.adb
@@ -39,7 +39,9 @@ with Ada.Containers.Prime_Numbers; use Ada.Containers.Prime_Numbers;
 
 with System; use type System.Address;
 
-package body Ada.Containers.Bounded_Hashed_Maps is
+package body Ada.Containers.Bounded_Hashed_Maps with
+  SPARK_Mode => Off
+is
 
pragma Warnings (Off, "variable ""Busy*"" is not referenced");
pragma Warnings (Off, "variable ""Lock*"" is not referenced");


diff --git a/gcc/ada/libgnat/a-cbhama.ads b/gcc/ada/libgnat/a-cbhama.ads
--- a/gcc/ada/libgnat/a-cbhama.ads
+++ b/gcc/ada/libgnat/a-cbhama.ads
@@ -45,7 +45,9 @@ generic
with function Equivalent_Keys (Left, Right : Key_Type) return Boolean;
with function "=" (Left, Right : Element_Type) return Boolean is <>;
 
-package Ada.Containers.Bounded_Hashed_Maps is
+package Ada.Containers.Bounded_Hashed_Maps with
+  SPARK_Mode => Off
+is
pragma Annotate (CodePeer, Skip_Analysis);
pragma Pure;
pragma Remote_Types;


diff --git a/gcc/ada/libgnat/a-cbhase.adb b/gcc/ada/libgnat/a-cbhase.adb
--- a/gcc/ada/libgnat/a-cbhase.adb
+++ b/gcc/ada/libgnat/a-cbhase.adb
@@ -39,7 +39,9 @@ with Ada.Containers.Prime_Numbers; use Ada.Containers.Prime_Numbers;
 
 with System; use type System.Address;
 
-package body Ada.Containers.Bounded_Hashed_Sets is
+package body Ada.Containers.Bounded_Hashed_Sets with
+  SPARK_Mode => Off
+is
 
pragma Warnings (Off, "variable ""Busy*"" is not referenced");
pragma Warnings (Off, "variable ""Lock*"" is not referenced");


diff --git a/gcc/ada/libgnat/a-cbhase.ads b/gcc/ada/libgnat/a-cbhase.ads
--- a/gcc/ada/libgnat/a-cbhase.ads
+++ b/gcc/ada/libgnat/a-cbhase.ads
@@ -48,7 +48,9 @@ generic
 
with function "=" (Left, Right : Element_Type) return Boolean is <>;
 
-package Ada.Containers.Bounded_Hashed_Sets is
+package Ada.Containers.Bounded_Hashed_Sets with
+  SPARK_Mode => Off
+is
pragma Annotate (CodePeer, Skip_Analysis);
pragma Pure;
pragma Remote_Types;


diff --git a/gcc/ada/libgnat/a-cbmutr.adb b/gcc/ada/libgnat/a-cbmutr.adb
--- a/gcc/ada/libgnat/a-cbmutr.adb
+++ b/gcc/ada/libgnat/a-cbmutr.adb
@@ -30,7 +30,9 @@
 with Ada.Finalization;
 with System; use type System.Address;
 
-package body Ada.Containers.Bounded_Multiway_Trees is
+package body Ada.Containers.Bounded_Multiway_Trees with
+  SPARK_Mode => Off
+is
 
pragma Warnings (Off, "variab

[Ada] Fix slices and qualified expressions being effectively volatile

2020-07-15 Thread Pierre-Marie de Rodat
Detecting effectively volatile objects in restricted contexts happens in
two routines: Is_Effectively_Volatile_Object and Is_OK_Volatile_Context.
Their handling of type conversions and slices was different; also, neither
of them dealt with qualified expressions, which have just been added to the
list of non-interfering contexts in SPARK RM 7.1.3(10).
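
A rough sketch, not a complete SPARK unit (names invented, SPARK_Mode assumed
On), of the two forms now classified consistently:

   A : array (1 .. 4) of Integer with Volatile;
   V : Integer with Volatile;
   X : Integer;
   ...
   X := Integer'(V);   --  qualified expression: a non-interfering context
   --  A (1 .. 2) is now effectively volatile, just like A (1) already was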

The change only affects SPARK code, as both "effectively volatile" and
"non-interfering context" are specific to SPARK.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch5.adb (Expand_Assign_Array): Use short-circuit operator
(style).
* sem_res.adb (Resolve_Indexed_Component): Fix style in comment.
* sem_util.adb (Is_Effectively_Volatile_Object): Handle slices
just like indexed components; handle qualified expressions and
type conversions like in Is_OK_Volatile_Context.
(Is_OK_Volatile_Context): Handle qualified expressions just like
type conversions.
diff --git a/gcc/ada/exp_ch5.adb b/gcc/ada/exp_ch5.adb
--- a/gcc/ada/exp_ch5.adb
+++ b/gcc/ada/exp_ch5.adb
@@ -1521,7 +1521,7 @@ package body Exp_Ch5 is
   --  be assigned.
 
   elsif Possible_Bit_Aligned_Component (Lhs)
-  or
+  or else
 Possible_Bit_Aligned_Component (Rhs)
   then
  null;


diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -9155,7 +9155,7 @@ package body Sem_Res is
  Array_Type := Implicitly_Designated_Type (Array_Type);
   end if;
 
-  --  If name was overloaded, set component type correctly now
+  --  If name was overloaded, set component type correctly now.
   --  If a misplaced call to an entry family (which has no index types)
   --  return. Error will be diagnosed from calling context.
 


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -15676,7 +15676,7 @@ package body Sem_Util is
  return Is_Object (Entity (N))
and then Is_Effectively_Volatile (Entity (N));
 
-  elsif Nkind (N) = N_Indexed_Component then
+  elsif Nkind_In (N, N_Indexed_Component, N_Slice) then
  return Is_Effectively_Volatile_Object (Prefix (N));
 
   elsif Nkind (N) = N_Selected_Component then
@@ -15685,6 +15685,12 @@ package body Sem_Util is
  or else
Is_Effectively_Volatile_Object (Selector_Name (N));
 
+  elsif Nkind_In (N, N_Qualified_Expression,
+ N_Unchecked_Type_Conversion,
+ N_Type_Conversion)
+  then
+ return Is_Effectively_Volatile_Object (Expression (N));
+
   else
  return False;
   end if;
@@ -17497,7 +17503,8 @@ package body Sem_Util is
   --  The volatile object appears as the expression of a type conversion
   --  occurring in a non-interfering context.
 
-  elsif Nkind_In (Context, N_Type_Conversion,
+  elsif Nkind_In (Context, N_Qualified_Expression,
+   N_Type_Conversion,
N_Unchecked_Type_Conversion)
 and then Expression (Context) = Obj_Ref
 and then Is_OK_Volatile_Context




[Ada] Extend static functions

2020-07-15 Thread Pierre-Marie de Rodat
This patch extends static functions and allows them on Intrinsic imported
subprograms in addition to expression functions, under -gnatX.  We also
implement compile time evaluation of Shift_Left/Right operators as a
first set of useful static-compatible intrinsics.
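
A hypothetical sketch (requires -gnatX; the exact aspect spelling on the
import is an assumption) of an intrinsic that can now carry the Static aspect
and be folded at compile time:

   type U32 is mod 2 ** 32;

   function Shift_Left (Value : U32; Amount : Natural) return U32
     with Import, Convention => Intrinsic, Static;

   C : constant U32 := Shift_Left (1, 10);  --  folded to 1024 statically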

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* inline.adb, inline.ads
(Inline_Static_Expression_Function_Call): Renamed
Inline_Static_Function_Call.
* sem_ch13.adb (Analyze_Aspect_Static): Allow static intrinsic
imported functions under -gnatX.
* sem_util.ads, sem_util.adb (Is_Static_Expression_Function):
Renamed Is_Static_Function.
(Is_Static_Expression_Function_Call): Renamed
Is_Static_Function_Call.
* sem_ch6.adb, sem_elab.adb, sem_res.adb: Update calls to
Is_Static_Function*.
* sem_eval.adb (Fold_Dummy, Eval_Intrinsic_Call, Fold_Shift):
New.
(Eval_Call): Add support for intrinsic calls, code refactoring.
(Eval_Entity_Name): Code refactoring.
(Eval_Logical_Op): Update comment.
(Eval_Shift): Call Fold_Shift. Update comments.
* par-prag.adb (Par [Pragma_Extensions_Allowed]): Set
Ada_Version to Ada_Version_Type'Last to handle
Extensions_Allowed (On) consistently.
* opt.ads (Extensions_Allowed): Update documentation.
* sem_attr.adb: Update comment.
* doc/gnat_rm/implementation_defined_pragmas.rst: Update
documentation of Extensions_Allowed.
* gnat_rm.texi: Regenerate.

patch.diff.gz
Description: application/gzip


[Ada] Assert failure with -gnatwr

2020-07-15 Thread Pierre-Marie de Rodat
In the case of a complex precondition expression with quantifiers, we could
get a crash in Resolve_Type_Conversion when trying to emit a -gnatwr
warning.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_res.adb (Resolve_Type_Conversion): Protect against null
entity.  Add proper tag for -gnatwr warning.
diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -11679,6 +11679,7 @@ package body Sem_Res is
  --  odd subtype coming from the bounds).
 
  if (Is_Entity_Name (Orig_N)
+  and then Present (Entity (Orig_N))
   and then
 (Etype (Entity (Orig_N)) = Orig_T
   or else
@@ -11733,17 +11734,15 @@ package body Sem_Res is
 --  entity, give the name of the entity in the message. If not,
 --  just mention the expression.
 
---  Shoudn't we test Warn_On_Redundant_Constructs here ???
-
 else
if Is_Entity_Name (Orig_N) then
   Error_Msg_Node_2 := Orig_T;
   Error_Msg_NE -- CODEFIX
-("??redundant conversion, & is of type &!",
+("?r?redundant conversion, & is of type &!",
  N, Entity (Orig_N));
else
   Error_Msg_NE
-("??redundant conversion, expression is of type&!",
+("?r?redundant conversion, expression is of type&!",
  N, Orig_T);
end if;
 end if;




[Ada] ACATS 4.1P [BDB4001] - 13.11.4(22-23/3) not enforced

2020-07-15 Thread Pierre-Marie de Rodat
This ACATS test shows GNAT was not enforcing legality rules related to
subpools.
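
A sketch of the rule now enforced (pool type name invented): the access type
is statically deeper than a pool that supports subpools, which RM
13.11.4(22-23/3) forbids.

   Pool : My_Pool_With_Subpools;
   --  library-level object of a type derived from
   --  Root_Storage_Pool_With_Subpools

   procedure P is
      type Ptr is access Integer
        with Storage_Pool => Pool;  --  now rejected
   begin
      null;
   end P;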

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch3.adb (Freeze_Type): Remove warning in expander,
replaced by a corresponding error in sem_ch13.adb. Replace
RTE_Available by RTU_Loaded to avoid adding unnecessary
dependencies.
* sem_ch13.adb (Associate_Storage_Pool): New procedure.
(Analyze_Attribute_Definition_Clause
[Attribute_Simple_Storage_Pool| Attribute_Storage_Pool]): Call
Associate_Storage_Pool to add proper legality checks on
subpools.
diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -8148,61 +8148,44 @@ package body Exp_Ch3 is
 
 elsif Ada_Version >= Ada_2012
   and then Present (Associated_Storage_Pool (Def_Id))
-
-  --  Omit this check for the case of a configurable run-time that
-  --  does not provide package System.Storage_Pools.Subpools.
-
-  and then RTE_Available (RE_Root_Storage_Pool_With_Subpools)
+  and then RTU_Loaded (System_Storage_Pools_Subpools)
 then
declare
   Loc   : constant Source_Ptr := Sloc (Def_Id);
   Pool  : constant Entity_Id :=
 Associated_Storage_Pool (Def_Id);
-  RSPWS : constant Entity_Id :=
-RTE (RE_Root_Storage_Pool_With_Subpools);
 
begin
   --  It is known that the accessibility level of the access
   --  type is deeper than that of the pool.
 
   if Type_Access_Level (Def_Id) > Object_Access_Level (Pool)
+and then Is_Class_Wide_Type (Etype (Pool))
 and then not Accessibility_Checks_Suppressed (Def_Id)
 and then not Accessibility_Checks_Suppressed (Pool)
   then
- --  Static case: the pool is known to be a descendant of
- --  Root_Storage_Pool_With_Subpools.
-
- if Is_Ancestor (RSPWS, Etype (Pool)) then
-Error_Msg_N
-  ("??subpool access type has deeper accessibility "
-   & "level than pool", Def_Id);
-
-Append_Freeze_Action (Def_Id,
-  Make_Raise_Program_Error (Loc,
-Reason => PE_Accessibility_Check_Failed));
-
- --  Dynamic case: when the pool is of a class-wide type,
- --  it may or may not support subpools depending on the
- --  path of derivation. Generate:
+ --  When the pool is of a class-wide type, it may or may
+ --  not support subpools depending on the path of
+ --  derivation. Generate:
 
  --if Def_Id in RSPWS'Class then
  --   raise Program_Error;
  --end if;
 
- elsif Is_Class_Wide_Type (Etype (Pool)) then
-Append_Freeze_Action (Def_Id,
-  Make_If_Statement (Loc,
-Condition   =>
-  Make_In (Loc,
-Left_Opnd  => New_Occurrence_Of (Pool, Loc),
-Right_Opnd =>
-  New_Occurrence_Of
-(Class_Wide_Type (RSPWS), Loc)),
-
-Then_Statements => New_List (
-  Make_Raise_Program_Error (Loc,
-Reason => PE_Accessibility_Check_Failed;
- end if;
+ Append_Freeze_Action (Def_Id,
+   Make_If_Statement (Loc,
+ Condition   =>
+   Make_In (Loc,
+ Left_Opnd  => New_Occurrence_Of (Pool, Loc),
+ Right_Opnd =>
+   New_Occurrence_Of
+ (Class_Wide_Type
+   (RTE
+ (RE_Root_Storage_Pool_With_Subpools)),
+  Loc)),
+ Then_Statements => New_List (
+   Make_Raise_Program_Error (Loc,
+ Reason => PE_Accessibility_Check_Failed;
   end if;
end;
 end if;


diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -7044,6 +7044,121 @@ package body Sem_Ch13 is
 Pool : Entity_Id;
 T: Entity_Id;
 
+procedure Associate_Storage_Pool
+  (Ent 

[Ada] Do not generate extra copies inside initialization procedures

2020-07-15 Thread Pierre-Marie de Rodat
The RM C.6(19) clause says that atomic or volatile objects of types
that are not by reference must be passed by copy in a call if the
type of the formal is not atomic or volatile respectively.  But this
requirement does not apply to initialization procedures, which are
generated by the compiler, and doing it would create extra copies
for atomic or volatile components declared in record types.
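
A sketch of the C.6(19) situation (names invented): in user code the actual
must be passed by copy, but that copy is no longer also generated inside
compiler-built initialization procedures.

   type Data is record
      Flag : Boolean with Atomic;
   end record;

   procedure Consume (B : Boolean);  --  formal type is not atomic

   Obj : Data;
   ...
   Consume (Obj.Flag);  --  passed by copy per RM C.6(19)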

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Requires_Atomic_Or_Volatile_Copy): Return false
inside an initialization procedure.
diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -2196,6 +2196,13 @@ package body Exp_Ch6 is
 return False;
  end if;
 
+ --  There is no requirement inside initialization procedures and this
+ --  would generate copies for atomic or volatile composite components.
+
+ if Inside_Init_Proc then
+return False;
+ end if;
+
  --  Check for atomicity mismatch
 
  if Is_Atomic_Object (Actual) and then not Is_Atomic (E_Formal)




[Ada] Fix oversight in Delayed_Aspect_Present predicate

2020-07-15 Thread Pierre-Marie de Rodat
The predicate used to return True only if an aspect requiring delaying,
like Alignment or Address, was the first aspect in the list.  The change
also contains a small consistency fix for Freeze_Object_Declaration.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch3.adb (Delayed_Aspect_Present): Fix oversight in loop.
* freeze.adb (Freeze_Object_Declaration): Use Declaration_Node
instead of Parent for the sake of consistency.
diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb
--- a/gcc/ada/freeze.adb
+++ b/gcc/ada/freeze.adb
@@ -3439,11 +3439,11 @@ package body Freeze is
  if Has_Delayed_Aspects (E)
and then Expander_Active
and then Is_Array_Type (Typ)
-   and then Present (Expression (Parent (E)))
+   and then Present (Expression (Declaration_Node (E)))
and then No (Linker_Section_Pragma (E))
  then
 declare
-   Decl : constant Node_Id := Parent (E);
+   Decl : constant Node_Id := Declaration_Node (E);
Lhs  : constant Node_Id := New_Occurrence_Of (E, Loc);
 
 begin


diff --git a/gcc/ada/sem_ch3.adb b/gcc/ada/sem_ch3.adb
--- a/gcc/ada/sem_ch3.adb
+++ b/gcc/ada/sem_ch3.adb
@@ -3891,9 +3891,11 @@ package body Sem_Ch3 is
 
   begin
  if Present (Aspect_Specifications (N)) then
-A:= First (Aspect_Specifications (N));
-A_Id := Get_Aspect_Id (Chars (Identifier (A)));
+A := First (Aspect_Specifications (N));
+
 while Present (A) loop
+   A_Id := Get_Aspect_Id (Chars (Identifier (A)));
+
if A_Id = Aspect_Alignment or else A_Id = Aspect_Address then
 
   --  Set flag on object entity, for later processing at




[Ada] Small addition and tweaks in documentation on freezing

2020-07-15 Thread Pierre-Marie de Rodat
Apart from the usual editorial tweaks, this documents the discrepancy
between the aspect and the non-aspect cases for alignment settings in
object declarations.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* einfo.ads (Delayed Freezing and Elaboration): Minor tweaks.
Document the discrepancy between the aspect and the non-aspect
cases for alignment settings in object declarations.
diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -266,28 +266,30 @@ package Einfo is
 --  The flag Has_Delayed_Freeze indicates that an entity carries an explicit
 --  freeze node, which appears later in the expanded tree.
 
---  a) The flag is used by the front-end to trigger expansion actions which
+--  a) The flag is used by the front end to trigger expansion activities which
 --  include the generation of that freeze node. Typically this happens at the
 --  end of the current compilation unit, or before the first subprogram body is
---  encountered in the current unit. See files freeze and exp_ch13 for details
+--  encountered in the current unit. See units Freeze and Exp_Ch13 for details
 --  on the actions triggered by a freeze node, which include the construction
 --  of initialization procedures and dispatch tables.
 
---  b) The presence of a freeze node on an entity is used by the backend to
+--  b) The presence of a freeze node on an entity is used by the back end to
 --  defer elaboration of the entity until its freeze node is seen. In the
 --  absence of an explicit freeze node, an entity is frozen (and elaborated)
 --  at the point of declaration.
 
 --  For object declarations, the flag is set when an address clause for the
 --  object is encountered. Legality checks on the address expression only take
---  place at the freeze point of the object.
+--  place at the freeze point of the object. In Ada 2012, the flag is also set
+--  when an address or an alignment aspect for the object is encountered (note
+--  the discrepancy with the non-aspect case).
 
 --  Most types have an explicit freeze node, because they cannot be elaborated
 --  until all representation and operational items that apply to them have been
 --  analyzed. Private types and incomplete types have the flag set as well, as
 --  do task and protected types.
 
---  Implicit base types created for type derivations, as well as classwide
+--  Implicit base types created for type derivations, as well as class-wide
 --  types created for all tagged types, have the flag set.
 
 --  If a subprogram has an access parameter whose designated type is incomplete




[Ada] Spurious accessibility error on allocator

2020-07-15 Thread Pierre-Marie de Rodat
This patch fixes an error in the compiler whereby an allocator for a
limited type may cause spurious accessibility errors due to a
miscalculation of access levels on internally generated temporaries.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Make_Build_In_Place_Call_Allocator): Normalize
the associated node for internally generated objects to be like
their SOAAT counterparts.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -9732,6 +9732,12 @@ package body Exp_Ch6 is
 New_Occurrence_Of (Etype (BIP_Func_Call), Loc),
  Expression   => New_Copy_Tree (BIP_Func_Call));
 
+  --  Manually set the associated node for the anonymous access type to
+  --  be its local declaration to avoid confusing and complicating
+  --  the accessibility machinery.
+
+  Set_Associated_Node_For_Itype (Anon_Type, Tmp_Decl);
+
   Expander_Mode_Save_And_Set (False);
   Insert_Action (Allocator, Tmp_Decl);
   Expander_Mode_Restore;




[Ada] Use Sloc of delay statement in timed entry call

2020-07-15 Thread Pierre-Marie de Rodat
This changes the Sloc used to expand a timed entry call from that
of the Select to that of the Delay statement for coverage purposes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch9.adb (Expand_N_Timed_Entry_Call): Use the Sloc of
the delay statement in the expansion.

diff --git a/gcc/ada/exp_ch9.adb b/gcc/ada/exp_ch9.adb
--- a/gcc/ada/exp_ch9.adb
+++ b/gcc/ada/exp_ch9.adb
@@ -12613,8 +12613,6 @@ package body Exp_Ch9 is
--  global references if within an instantiation.
 
procedure Expand_N_Timed_Entry_Call (N : Node_Id) is
-  Loc : constant Source_Ptr := Sloc (N);
-
   Actuals: List_Id;
   Blk_Typ: Entity_Id;
   Call   : Node_Id;
@@ -12637,6 +12635,7 @@ package body Exp_Ch9 is
   Index  : Node_Id;
   Is_Disp_Select : Boolean;
   Lim_Typ_Stmts  : List_Id;
+  Loc: constant Source_Ptr := Sloc (D_Stat);
   N_Stats: List_Id;
   Obj: Entity_Id;
   Param  : Node_Id;




[Ada] Cleanup condition for an effectively volatile array type

2020-07-15 Thread Pierre-Marie de Rodat
When we detect an effectively volatile array type, we only need to examine
the type of the array component if the array itself does not have the
Has_Volatile_Components property.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_prag.adb (Atomic_Components): Simplify with Ekind_In.
(Complex_Representation): Fix type of E_Id, which, just like for
pragma Atomic_Components, will hold an N_Identifier node, not
an entity.
* sem_util.adb (Is_Effectively_Volatile): Refactor to avoid
unnecessary computation.

diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -13626,9 +13626,7 @@ package body Sem_Prag is
 if (Nkind (D) = N_Full_Type_Declaration and then Is_Array_Type (E))
   or else
 (Nkind (D) = N_Object_Declaration
-   and then (Ekind (E) = E_Constant
-  or else
- Ekind (E) = E_Variable)
+   and then Ekind_In (E, E_Constant, E_Variable)
and then Nkind (Object_Definition (D)) =
N_Constrained_Array_Definition)
   or else
@@ -14258,7 +14256,7 @@ package body Sem_Prag is
  --  pragma Complex_Representation ([Entity =>] LOCAL_NAME);
 
  when Pragma_Complex_Representation => Complex_Representation : declare
-E_Id : Entity_Id;
+E_Id : Node_Id;
 E: Entity_Id;
 Ent  : Entity_Id;
 


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -15615,22 +15615,24 @@ package body Sem_Util is
  --  effectively volatile.
 
  elsif Is_Array_Type (Id) then
-declare
-   Anc : Entity_Id := Base_Type (Id);
-begin
-   if Is_Private_Type (Anc) then
-  Anc := Full_View (Anc);
-   end if;
+if Has_Volatile_Components (Id) then
+   return True;
+else
+   declare
+  Anc : Entity_Id := Base_Type (Id);
+   begin
+  if Is_Private_Type (Anc) then
+ Anc := Full_View (Anc);
+  end if;
 
-   --  Test for presence of ancestor, as the full view of a private
-   --  type may be missing in case of error.
+  --  Test for presence of ancestor, as the full view of a
+  --  private type may be missing in case of error.
 
-   return
- Has_Volatile_Components (Id)
-   or else
- (Present (Anc)
-   and then Is_Effectively_Volatile (Component_Type (Anc)));
-end;
+  return
+Present (Anc)
+  and then Is_Effectively_Volatile (Component_Type (Anc));
+   end;
+end if;
 
  --  A protected type is always volatile
 




[Ada] Ongoing work for unnamed and named container aggregates

2020-07-15 Thread Pierre-Marie de Rodat
This implements additional functionality for the Ada 202x container
aggregates, in particular the use of iterated_component_association in
both Unnamed (positional) and Named (keyed) aggregates for types for
which the Aspect Aggregate is defined.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_aggr.adb (Resolve_Iterated_Component_Association): New
procedure, internal to Resolve_Container_Aggregate, to complete
semantic analysis of Iterated_Component_Associations.
* exp_aggr.adb (Expand_Iterated_Component): New procedure,
internal to Expand_Container_Aggregate, to expand the construct
into an implicit loop that performs individual insertions into
the target aggregate.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -6889,12 +6889,69 @@ package body Exp_Aggr is
   New_Indexed_Subp: Node_Id := Empty;
   Assign_Indexed_Subp : Node_Id := Empty;
 
+  procedure Expand_Iterated_Component (Comp : Node_Id);
+
   Aggr_Code : constant List_Id   := New_List;
   Temp  : constant Entity_Id := Make_Temporary (Loc, 'C', N);
 
+  Comp  : Node_Id;
   Decl  : Node_Id;
   Init_Stat : Node_Id;
 
+  ---
+  -- Expand_Iterated_Component --
+  ---
+
+  procedure Expand_Iterated_Component (Comp : Node_Id) is
+ Expr: constant Node_Id := Expression (Comp);
+ Loop_Id : constant Entity_Id :=
+Make_Defining_Identifier (Loc,
+  Chars => Chars (Defining_Identifier (Comp)));
+
+ L_Range: Node_Id;
+ L_Iteration_Scheme : Node_Id;
+ Loop_Stat  : Node_Id;
+ Stats  : List_Id;
+
+  begin
+ L_Range := Relocate_Node (First (Discrete_Choices (Comp)));
+ L_Iteration_Scheme :=
+   Make_Iteration_Scheme (Loc,
+ Loop_Parameter_Specification =>
+   Make_Loop_Parameter_Specification (Loc,
+ Defining_Identifier => Loop_Id,
+ Discrete_Subtype_Definition => L_Range));
+
+ --  Build insertion statement. For a positional aggregate only
+ --  the expression is needed. For a named aggregate the loop
+ --  variable, whose type is that of the key, is an additional
+ --  parameter for the insertion operation.
+
+ if Present (Add_Unnamed_Subp) then
+Stats := New_List
+  (Make_Procedure_Call_Statement (Loc,
+Name => New_Occurrence_Of (Entity (Add_Unnamed_Subp), Loc),
+Parameter_Associations =>
+  New_List (New_Occurrence_Of (Temp, Loc),
+ New_Copy_Tree (Expr))));
+ else
+Stats := New_List
+  (Make_Procedure_Call_Statement (Loc,
+ Name => New_Occurrence_Of (Entity (Add_Named_Subp), Loc),
+ Parameter_Associations =>
+   New_List (New_Occurrence_Of (Temp, Loc),
+   New_Occurrence_Of (Loop_Id, Loc),
+   New_Copy_Tree (Expr))));
+ end if;
+
+ Loop_Stat :=  Make_Implicit_Loop_Statement
+ (Node => N,
+  Identifier   => Empty,
+  Iteration_Scheme => L_Iteration_Scheme,
+  Statements   => Stats);
+ Append (Loop_Stat, Aggr_Code);
+  end Expand_Iterated_Component;
+
begin
   Parse_Aspect_Aggregate (Asp,
 Empty_Subp, Add_Named_Subp, Add_Unnamed_Subp,
@@ -6905,7 +6962,7 @@ package body Exp_Aggr is
   Object_Definition   => New_Occurrence_Of (Typ, Loc));
 
   Insert_Action (N, Decl);
-  if Ekind (Entity (Empty_Subp)) = E_Constant then
+  if Ekind (Entity (Empty_Subp)) = E_Function then
  Init_Stat := Make_Assignment_Statement (Loc,
Name => New_Occurrence_Of (Temp, Loc),
Expression => Make_Function_Call (Loc,
@@ -6919,24 +6976,70 @@ package body Exp_Aggr is
 
   --  First case: positional aggregate
 
-  if Present (Expressions (N)) then
+  if Present (Add_Unnamed_Subp) then
+ if Present (Expressions (N)) then
+declare
+   Insert : constant Entity_Id := Entity (Add_Unnamed_Subp);
+   Comp   : Node_Id;
+   Stat   : Node_Id;
+
+begin
+   Comp := First (Expressions (N));
+   while Present (Comp) loop
+  Stat := Make_Procedure_Call_Statement (Loc,
+Name => New_Occurrence_Of (Insert, Loc),
+Parameter_Associations =>
+  New_List (New_Occurrence_Of (Temp, Loc),
+ New_Copy_Tree (Comp)));
+  Append (Stat, Aggr_Code);
+  Next (Comp);
+   end loop;
+ 

Re: [PATCH] libsanitizer: Fix GetPcSpBp determination of SP on 32-bit Solaris/x86

2020-07-15 Thread Jakub Jelinek via Gcc-patches
On Wed, Jul 15, 2020 at 03:06:18PM +0200, Rainer Orth wrote:
> I must admit I missed that in that terrible nested maze of #ifdef's
> compiler-rt uses.
> 
> > I mean, while the ifndef/define change is guarded by #if SANITIZER_SOLARIS,
> > the last changed line is not.  I'm afraid I don't know if
> > ucontext->uc_mcontext.gregs[REG_UESP] or 
> > ucontext->uc_mcontext.gregs[REG_ESP];
> > is what we want to use on i686-linux...
> 
> So far, I've regtested both GCC and LLVM master on x86_64-pc-linux-gnu
> (both m64 and m32 multilibs) and there were no regressions.
> 
> I've then tried to make sense of the situation in the Linux kernel
> sources and found some hints suggesting that REG_UESP is right here,
> too:
> 
> * arch/x86/um/os-Linux/mcontext.c has
> 
> void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
> {
> #ifdef __i386__
> #define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y]
> #define COPY(X) regs->gp[X] = mc->gregs[REG_##X]
> [...]
> COPY(EDI); COPY(ESI); COPY(EBP);
> COPY2(UESP, ESP); /* sic */
> 
> * Similarly in arch/x86/um/user-offsets.c:
> 
> void foo(void)
> {
> #ifdef __i386__
> [...]
> DEFINE(HOST_IP, EIP);
> DEFINE(HOST_SP, UESP);
> [...]
> DEFINE(HOST_BP, EBP);
> 
> * And arch/x86/include/uapi/asm/sigcontext.h:
> 
> struct sigcontext_32 {
> [...]
> __u32   bp;
> __u32   sp;
> [...]
> __u32   sp_at_signal;
> __u16   ss, __ssh;
> 
> I may still be mistaken, but all this suggests that Linux might be
> playing games with ESP and UESP, while Solaris and NetBSD (see below)
> keep them separate, but in the end UESP is the register to use.

I think what matters more is
kernel/signal.c and ia32/ia32_signal.c doing:
put_user_ex(regs->sp, &sc->sp);
...
put_user_ex(regs->sp, &sc->sp_at_signal);
i.e. both ESP and UESP ought to have the same value in the end on Linux.
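
For context, a minimal sketch of reading the signal-time SP from a ucontext
on 32-bit x86 Linux looks like this (illustrative only, not the actual
GetPcSpBp code; it assumes glibc's REG_* indices are visible, hence the
_GNU_SOURCE define):

/* Illustrative sketch, 32-bit x86 only.  */
#define _GNU_SOURCE
#include <ucontext.h>
#include <stdint.h>

static uintptr_t
sp_from_context (void *context)
{
  ucontext_t *uc = (ucontext_t *) context;
#ifdef REG_UESP
  return uc->uc_mcontext.gregs[REG_UESP];  /* user SP saved at signal time */
#else
  return uc->uc_mcontext.gregs[REG_ESP];
#endif
}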

Your patch is ok for trunk and for 10.3 after a few weeks (10.2 is frozen
ATM anyway) so that if there are some problems (like say some headers not
defining REG_UESP or something similar), we'll find that out on trunk first.

Jakub



c++: error recovery & pragmas

2020-07-15 Thread Nathan Sidwell
Parser error recovery can get confused by the tokens within a deferred 
pragma, as it treats those as regular tokens.  This adjusts the recovery so 
that the pragma is treated as a unit.  Also, the preprocessor now 
ensures that we never have an EOF token inside a pragma -- the pragma is 
always closed first.


In case you're wondering, C++ modules has some preprocessor-recognized 
constructs that are naturally passed through as deferred pragmas, but 
with neither '#' nor 'pragma' being in the source.


gcc/cp/
* parser.c (cp_parser_skip_to_closing_parenthesis_1): Deal with
meeting a deferred pragma.
(cp_parser_skip_to_end_of_statement): Likewise.
(cp_parser_skip_to_end_of_block_or_statement): Likewise.
(cp_parser_skip_to_pragma_eol): We should never meet EOF.
(cp_parser_omp_declare_simd): Likewise.
(cp_parser_omp_declare_reduction, cp_parser_oacc_routine)
(pragma_lex): Likewise.
gcc/testsuite/
* g++.dg/parse/pragma-recovery.C: New.

pushed

--
Nathan Sidwell
diff --git c/gcc/cp/parser.c w/gcc/cp/parser.c
index 08cfd23d8c4..1532431378e 100644
--- c/gcc/cp/parser.c
+++ w/gcc/cp/parser.c
@@ -3689,6 +3689,11 @@ cp_parser_skip_to_closing_parenthesis_1 (cp_parser *parser,
 	condop_depth--;
 	  break;
 
+	case CPP_PRAGMA:
+	  /* We fell into a pragma.  Skip it, and continue. */
+	  cp_parser_skip_to_pragma_eol (parser, token);
+	  continue;
+
 	default:
 	  break;
 	}
@@ -3780,6 +3785,13 @@ cp_parser_skip_to_end_of_statement (cp_parser* parser)
 	  ++nesting_depth;
 	  break;
 
+	case CPP_PRAGMA:
+	  /* We fell into a pragma.  Skip it, and continue or return. */
+	  cp_parser_skip_to_pragma_eol (parser, token);
+	  if (!nesting_depth)
+	return;
+	  continue;
+
 	default:
 	  break;
 	}
@@ -3855,6 +3867,13 @@ cp_parser_skip_to_end_of_block_or_statement (cp_parser* parser)
 	  nesting_depth++;
 	  break;
 
+	case CPP_PRAGMA:
+	  /* Skip it, and continue or return. */
+	  cp_parser_skip_to_pragma_eol (parser, token);
+	  if (!nesting_depth)
+	return;
+	  continue;
+
 	default:
 	  break;
 	}
@@ -3921,8 +3940,15 @@ cp_parser_skip_to_pragma_eol (cp_parser* parser, cp_token *pragma_tok)
   parser->lexer->in_pragma = false;
 
   do
-token = cp_lexer_consume_token (parser->lexer);
-  while (token->type != CPP_PRAGMA_EOL && token->type != CPP_EOF);
+{
+  /* The preprocessor makes sure that a PRAGMA_EOL token appears
+ before an EOF token, even when the EOF is on the pragma line.
+ We should never get here without being inside a deferred
+ pragma.  */
+  gcc_checking_assert (cp_lexer_next_token_is_not (parser->lexer, CPP_EOF));
+  token = cp_lexer_consume_token (parser->lexer);
+}
+  while (token->type != CPP_PRAGMA_EOL);
 
   /* Ensure that the pragma is not parsed again.  */
   cp_lexer_purge_tokens_after (parser->lexer, pragma_tok);
@@ -41470,11 +41496,8 @@ cp_parser_omp_declare_simd (cp_parser *parser, cp_token *pragma_tok,
 }
 
   /* Store away all pragma tokens.  */
-  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)
-	 && cp_lexer_next_token_is_not (parser->lexer, CPP_EOF))
+  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
 cp_lexer_consume_token (parser->lexer);
-  if (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
-parser->omp_declare_simd->error_seen = true;
   cp_parser_require_pragma_eol (parser, pragma_tok);
   struct cp_token_cache *cp
 = cp_token_cache_new (pragma_tok, cp_lexer_peek_token (parser->lexer));
@@ -42534,11 +42557,8 @@ cp_parser_omp_declare_reduction (cp_parser *parser, cp_token *pragma_tok,
 	{
 	  if (cp == NULL)
 	{
-	  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)
-		 && cp_lexer_next_token_is_not (parser->lexer, CPP_EOF))
+	  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
 		cp_lexer_consume_token (parser->lexer);
-	  if (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
-		goto fail;
 	  cp = cp_token_cache_new (first_token,
    cp_lexer_peek_nth_token (parser->lexer,
 2));
@@ -43017,11 +43037,8 @@ cp_parser_oacc_routine (cp_parser *parser, cp_token *pragma_tok,
   else /* No optional '( name )'.  */
 {
   /* Store away all pragma tokens.  */
-  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)
-	 && cp_lexer_next_token_is_not (parser->lexer, CPP_EOF))
+  while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
 	cp_lexer_consume_token (parser->lexer);
-  if (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL))
-	parser->oacc_routine->error_seen = true;
   cp_parser_require_pragma_eol (parser, pragma_tok);
   struct cp_token_cache *cp
 	= cp_token_cache_new (pragma_tok, cp_lexer_peek_token (parser->lexer));
@@ -44014,7 +44031,7 @@ pragma_lex (tree *value, location_t *loc)
   if (loc)
   

Re: [PATCH] libsanitizer: Fix GetPcSpBp determination of SP on 32-bit Solaris/x86

2020-07-15 Thread Rainer Orth
Hi Jakub,

> On Tue, Jul 14, 2020 at 02:32:57PM +0200, Rainer Orth wrote:
>> The latest Solaris 11.4/x86 update uncovered a libsanitizer bug that
>> caused one test to FAIL for 32-bit:
>> 
>> +FAIL: c-c++-common/asan/null-deref-1.c   -O0  output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c   -O1  output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c   -O2  output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c   -O2 -flto  output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c -O2 -flto -flto-partition=none
>> output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c   -O3 -g  output pattern test
>> +FAIL: c-c++-common/asan/null-deref-1.c   -Os  output pattern test
>> 
>> I've identified the problem and the fix has just landed in upstream
>> llvm-project:
>> 
>>  https://reviews.llvm.org/D83664
>> 
>> Tested on i386-pc-solaris2.11 on master, gcc-10 and gcc-9 branches.
>> 
>> Ok for all three?
>
> Won't this break i386-linux?

I must admit I missed that in that terrible nested maze of #ifdef's
compiler-rt uses.

> I mean, while the ifndef/define change is guarded by #if SANITIZER_SOLARIS,
> the last changed line is not.  I'm afraid I don't know if
> ucontext->uc_mcontext.gregs[REG_UESP] or ucontext->uc_mcontext.gregs[REG_ESP];
> is what we want to use on i686-linux...

So far, I've regtested both GCC and LLVM master on x86_64-pc-linux-gnu
(both m64 and m32 multilibs) and there were no regressions.

I've then tried to make sense of the situation in the Linux kernel
sources and found some hints suggesting that REG_UESP is right here,
too:

* arch/x86/um/os-Linux/mcontext.c has

void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
{
#ifdef __i386__
#define COPY2(X,Y) regs->gp[X] = mc->gregs[REG_##Y]
#define COPY(X) regs->gp[X] = mc->gregs[REG_##X]
[...]
COPY(EDI); COPY(ESI); COPY(EBP);
COPY2(UESP, ESP); /* sic */

* Similarly in arch/x86/um/user-offsets.c:

void foo(void)
{
#ifdef __i386__
[...]
DEFINE(HOST_IP, EIP);
DEFINE(HOST_SP, UESP);
[...]
DEFINE(HOST_BP, EBP);

* And arch/x86/include/uapi/asm/sigcontext.h:

struct sigcontext_32 {
[...]
__u32   bp;
__u32   sp;
[...]
__u32   sp_at_signal;
__u16   ss, __ssh;

I may still be mistaken, but all this suggests that Linux might be
playing games with ESP and UESP, while Solaris and NetBSD (see below)
keep them separate, but in the end UESP is the register to use.

> Similarly netbsd.

Here's what I found in the NetBSD sources (I haven't tried doing builds
in a VM so far):

<ucontext.h> includes <machine/mcontext.h>.  The i386 version
(sys/arch/i386/include/mcontext.h) has

#ifndef _UC_MACHINE_SP
#define _UC_MACHINE_SP(uc)  ((uc)->uc_mcontext.__gregs[_REG_UESP])
#endif
#define _UC_MACHINE_FP(uc)  ((uc)->uc_mcontext.__gregs[_REG_EBP])
#define _UC_MACHINE_PC(uc)  ((uc)->uc_mcontext.__gregs[_REG_EIP])

which suggests REG_UESP is right here.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH v2] sparc/sparc64: use crtendS.o for default-pie executables [PR96190]

2020-07-15 Thread Eric Botcazou
> This should be:
> 
>   PR target/96190
>   * config/sparc/linux.h (ENDFILE_SPEC): Use GNU_USER_TARGET_ENDFILE_SPEC
>   to get crtendS.o for !no-pie mode.
>   * config/sparc/linux64.h(ENDFILE_SPEC): Ditto.

* config/sparc/linux64.h (ENDFILE_SPEC): Ditto.

> OK for mainline with this change.  You can also put it on the 10 branch
> after the 10.1 release is out if this is deemed necessary.

10.2

-- 
Eric Botcazou


GCC 10.1.1 Status Report (2020-07-15)

2020-07-15 Thread Richard Biener


Status
==

The GCC 10 branch is now frozen for the GCC 10.2 release; all changes
to the branch require RM approval.


Quality Data


Priority  #   Change from last report
---   ---
P1 
P2  218   +   2
P3   53   +   6
P4  176   +   2
P5   22
---   ---
Total P1-P3 271   +   8
Total   469   +  10


Previous Report
===

https://gcc.gnu.org/pipermail/gcc/2020-June/232986.html


Re: [patch] Reunify x86 stack checking implementation

2020-07-15 Thread Uros Bizjak via Gcc-patches
> the stack clash protection mechanism in the x86 back-end was implemented by
> largely duplicating the existing stack checking implementation.  Now the only
> significant difference between them is the probing window, which is shifted by
> 1 probing interval (not 2 as documented in explow.c), but we can certainly do
> 1 more probe for stack checking even if it is redundant in almost all cases.
>
> Tested on x86-64/Linux, OK for the mainline?
>
>
> 2020-07-15  Eric Botcazou  
>
> * config/i386/i386.c (ix86_compute_frame_layout): Minor tweak.
> (ix86_adjust_stack_and_probe): Delete.
> (ix86_adjust_stack_and_probe_stack_clash): Rename to above and add
> PROTECTION_AREA parameter.  If it is true, probe PROBE_INTERVAL plus
> a small dope beyond SIZE bytes.
> (ix86_emit_probe_stack_range): Use local variable.
> (ix86_expand_prologue): Adjust calls to ix86_adjust_stack_and_probe
> and tidy up the stack checking code.
> * explow.c (get_stack_check_protect): Fix head comment.
> (anti_adjust_stack_and_probe_stack_clash): Likewise.
> (allocate_dynamic_stack_space): Add comment.

LGTM.

Thanks,
Uros.


Re: [Patch] [OpenMP, Fortran] Add structure/derived-type element mapping

2020-07-15 Thread Tobias Burnus

On 6/24/20 7:32 PM, Tobias Burnus wrote:


While testing, I encountered two bugs, one relating to kind=4
character strings (patch pending review; PR95837;
not part of the testcase)


As that PR has been committed, I updated the testcase
to check character(kind=4) as well. (I also removed
the unused variables to silence -Wall warnings when
running manually.)

Committed as obvious.

Tobias


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit e0685fadb6aa7c9cc895bc14cbbe2b9026fa3a94
Author: Tobias Burnus 
Date:   Wed Jul 15 12:29:44 2020 +0200

libgomp.fortran/struct-elem-map-1.f90: Add char kind=4 tests

As the Fortran PR 95837 has been fixed, the test could now be added.

libgomp/ChangeLog:

* testsuite/libgomp.fortran/struct-elem-map-1.f90: Remove unused
variables; add character(kind=4) tests; update TODO comment.

diff --git a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90
index f18eeb90165..58550c79d69 100644
--- a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90
@@ -2,11 +2,9 @@
 !
 ! Test OpenMP 4.5 structure-element mapping
 
-! TODO: character(kind=4,...) needs to be tested, but depends on
-!   PR fortran/95837
-! TODO: ...%str4 should be tested but that currently fails due to
+! TODO: ...%str4 + %uni4 should be tested but that currently fails due to
 !   PR fortran/95868 (see commented lined)
-! TODO: Test also array-valued var, nested derived types,
+! TODO: Test also 'var' as array and/or pointer; nested derived types,
 !   type-extended types.
 
 program main
@@ -22,6 +20,10 @@ program main
 character(len=5) :: str2(4)
 character(len=:), pointer :: str3 => null()
 character(len=:), pointer :: str4(:) => null()
+character(kind=4, len=5) :: uni1
+character(kind=4, len=5) :: uni2(4)
+character(kind=4, len=:), pointer :: uni3 => null()
+character(kind=4, len=:), pointer :: uni4(:) => null()
   end type t2
 
   integer :: i
@@ -38,8 +40,7 @@ program main
 contains
   ! Implicitly mapped – but no pointers are mapped
   subroutine one() 
-type(t2) :: var, var2(4)
-type(t2), pointer :: var3, var4(:)
+type(t2) :: var
 
 print '(g0)', ' TESTCASE "one" '
 
@@ -47,11 +48,15 @@ contains
  b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), &
  d = [(-3*i, i = 1, 10)], &
  str1 = "abcde", &
- str2 = ["12345", "67890", "ABCDE", "FGHIJ"])
+ str2 = ["12345", "67890", "ABCDE", "FGHIJ"], &
+ uni1 = 4_"abcde", &
+ uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"])
 allocate (var%e, source=99)
 allocate (var%f, source=[22, 33, 44, 55])
 allocate (var%str3, source="HelloWorld")
 allocate (var%str4, source=["Let's", "Go!!!"])
+allocate (var%uni3, source=4_"HelloWorld")
+allocate (var%uni4, source=[4_"Let's", 4_"Go!!!"])
 
 !$omp target map(tofrom:var)
   if (var%a /= 1) stop 1
@@ -60,15 +65,16 @@ contains
   if (any (var%d /= [(-3*i, i = 1, 10)])) stop 4
   if (var%str1 /= "abcde") stop 5
   if (any (var%str2 /= ["12345", "67890", "ABCDE", "FGHIJ"])) stop 6
+  if (var%uni1 /= 4_"abcde") stop 7
+  if (any (var%uni2 /= [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"])) stop 8
 !$omp end target
 
-deallocate(var%e, var%f, var%str3, var%str4)
+deallocate(var%e, var%f, var%str3, var%str4, var%uni3, var%uni4)
   end subroutine one
 
   ! Explicitly mapped – all and full arrays
   subroutine two() 
-type(t2) :: var, var2(4)
-type(t2), pointer :: var3, var4(:)
+type(t2) :: var
 
 print '(g0)', ' TESTCASE "two" '
 
@@ -76,14 +82,19 @@ contains
  b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), &
  d = [(-3*i, i = 1, 10)], &
  str1 = "abcde", &
- str2 = ["12345", "67890", "ABCDE", "FGHIJ"])
+ str2 = ["12345", "67890", "ABCDE", "FGHIJ"], &
+ uni1 = 4_"abcde", &
+ uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"])
 allocate (var%e, source=99)
 allocate (var%f, source=[22, 33, 44, 55])
 allocate (var%str3, source="HelloWorld")
 allocate (var%str4, source=["Let's", "Go!!!"])
+allocate (var%uni3, source=4_"HelloWorld")
+allocate (var%uni4, source=[4_"Let's", 4_"Go!!!"])
 
 !$omp target map(tofrom: var%a, var%b, var%c, var%d, var%e, var%f, &
-!$omp&   var%str1, var%str2, var%str3, var%str4)
+!$omp&   var%str1, var%str2, var%str3, var%str4,   &
+!$omp&   var%uni1, var%uni2, var%uni3, var%uni4)
   if (var%a /= 1) stop 1
   if (var%b /= 2)  stop 2
   if (var%c%re /= -1.0_8 .or. var%c%im /= 2.0_8) stop 3
@@ -103,15 +114,24 @@ contains

Re: [PATCH 8/9] [OpenACC] Fix standalone attach for Fortran assumed-shape array pointers

2020-07-15 Thread Thomas Schwinge
Hi Julian, Tobias!

On 2020-07-14T13:43:37+0200, I wrote:
> On 2020-06-16T15:39:44-0700, Julian Brown  wrote:
>> As mentioned in the blurb for the previous patch, an "attach" operation
>> for a Fortran pointer with an array descriptor must copy that array
>> descriptor to the target.
>
> Heh, I see -- I don't think I had read the OpenACC standard in that way,
> but I think I agree your interpretation is fine.
>
> This does not create some sort of memory leak -- everything implicitly
> allocated there will eventually be deallocated again, right?
>
>> This patch arranges for that to be so.
>
> In response to the new OpenACC/Fortran testcase that I'd submitted in
> http://mid.mail-archive.com/87wo3co0tm.fsf@euler.schwinge.homeip.net,
> you (Julian) correctly supposed in
> , that
> this patch indeed does resolve that testcase, too.  That wasn't obvious
> to me.  So, similar to
> 'libgomp/testsuite/libgomp.oacc-c-c++-common/pr95270-{1.2}.c', please
> include my new OpenACC/Fortran testcase (if that makes sense to you), and
> reference PR95270 in the commit log.

My new OpenACC/Fortran testcase got again broken ('libgomp: pointer
target not mapped for attach') by Tobias' commit
102502e32ea4e8a75d6b252ba319d09d735d9aa7 "[OpenMP, Fortran] Add
structure/derived-type element mapping",
http://mid.mail-archive.com/c5b43e02-d1d5-e7cf-c11c-6daf1e8f33c5@codesourcery.com.

Similar ('libgomp: attempt to attach null pointer') for your new
'libgomp.oacc-fortran/attach-descriptor-1.f90'.

(Whether or not 'attach'ing 'NULL' should actually be allowed is a
separate topic for discussion.)

So this patch here will (obviously) need to be adapted to what Tobias
changed.  (Plus my more general questions quoted above and below.)


Grüße
 Thomas


>> OK?
>
> Basically yes (for master and releases/gcc-10 branches), but please
> consider the following:
>
>> --- a/gcc/fortran/trans-openmp.c
>> +++ b/gcc/fortran/trans-openmp.c
>> @@ -2573,8 +2573,44 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
>> gfc_omp_clauses *clauses,
>>  }
>>  }
>>if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))
>> -  && n->u.map_op != OMP_MAP_ATTACH
>> -  && n->u.map_op != OMP_MAP_DETACH)
>> +  && (n->u.map_op == OMP_MAP_ATTACH
>> +  || n->u.map_op == OMP_MAP_DETACH))
>> +{
>> +  tree type = TREE_TYPE (decl);
>> +  tree data = gfc_conv_descriptor_data_get (decl);
>> +  if (present)
>> +data = gfc_build_cond_assign_expr (block, present,
>> +   data,
>> +   null_pointer_node);
>> +  tree ptr
>> += fold_convert (build_pointer_type (char_type_node),
>> +data);
>> +  ptr = build_fold_indirect_ref (ptr);
>> +  /* Standalone attach clauses used with arrays with
>> + descriptors must copy the descriptor to the target,
>> + else they won't have anything to perform the
>> + attachment onto (see OpenACC 2.6, "2.6.3. Data
>> + Structures with Pointers").  */
>> +  OMP_CLAUSE_DECL (node) = ptr;
>> +  node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP);
>> +  OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET);
>> +  OMP_CLAUSE_DECL (node2) = decl;
>> +  OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
>> +  node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP);
>> +  if (n->u.map_op == OMP_MAP_ATTACH)
>> +{
>> +  OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_ATTACH);
>> +  n->u.map_op = OMP_MAP_ALLOC;
>> +}
>> +  else  /* OMP_MAP_DETACH.  */
>> +{
>> +  OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_DETACH);
>> +  n->u.map_op = OMP_MAP_RELEASE;
>> +}
>> +  OMP_CLAUSE_DECL (node3) = data;
>> +  OMP_CLAUSE_SIZE (node3) = size_int (0);
>> +}
>
> So this ("case A") duplicates most of the code from...
>
>> +  else if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
>>  {
>>[...]
>
> ... this existing case here ("case B").  It's not clear to me if these
> two cases really still need to be handled separately, and a little bit
> differently (regarding 'if (present)' handling, for example), or if they
> could/should (?) be merged?  Tobias, do you have an opinion?
>
> Do we h

Re: [GCC 10 PATCH] c++: Treat GNU and Advanced SIMD vectors as distinct [PR95726]

2020-07-15 Thread Richard Biener via Gcc-patches
On Tue, Jul 14, 2020 at 1:21 AM Richard Sandiford
 wrote:
>
> Jakub Jelinek  writes:
> > On Wed, Jul 08, 2020 at 03:10:14PM +0100, Richard Sandiford wrote:
> >> gcc/
> >>  PR target/95726
> >>  * config/aarch64/aarch64.c (aarch64_attribute_table): Add
> >>  "Advanced SIMD type".
> >>  * config/aarch64/aarch64-builtins.c: Include stringpool.h and
> >>  attribs.h.
> >>  (aarch64_init_simd_builtin_types): Add an "Advanced SIMD type"
> >>  attribute to each Advanced SIMD type.
> >>
> >> gcc/cp/
> >>  PR target/95726
> >>  * typeck.c (structural_comptypes): When comparing template
> >>  specializations, differentiate between vectors that have and
> >>  do not have an "Advanced SIMD type" attribute.
> >>
> >> gcc/testsuite/
> >>  PR target/95726
> >>  * g++.target/aarch64/pr95726.C: New test.
> >> --- a/gcc/cp/typeck.c
> >> +++ b/gcc/cp/typeck.c
> >> @@ -1429,6 +1429,15 @@ structural_comptypes (tree t1, tree t2, int strict)
> >>|| maybe_ne (TYPE_VECTOR_SUBPARTS (t1), TYPE_VECTOR_SUBPARTS (t2))
> >>|| !same_type_p (TREE_TYPE (t1), TREE_TYPE (t2)))
> >>  return false;
> >
> > I'd at least add an explaining comment that it is a hack for GCC 8-10 only,
> > for aarch64 and arm targets, why, reference to the PR and that it is solved
> > differently for GCC 11+.
>
> OK, done below.  This version also includes arm support.
>
> >> +  if (comparing_specializations)
> >> +{
> >> +  bool asimd1 = lookup_attribute ("Advanced SIMD type",
> >> +  TYPE_ATTRIBUTES (t1));
> >> +  bool asimd2 = lookup_attribute ("Advanced SIMD type",
> >> +  TYPE_ATTRIBUTES (t2));
> >> +  if (asimd1 != asimd2)
> >> +return false;
> >> +}
> >
> > Otherwise LGTM for release branches if it is acceptable that way to Jason.
>
> Thanks.  Jason, does it look OK from your point of view?
>
> Richard
>
>
> This is a release branch version of
> r11-1741-g:31427b974ed7b7dd54e28fec595e731bf6eea8ba and
> r11-2022-g:efe99cca78215e339ba79f0a900a896b4c0a3d36.
>
> The trunk versions of the patch made GNU and Advanced SIMD vectors
> distinct (but inter-convertible) in all cases.  However, the
> traditional behaviour is that the types are distinct in template
> arguments but not otherwise.
>
> Following a suggestion from Jason, this patch puts the check
> for different vector types under comparing_specializations.
> In order to keep the backport as simple as possible, the patch
> hard-codes the name of the attribute in the frontend rather than
> adding a new branch-only target hook.
>
> I didn't find a test that tripped the assert on the branch,
> even with the --param in the PR, so instead I tested this by
> forcing the hash function to only hash the tree code.  That made
> the static assertion in the test fail without the patch but pass
> with it.
>
> This means that the tests pass for unmodified sources even
> without the patch (unless you're very unlucky).

Can you please apply this patch so I can roll GCC 10.2 RC1?

In the unlikely event Jason requests changes we can integrate
those early next week.

Thanks,
Richard.

> gcc/
> PR target/95726
> * config/aarch64/aarch64.c (aarch64_attribute_table): Add
> "Advanced SIMD type".
> * config/aarch64/aarch64-builtins.c: Include stringpool.h and
> attribs.h.
> (aarch64_init_simd_builtin_types): Add an "Advanced SIMD type"
> attribute to each Advanced SIMD type.
> * config/arm/arm.c (arm_attribute_table): Add "Advanced SIMD type".
> * config/arm/arm-builtins.c: Include stringpool.h and attribs.h.
> (arm_init_simd_builtin_types): Add an "Advanced SIMD type"
> attribute to each Advanced SIMD type.
>
> gcc/cp/
> PR target/95726
> * typeck.c (structural_comptypes): When comparing template
> specializations, differentiate between vectors that have and
> do not have an "Advanced SIMD type" attribute.
>
> gcc/testsuite/
> PR target/95726
> * g++.target/aarch64/pr95726.C: New test.
> * g++.target/arm/pr95726.C: Likewise.
> ---
>  gcc/config/aarch64/aarch64-builtins.c  | 14 +---
>  gcc/config/aarch64/aarch64.c   |  1 +
>  gcc/config/arm/arm-builtins.c  | 15 ++--
>  gcc/config/arm/arm.c   |  1 +
>  gcc/cp/typeck.c| 42 ++
>  gcc/testsuite/g++.target/aarch64/pr95726.C | 28 +++
>  gcc/testsuite/g++.target/arm/pr95726.C | 31 
>  7 files changed, 125 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/aarch64/pr95726.C
>  create mode 100644 gcc/testsuite/g++.target/arm/pr95726.C
>
> diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> index 5f8f8290f0f..6ebf6319efd 100644
> --- a/gcc/cp/typeck.c
> +++ b/gcc/cp/typeck.c
> @@ -1429,6 +1429,48 @@ structural_comptypes (tr

Re: [PATCH] genemit.c (main): split insn-emit.c for compiling parallelly

2020-07-15 Thread Rainer Orth
Hi Jojo,

> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 2ba76656dbf..3306510a9a8 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1253,6 +1253,13 @@ ANALYZER_OBJS = \
>  # We put the *-match.o and insn-*.o files first so that a parallel make
>  # will build them sooner, because they are large and otherwise tend to be
>  # the last objects to finish building.
> +
> +insn-generated-split-num = 15
> +
> +insn-emit-split-c = $(foreach o, $(shell seq 1
> $(insn-generated-split-num)), insn-emit$(o).c)

I just came across this: seq is a non-standard GNU extension and thus
not guaranteed to be available.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] genemit.c (main): split insn-emit.c for compiling parallelly

2020-07-15 Thread Jojo R
Hi,

The size of generated files like insn-emit.c becomes huge
if we add lots of define_expand or define_insn patterns.

This shows up easily when there are many more intrinsic interfaces;
building the toolchain then spends much more time compiling these files
:(

gcc/ChangeLog:

* genemit.c (main): Print 'split line'.
* Makefile.in (insn-emit.c): Define split count and file

---
 gcc/Makefile.in | 10 ++
 gcc/genemit.c | 86 +++--
 2 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 2ba76656dbf..3306510a9a8 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1253,6 +1253,13 @@ ANALYZER_OBJS = \
 # We put the *-match.o and insn-*.o files first so that a parallel make
 # will build them sooner, because they are large and otherwise tend to be
 # the last objects to finish building.
+
+insn-generated-split-num = 15
+
+insn-emit-split-c = $(foreach o, $(shell seq 1 $(insn-generated-split-num)), 
insn-emit$(o).c)
+insn-emit-split-obj = $(patsubst %.c,%.o, $(insn-emit-split-c))
+$(insn-emit-split-c): insn-emit.c
+
 OBJS = \
 gimple-match.o \
 generic-match.o \
@@ -1260,6 +1267,7 @@ OBJS = \
 insn-automata.o \
 insn-dfatab.o \
 insn-emit.o \
+   $(insn-emit-split-obj) \
 insn-extract.o \
 insn-latencytab.o \
 insn-modes.o \
@@ -2367,6 +2375,8 @@ $(simple_generated_c:insn-%.c=s-%): s-%: 
build/gen%$(build_exeext)
 $(RUN_GEN) build/gen$*$(build_exeext) $(md_file) \
 $(filter insn-conditions.md,$^) > tmp-$*.c
 $(SHELL) $(srcdir)/../move-if-change tmp-$*.c insn-$*.c
+   -csplit insn-$*.c /i\ am\ split\ line/ -k -s 
{$(insn-generated-split-num)} -f insn-$* -b "%d.c"
+   -( [ ! -s insn-$*0.c ] && for i in `seq 1 $(insn-generated-split-num)`; 
do touch insn-$*$$i.c; done && echo "" > insn-$*.c)
 $(STAMP) s-$*

 # gencheck doesn't read the machine description, and the file produced
diff --git a/gcc/genemit.c b/gcc/genemit.c
index 84d07d388ee..fd60cdeeb96 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -847,6 +847,45 @@ handle_overloaded_gen (overloaded_name *oname)
 }
 }

+#define printf_include() \
+ printf ("/* Generated automatically by the program `genemit'\n\
+from the machine description file `md'. */\n\n"); \
+ printf ("#define IN_TARGET_CODE 1\n"); \
+ printf ("#include \"config.h\"\n"); \
+ printf ("#include \"system.h\"\n"); \
+ printf ("#include \"coretypes.h\"\n"); \
+ printf ("#include \"backend.h\"\n"); \
+ printf ("#include \"predict.h\"\n"); \
+ printf ("#include \"tree.h\"\n"); \
+ printf ("#include \"rtl.h\"\n"); \
+ printf ("#include \"alias.h\"\n"); \
+ printf ("#include \"varasm.h\"\n"); \
+ printf ("#include \"stor-layout.h\"\n"); \
+ printf ("#include \"calls.h\"\n"); \
+ printf ("#include \"memmodel.h\"\n"); \
+ printf ("#include \"tm_p.h\"\n"); \
+ printf ("#include \"flags.h\"\n"); \
+ printf ("#include \"insn-config.h\"\n"); \
+ printf ("#include \"expmed.h\"\n"); \
+ printf ("#include \"dojump.h\"\n"); \
+ printf ("#include \"explow.h\"\n"); \
+ printf ("#include \"emit-rtl.h\"\n"); \
+ printf ("#include \"stmt.h\"\n"); \
+ printf ("#include \"expr.h\"\n"); \
+ printf ("#include \"insn-codes.h\"\n"); \
+ printf ("#include \"optabs.h\"\n"); \
+ printf ("#include \"dfp.h\"\n"); \
+ printf ("#include \"output.h\"\n"); \
+ printf ("#include \"recog.h\"\n"); \
+ printf ("#include \"df.h\"\n"); \
+ printf ("#include \"resource.h\"\n"); \
+ printf ("#include \"reload.h\"\n"); \
+ printf ("#include \"diagnostic-core.h\"\n"); \
+ printf ("#include \"regs.h\"\n"); \
+ printf ("#include \"tm-constrs.h\"\n"); \
+ printf ("#include \"ggc.h\"\n"); \
+ printf ("#include \"target.h\"\n\n"); \
+
 int
 main (int argc, const char **argv)
 {
@@ -862,49 +901,19 @@ main (int argc, const char **argv)
 /* Assign sequential codes to all entries in the machine description
 in parallel with the tables in insn-output.c. */

- printf ("/* Generated automatically by the program `genemit'\n\
-from the machine description file `md'. */\n\n");
-
- printf ("#define IN_TARGET_CODE 1\n");
- printf ("#include \"config.h\"\n");
- printf ("#include \"system.h\"\n");
- printf ("#include \"coretypes.h\"\n");
- printf ("#include \"backend.h\"\n");
- printf ("#include \"predict.h\"\n");
- printf ("#include \"tree.h\"\n");
- printf ("#include \"rtl.h\"\n");
- printf ("#include \"alias.h\"\n");
- printf ("#include \"varasm.h\"\n");
- printf ("#include \"stor-layout.h\"\n");
- printf ("#include \"calls.h\"\n");
- printf ("#include \"memmodel.h\"\n");
- printf ("#include \"tm_p.h\"\n");
- printf ("#include \"flags.h\"\n");
- printf ("#include \"insn-config.h\"\n");
- printf ("#include \"expmed.h\"\n");
- printf ("#include \"dojump.h\"\n");
- printf ("#include \"explow.h\"\n");
- printf ("#include \"emit-rtl.h\"\n");
- printf ("#include \"stmt.h\"\n");
- printf ("#include \"expr.h\"\n");
- printf ("#include \"insn-codes.h\"\n");
- printf ("#include \"optabs.h\"\n");
- printf ("

[patch] Reunify x86 stack checking implementation

2020-07-15 Thread Eric Botcazou
Hi,

the stack clash protection mechanism in the x86 back-end was implemented by 
largely duplicating the existing stack checking implementation.  Now the only 
significant difference between them is the probing window, which is shifted by 
1 probing interval (not 2 as documented in explow.c), but we can certainly do 
1 more probe for stack checking even if it is redundant in almost all cases.

Tested on x86-64/Linux, OK for the mainline?


2020-07-15  Eric Botcazou  

* config/i386/i386.c (ix86_compute_frame_layout): Minor tweak.
(ix86_adjust_stack_and_probe): Delete.
(ix86_adjust_stack_and_probe_stack_clash): Rename to above and add
PROTECTION_AREA parameter.  If it is true, probe PROBE_INTERVAL plus
a small dope beyond SIZE bytes.
(ix86_emit_probe_stack_range): Use local variable.
(ix86_expand_prologue): Adjust calls to ix86_adjust_stack_and_probe
and tidy up the stack checking code.
* explow.c (get_stack_check_protect): Fix head comment.
(anti_adjust_stack_and_probe_stack_clash): Likewise.
(allocate_dynamic_stack_space): Add comment.

-- 
Eric Botcazoudiff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5c373c091ce..31757b044c8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6169,10 +6169,7 @@ ix86_compute_frame_layout (void)
 }
 
   frame->save_regs_using_mov
-= (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
-   /* If static stack checking is enabled and done with probes,
-	  the registers need to be saved before allocating the frame.  */
-   && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
+= TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
 
   /* Skip return address and error code in exception handler.  */
   offset = INCOMING_FRAME_SP_OFFSET;
@@ -6329,6 +6326,9 @@ ix86_compute_frame_layout (void)
 
   if ((!to_allocate && frame->nregs <= 1)
   || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x8000))
+   /* If static stack checking is enabled and done with probes,
+	  the registers need to be saved before allocating the frame.  */
+  || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
   /* If stack clash probing needs a loop, then it needs a
 	 scratch register.  But the returned register is only guaranteed
 	 to be safe to use after register saves are complete.  So if
@@ -7122,17 +7122,20 @@ release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
 
 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
 
-   This differs from the next routine in that it tries hard to prevent
-   attacks that jump the stack guard.  Thus it is never allowed to allocate
-   more than PROBE_INTERVAL bytes of stack space without a suitable
-   probe.
+   If INT_REGISTERS_SAVED is true, then integer registers have already been
+   pushed on the stack.
 
-   INT_REGISTERS_SAVED is true if integer registers have already been
-   pushed on the stack.  */
+   If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
+   beyond SIZE bytes.
+
+   This assumes no knowledge of the current probing state, i.e. it is never
+   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
+   a suitable probe.  */
 
 static void
-ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
-	 const bool int_registers_saved)
+ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
+			 const bool int_registers_saved,
+			 const bool protection_area)
 {
   struct machine_function *m = cfun->machine;
 
@@ -7194,10 +7197,17 @@ ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
   emit_insn (gen_blockage ());
 }
 
+  const HOST_WIDE_INT probe_interval = get_probe_interval ();
+  const int dope = 4 * UNITS_PER_WORD;
+
+  /* If there is protection area, take it into account in the size.  */
+  if (protection_area)
+size += probe_interval + dope;
+
   /* If we allocate less than the size of the guard statically,
  then no probing is necessary, but we do need to allocate
  the stack.  */
-  if (size < (1 << param_stack_clash_protection_guard_size))
+  else if (size < (1 << param_stack_clash_protection_guard_size))
 {
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 			 GEN_INT (-size), -1,
@@ -7209,7 +7219,6 @@ ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
   /* We're allocating a large enough stack frame that we need to
  emit probes.  Either emit them inline or in a loop depending
  on the size.  */
-  HOST_WIDE_INT probe_interval = get_probe_interval ();
   if (size <= 4 * probe_interval)
 {
   HOST_WIDE_INT i;
@@ -7228,12 +7237,19 @@ ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
 	}
 
   /* We need to allocate space for the residual, but we do not need
-	 to probe the residual.  */
+	 to probe the residual...  */
   HOST_WIDE_INT residual = (i - probe_in

RE: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions emitted at -O3

2020-07-15 Thread xiezhiheng
> -Original Message-
> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
> Sent: Tuesday, July 7, 2020 10:08 PM
> To: xiezhiheng 
> Cc: Richard Biener ; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions
> emitted at -O3
> 
> xiezhiheng  writes:
> >> -Original Message-
> >> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
> >> Sent: Monday, July 6, 2020 5:31 PM
> >> To: xiezhiheng 
> >> Cc: Richard Biener ;
> gcc-patches@gcc.gnu.org
> >> Subject: Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions
> >> emitted at -O3
> >>
> >> No, this is unfortunately a known bug.  See:
> >>
> >>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95964
> >>
> >> (Although the PR is recent, it's been a known bug for longer.)
> >>
> >> As you say, the difficulty is that the correct attributes depend on what
> >> the built-in function does.  Most integer arithmetic is “const”, but
> things
> >> get more complicated for floating-point arithmetic.
> >>
> >> The SVE intrinsics use a three stage process:
> >>
> >> - each function is classified into one of several groups
> >> - each group has a set of flags that describe what functions in the
> >>   group can do
> >> - these flags get converted into attributes based on the current
> >>   command-line options
> >>
> >> I guess we should have something similar for the arm_neon.h built-ins.
> >>
> >> If you're willing to help fix this, that'd be great.  I think a first
> >> step would be to agree a design.
> >>
> >> Thanks,
> >> Richard
> >
> > I'd like to have a try.
> 
> Great!
> 
> > I have checked the steps in SVE intrinsics.
> > It defines a base class "function_base" and derives different classes
> > to describe several intrinsics for each.  And each class may
> > have its own unique flags described in virtual function "call_properties".
> > The specific attributes will be converted from these flags in
> > "get_attributes" later.
> >
> > I find that there are more than 100 classes in total and if I only
> > need to classify them into different groups by attributes, maybe
> > we does not need so many classes?
> 
> Yeah, I agree.
> 
> Long term, there might be value in defining arm_neon.h in a similar
> way to arm_sve.h: i.e. have arm_neon.h defer most of the work to
> a special compiler pragma.  But that's going to be a lot of work.
> 
> I think it's possible to make incremental improvements to the current
> arm_neon.h implementation without that work being thrown away if we
> ever
> did switch to a pragma in future.  And the incremental approach seems
> more practical.
> 
> > The difficult thing I think is how to classify neon intrinsics into
> > different groups.  I'm going to follow up the way in SVE intrinsics
> > first now.
> 
> For now I'd suggest just giving a name to each combination of flags
> that the intrinsics need, rather than splitting instructions in a
> more fine-grained way.  (It's not at all obvious from the final state
> of the SVE code, but even there, the idea was to have as few groups as
> possible.  I.e. the groups were supposedly only split where necessary.
> As you say, there still ended up being a lot of groups in the end…)
> 
> It'd be easier to review if the work was split up into smaller steps.
> E.g. maybe one way would be this, with each number being a single
> patch:
> 
> (1) (a) Add a flags field to the built-in function definitions
> that for now is always zero.
> (b) Pick a name N to describe the most conservative set of flags.
> (c) Make every built-in function definition use N.
> 

I have finished the first part.

(a) I add a new parameter called FLAG to every built-in function macro.

(b) I define some flags in aarch64-builtins.c
FLAG_NONE for functions that need no flags
FLAG_READ_FPCR for functions that read the FPCR register
FLAG_RAISE_FP_EXCEPTIONS for functions that can raise fp exceptions
FLAG_READ_MEMORY for functions that read global memory
FLAG_PREFETCH_MEMORY for functions that prefetch data into memory
FLAG_WRITE_MEMORY for functions that write global memory

FLAG_FP is used for floating-point arithmetic
FLAG_ALL is all flags above

(c) I add a field in struct aarch64_simd_builtin_datum to record flags
for each built-in function.  But the default flags I set for built-in functions
are FLAG_ALL because by default the built-in functions might do anything.
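
A hypothetical sketch (not part of the patch) of how the FLAG_* values above
could later be turned into "const"/"pure" attributes in a follow-up step; the
helper name and the exact conditions here are assumptions only:

/* Hypothetical sketch: one possible mapping from the FLAG_* values to
   attributes.  A real mapping would also have to honour command-line
   options such as -fno-trapping-math when FP flags are involved.  */
static tree
aarch64_get_attributes (unsigned int flags)
{
  tree attrs = NULL_TREE;

  if (!(flags & (FLAG_WRITE_MEMORY | FLAG_PREFETCH_MEMORY
                 | FLAG_RAISE_FP_EXCEPTIONS)))
    {
      /* Writes nothing and raises no FP exceptions: "const" if it also
         reads nothing (memory or FPCR), otherwise "pure".  */
      const char *name
        = (flags & (FLAG_READ_MEMORY | FLAG_READ_FPCR)) ? "pure" : "const";
      attrs = tree_cons (get_identifier (name), NULL_TREE, attrs);
    }

  return attrs;
}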

And bootstrap and regression are tested ok on aarch64 Linux platform.

Any suggestions?

Thanks,
Xie Zhiheng

> (2) (a) Pick one type of function that cannot yet be described properly.
> (b) Pick a name N for that type of function.
> (c) Add whichever new flags are needed.
> (d) Add the appropriate attributes when the flags are set,
> possibly based on command-line options.
> (e) Make (exactly) one built-in function definition use N.
> 
> (3) (a) Pick some functions that all need the same attributes and
> that can already be described properly
> (b) Update all of thei

Re: [PATCH v2] sparc/sparc64: use crtendS.o for default-pie executables [PR96190]

2020-07-15 Thread Eric Botcazou
> In --enable-default-pie mode compiler should switch from
> using crtend.o to crtendS.o. On sparc it is especially important
> because crtend.o contains PIC-unfriendly code.
> 
> We use GNU_USER_TARGET_ENDFILE_SPEC as a baseline spec to get
> crtendS.o instead of crtend.o in !no-pie mode.
> 
> gcc:
> 
> 2020-07-14  Sergei Trofimovich  
> 
>   PR target/96190
>   * config/sparc/linux.h: Extend GNU_USER_TARGET_ENDFILE_SPEC
>   to get crtendS.o for !no-pie mode.
>   * config/sparc/linux64.h: Ditto.

This should be:

PR target/96190
* config/sparc/linux.h (ENDFILE_SPEC): Use GNU_USER_TARGET_ENDFILE_SPEC
to get crtendS.o for !no-pie mode.
* config/sparc/linux64.h(ENDFILE_SPEC): Ditto.

OK for mainline with this change.  You can also put it on the 10 branch after 
the 10.1 release is out if this is deemed necessary.

-- 
Eric Botcazou


[PATCH PR96195] aarch64: ICE during GIMPLE pass:vect

2020-07-15 Thread yangyang (ET)
Hi, 

This is a simple fix for PR96195.

For the test case, GCC generates the following gimple statement in 
pass_vect:

  vect__21.16_58 = zp.simdclone.2 (vect_vec_iv_.15_56);

The mode of vect__21.16_58 is VNx2SI while the mode of zp.simdclone.2 
(vect_vec_iv_.15_56) is V4SI, resulting in the crash.

In vectorizable_simd_clone_call, type compatibility is checked based
only on the number of elements and the compatibility of the element
types, which is not enough.
This patch adds VIEW_CONVERT_EXPRs if the argument types and return
type of the simd clone function are distinct from the vectype of the stmt.
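
A rough sketch of the idea (illustrative only, not the actual hunk): when the
vectype chosen for the stmt differs from the simd clone's return type,
reinterpret the call result through a VIEW_CONVERT_EXPR before using it.
Here "call_lhs", "vectype" and "gsi" stand for the surrounding state in
vectorizable_simd_clone_call:

  /* Reinterpret the call result if its type differs from the vectype.  */
  if (!useless_type_conversion_p (vectype, TREE_TYPE (call_lhs)))
    {
      tree vce = build1 (VIEW_CONVERT_EXPR, vectype, call_lhs);
      tree converted = make_ssa_name (vectype);
      gimple *conv_stmt = gimple_build_assign (converted, vce);
      gsi_insert_before (gsi, conv_stmt, GSI_SAME_STMT);
      call_lhs = converted;
    }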

Added one testcase for this. Bootstrapped and tested on both the aarch64 and
x86 Linux platforms, no new regressions witnessed.

Ok for trunk?

Thanks,
Yang Yang


+2020-07-15  Yang Yang  
+
+   PR tree-optimization/96195
+   * tree-vect-stmts.c (vectorizable_simd_clone_call): Add
+   VIEW_CONVERT_EXPRs if the argument types and return type
+   of the simd clone function are distinct from the vectype of stmt.
+

+2020-07-15  Yang Yang  
+
+   PR tree-optimization/96195
+   * gcc.target/aarch64/sve/pr96195.c: New test.
+


pr96195-v1.patch
Description: pr96195-v1.patch


[PATCH] nvptx: Provide vec_set and vec_extract patterns.

2020-07-15 Thread Roger Sayle

This patch provides standard vec_extract and vec_set patterns to the
nvptx backend, to extract an element from a PTX vector and set an
element of a PTX vector respectively.  PTX vectors (I hesitate to
call them SIMD vectors) may contain up to four elements, so vector
modes up to size four are supported by this patch even though the
nvptx backend currently only allows V2SI and V2DI, i.e. two out
of the ten possible vector modes.

As an example of the improvement, the following C function:

typedef int __v2si __attribute__((__vector_size__(8)));
int foo (__v2si arg) { return arg[0]+arg[1]; }

previously generated this code using a shift:

mov.u64 %r25, %ar0;
ld.v2.u32   %r26, [%r25];
mov.b64 %r28, %r26;
shr.s64 %r30, %r28, 32;
cvt.u32.u32 %r31, %r26.x;
cvt.u32.u64 %r32, %r30;
add.u32 %value, %r31, %r32;

but with this patch now generates:

mov.u64 %r25, %ar0;
ld.v2.u32   %r26, [%r25];
mov.u32 %r28, %r26.x;
mov.u32 %r29, %r26.y;
add.u32 %value, %r28, %r29;

I've implemented these getters and setters as their own instructions
instead of attempting the much more intrusive patch of changing the
backend's definition of register_operand.  Given the limited utility
of PTX vectors, I'm not convinced that attempting to support them as
operands in every instruction would be worth the effort involved.

This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
with "make" and "make check" with no new regressions.
Ok for mainline?


2020-07-15  Roger Sayle  

gcc/ChangeLog:
* config/nvptx/nvptx.md (nvptx_vector_index_operand): New predicate.
(VECELEM): New mode attribute for a vector's uppercase element mode.
(Vecelem): New mode attribute for a vector's lowercase element mode.
(*vec_set_0, *vec_set_1, *vec_set_2,
*vec_set_3): New instructions.
(vec_set): New expander to generate one of the above insns.
(vec_extract): New instruction.


Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 6545b81..b363277 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -118,6 +118,10 @@
 (define_predicate "nvptx_float_comparison_operator"
   (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
 
+(define_predicate "nvptx_vector_index_operand"
+  (and (match_code "const_int")
+   (match_test "UINTVAL (op) < 4")))
+
 ;; Test for a valid operand for a call instruction.
 (define_predicate "call_insn_operand"
   (match_code "symbol_ref,reg")
@@ -194,6 +198,10 @@
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
 
+;; Define element mode for each vector mode.
+(define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
+(define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
+
 ;; We should get away with not defining memory alternatives, since we don't
 ;; get variables in this mode and pseudos are never spilled.
 (define_insn "movbi"
@@ -1051,6 +1059,79 @@
   ""
   "%.\\tcvt.s%T0%t1\\t%0, %1;")
 
+;; Vector operations
+
+(define_insn "*vec_set_0"
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+(vec_merge:VECIM
+ (vec_duplicate:VECIM
+   (match_operand: 1 "nvptx_register_operand" "R"))
+ (match_dup 0)
+ (const_int 1)))]
+  ""
+  "%.\\tmov%t1\\t%0.x, %1;")
+
+(define_insn "*vec_set_1"
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+(vec_merge:VECIM
+ (vec_duplicate:VECIM
+   (match_operand: 1 "nvptx_register_operand" "R"))
+ (match_dup 0)
+ (const_int 2)))]
+  ""
+  "%.\\tmov%t1\\t%0.y, %1;")
+
+(define_insn "*vec_set_2"
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+(vec_merge:VECIM
+ (vec_duplicate:VECIM
+   (match_operand: 1 "nvptx_register_operand" "R"))
+ (match_dup 0)
+ (const_int 4)))]
+  ""
+  "%.\\tmov%t1\\t%0.z, %1;")
+
+(define_insn "*vec_set_3"
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+(vec_merge:VECIM
+ (vec_duplicate:VECIM
+   (match_operand: 1 "nvptx_register_operand" "R"))
+ (match_dup 0)
+ (const_int 8)))]
+  ""
+  "%.\\tmov%t1\\t%0.w, %1;")
+
+(define_expand "vec_set"
+  [(match_operand:VECIM 0 "nvptx_register_operand")
+   (match_operand: 1 "nvptx_register_operand")
+   (match_operand:SI 2 "nvptx_vector_index_operand")]
+  ""
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  int mask = 1 << INTVAL (operands[2]);
+  rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
+  tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
+  emit_insn (gen_rtx_SET (operands[0], tmp));
+