[RFC] dump_varmap in tree-ssa-structalias.c

2016-03-09 Thread Tom de Vries

Hi,

I wrote the attached patch to print the actual contents of the varmap 
variable in tree-ssa-structalias.c.


Does it make sense to rewrite this into a dump_varmap/debug_varmap patch?

Thanks,
- Tom
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index de12380..9d02b14 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -7486,6 +7486,84 @@ associate_varinfo_to_alias (struct cgraph_node *node, void *data)
   return false;
 }
 
+static void
+dump_varmap (FILE *file)
+{
+  if (varmap.length () > 0)
+fprintf (file, "variables:\n");
+
+  for (unsigned int i = 0; i < varmap.length (); ++i)
+{
+  varinfo_t vi = get_varinfo (i);
+  const char *sep = "";
+
+  if (vi == NULL)
+	continue;
+
+  fprintf (file, "%u: %s\n", vi->id, vi->name);
+
+  sep = " ";
+  if (vi->is_artificial_var)
+	fprintf (file, "%sartificial", sep);
+  if (vi->is_special_var)
+	fprintf (file, "%sspecial", sep);
+  if (vi->is_unknown_size_var)
+	fprintf (file, "%sunknown-size", sep);
+  if (vi->is_full_var)
+	fprintf (file, "%sfull", sep);
+  if (vi->is_heap_var)
+	fprintf (file, "%sheap", sep);
+  if (vi->may_have_pointers)
+	fprintf (file, "%smay-have-pointers", sep);
+  if (vi->only_restrict_pointers)
+	fprintf (file, "%sonly-restrict-pointers", sep);
+  if (vi->is_restrict_var)
+	fprintf (file, "%sis-restrict-var", sep);
+  if (vi->is_global_var)
+	fprintf (file, "%sglobal", sep);
+  if (vi->is_ipa_escape_point)
+	fprintf (file, "%sipa-escape-point", sep);
+  if (vi->is_fn_info)
+	fprintf (file, "%sfn-info", sep);
+  if (vi->ruid)
+	fprintf (file, "%srestrict-uid:%u", sep, vi->ruid);
+  if (vi->next)
+	fprintf (file, "%snext:%u", sep, vi->next);
+  if (vi->head != vi->id)
+	fprintf (file, "%shead:%u", sep, vi->head);
+  if (vi->offset)
+	fprintf (file, "%soffset:" HOST_WIDE_INT_PRINT_DEC, sep, vi->offset);
+  if (vi->size != ~(unsigned HOST_WIDE_INT)0)
+	fprintf (file, "%ssize:" HOST_WIDE_INT_PRINT_DEC, sep, vi->size);
+  if (vi->fullsize != ~(unsigned HOST_WIDE_INT)0
+	  && vi->fullsize != vi->size)
+	fprintf (file, "%sfullsize:" HOST_WIDE_INT_PRINT_DEC, sep, vi->fullsize);
+  fprintf (file, "\n");
+
+  if (vi->solution && !bitmap_empty_p (vi->solution))
+	{
+	  bitmap_iterator bi;
+	  unsigned i;
+	  fprintf (file, " solution: {" );
+	  EXECUTE_IF_SET_IN_BITMAP (vi->solution, 0, i, bi)
+	fprintf (file, " %u", i);
+	  fprintf (file, " }\n" );
+	}
+  if (vi->oldsolution && !bitmap_empty_p (vi->oldsolution)
+	  && !bitmap_equal_p (vi->solution, vi->oldsolution))
+	{
+	  bitmap_iterator bi;
+	  unsigned i;
+	  fprintf (file, " oldsolution: {" );
+	  EXECUTE_IF_SET_IN_BITMAP (vi->oldsolution, 0, i, bi)
+	fprintf (file, " %u", i);
+	  fprintf (file, " }\n" );
+	}
+}
+
+  fprintf (file, "\n");
+}
+
 /* Execute the driver for IPA PTA.  */
 static unsigned int
 ipa_pta_execute (void)
@@ -7654,9 +7732,15 @@ ipa_pta_execute (void)
 	}
 }
 
+  if (dump_file && (dump_flags & TDF_DETAILS))
+dump_varmap (dump_file);
+
   /* From the constraints compute the points-to sets.  */
   solve_constraints ();
 
+  if (dump_file && (dump_flags & TDF_DETAILS))
+dump_varmap (dump_file);
+
   /* Compute the global points-to sets for ESCAPED.
  ???  Note that the computed escape set is not correct
  for the whole unit as we fail to consider graph edges to


[PATCH] c++/65579 - set readonly bit on static constexpr members of templates

2016-03-09 Thread Martin Sebor

While going through constexpr bugs looking for background
on one I'm currently working on I came across bug 65579 -
[C++11] gcc requires definition of a static constexpr member
even though it is not odr-used.

The bug points out that GCC (sometimes) emits references to
static constexpr data members of class templates even when
they aren't odr-used.  (A more detailed analysis of why this
happens is in my comment #1 on the bug.)

The attached rather trivial patch seems to fix the problem
and (somewhat to my surprise) pass regression tests on x86_64.

Martin
PR c++/65579 - [C++11] gcc requires definition of a static constexpr member
	even though it is not odr-used

gcc/testsuite/ChangeLog:
2016-03-09  Martin Sebor  

	PR c++/65579
	* g++.dg/cpp0x/constexpr-static12.C: New test.

gcc/cp/ChangeLog:
2016-03-09  Martin Sebor  

	PR c++/65579
	* typeck.c (cp_apply_type_quals_to_decl): Make sure constexpr
data members of a template type are marked readonly.
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 20f0afc..690eaa8 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -9266,7 +9266,9 @@ cp_apply_type_quals_to_decl (int type_quals, tree decl)
 
   /* If the type has (or might have) a mutable component, that component
  might be modified.  */
-  if (TYPE_HAS_MUTABLE_P (type) || !COMPLETE_TYPE_P (type))
+  if (TYPE_HAS_MUTABLE_P (type)
+  || (!COMPLETE_TYPE_P (type)
+	  && (!VAR_P (decl) || !DECL_DECLARED_CONSTEXPR_P (decl))))
 type_quals &= ~TYPE_QUAL_CONST;
 
   c_apply_type_quals_to_decl (type_quals, decl);
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-static12.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-static12.C
new file mode 100644
index 0000000..abf1d66
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-static12.C
@@ -0,0 +1,23 @@
+// PR c++/65579 - [C++11] gcc requires definition of a static constexpr
+//   member even though it is not odr-used
+// { dg-do compile { target c++11 } }
+
+template <int>
+struct A
+{
+  int i;
+};
+
+struct B
+{
+  static constexpr A<0> constexpr_member = { 1 };
+};
+
+int foo ()
+{
+  return B::constexpr_member.i;
+}
+
+// Verify that the constexpr_member reference has been folded away
+// and isn't referenced in the assembly output.
+// { dg-final { scan-assembler-not "constexpr_member" } }


Re: [PATCH][AArch64] Replace insn to zero up DF register

2016-03-09 Thread Evandro Menezes

On 03/01/16 13:08, Evandro Menezes wrote:

On 03/01/16 13:02, Wilco Dijkstra wrote:

Evandro Menezes wrote:

The meaning of these attributes is not clear to me.  Is there a
reference somewhere about which insns are FP or SIMD or neither?
The meaning should be clear, "fp" is a floating point instruction, 
"simd" a SIMD one

as defined in ARM-ARM.


Indeed, I had to add the Y for the f_mcr insn to match it with nosimd.
However, I didn't feel that it should be moved to the right, since it's
already disparaged.  Am I missing some detail?
It might not matter for this specific case, but I have seen reload 
forcing the very
first alternative without looking at any costs or preferences - as 
long as it is legal.
This suggests we need to order alternatives from most preferred 
alternative to least

preferred one.

I think it is good enough for commit, James?


Methinks that my issue with those attributes is that I'm not as fluent 
in AArch64 as I'd like to be.


Please, feel free to edit the patch changing the order then.


   Replace insn to zero up SIMD registers

   gcc/
* config/aarch64/aarch64.md
(*movhf_aarch64): Add "movi %0, #0" to zero up register.
(*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
(*movdf_aarch64): Likewise.

Swapped the order of the constraints to favor MOVI.

Just say the word...

Thank you,

--
Evandro Menezes

>From bcb76a4c864436930e1236e7ce35d9e689adf075 Mon Sep 17 00:00:00 2001
From: Evandro Menezes 
Date: Mon, 19 Oct 2015 18:31:48 -0500
Subject: [PATCH] Replace insn to zero up SIMD registers

gcc/
	* config/aarch64/aarch64.md
	(*movhf_aarch64): Add "movi %0, #0" to zero up register.
	(*movsf_aarch64): Likewise and add "simd" and "fp" attributes.
	(*movdf_aarch64): Likewise.
---
 gcc/config/aarch64/aarch64.md | 33 -
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 68676c9..4502a58 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1163,11 +1163,12 @@
 )
 
 (define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"  "?rY, w,w,m,w,m,rY,r"))]
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"  "Y ,?rY, w,w,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
 || aarch64_reg_or_fp_zero (operands[1], HFmode))"
   "@
+   movi\\t%0.4h, #0
mov\\t%0.h[0], %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
@@ -1176,18 +1177,19 @@
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
-  [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\
+  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
  f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
-   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
+   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,*,yes,yes,*,*,*")]
 )
 
 (define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:SF 1 "general_operand"  "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:SF 1 "general_operand"  "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
 || aarch64_reg_or_fp_zero (operands[1], SFmode))"
   "@
+   movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
@@ -1197,16 +1199,19 @@
ldr\\t%w0, %1
str\\t%w1, %0
mov\\t%w0, %w1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")]
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
+ f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,yes,yes,yes,yes,*,*,*")]
 )
 
 (define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
-	(match_operand:DF 1 "general_operand"  "?rY, w,w,Ufc,m,w,m,rY,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
+	(match_operand:DF 1 "general_operand"  "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
 || aarch64_reg_or_fp_zero (operands[1], DFmode))"
   "@
+   movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
@@ -1216,8 +1221,10 @@
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1"
-  [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")]
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
+ f_loadd,f_stored,load1,store1,mov_reg")
+   (set_attr "simd" 

Re: [Patch, fortran] PR68241 - [meta-bug] Deferred-length character

2016-03-09 Thread Paul Richard Thomas
Committed as revision 234093. Will close all the associated PRs.

Cheers

Paul

On 9 March 2016 at 19:33, Paul Richard Thomas
 wrote:
> Dominique,
>
> Many thanks for the verification. I will update my tree forthwith,
> bootstrap, regtest and commit.
>
> Thanks
>
> Paul
>
> On 9 March 2016 at 18:34, Dominique d'Humières  wrote:
>> Dear Paul,
>>
>> As you said on IRC the patch needs -l to apply. After that the gcc-5 branch 
>> bootstrapped and regtested without any problem.
>>
>> Thanks,
>>
>> Dominique
>>
>>> Le 7 mars 2016 à 11:22, Paul Richard Thomas  
>>> a écrit :
>>>
>>> Dear All,
>>>
>>> I had promised to get the 5-branch up to date in respect of deferred
>>> character patches after they had been in place on trunk for "a few
>>> weeks". Well, I got pulled away by PR69423 and have only now come back
>>> to the earlier patch.
>>>
>>> The attached patch corresponds to trunk revisions 232450 and 233589.
>>> They did not apply cleanly to 5-branch in one or two places but it was no
>>> big deal to put them right.
>>>
>>> Bootstrapped and regtested on FC21/x86_64 - OK for 5-branch?
>>>
>>> Best regards
>>>
>>> Paul
>>>
>>> 2016-03-07  Paul Thomas  
>>>
>>>Backport from trunk.
>>>PR fortran/69423
>>>* trans-decl.c (create_function_arglist): Deferred character
>>>length functions, with and without declared results, address
>>>the passed reference type as '.result' and the local string
>>>length as '..result'.
>>>(gfc_null_and_pass_deferred_len): Helper function to null and
>>>return deferred string lengths, as needed.
>>>(gfc_trans_deferred_vars): Call it, thereby reducing repeated
>>>code, add call for deferred arrays and reroute pointer function
>>>results. Avoid using 'tmp' for anything other than a temporary
>>>tree by introducing 'type_of_array' for the arrayspec type.
>>>
>>> 2016-03-07  Paul Thomas  
>>>
>>>Backport from trunk.
>>>PR fortran/64324
>>>* resolve.c (check_uop_procedure): Prevent deferred length
>>>characters from being trapped by assumed length error.
>>>
>>>Backport from trunk.
>>>PR fortran/49630
>>>PR fortran/54070
>>>PR fortran/60593
>>>PR fortran/60795
>>>PR fortran/61147
>>>PR fortran/64324
>>>* trans-array.c (gfc_conv_scalarized_array_ref): Pass decl for
>>>function as well as variable expressions.
>>>(gfc_array_init_size): Add 'expr' as an argument. Use this to
>>>correctly set the descriptor dtype for deferred characters.
>>>(gfc_array_allocate): Add 'expr' to the call to
>>>'gfc_array_init_size'.
>>>* trans.c (gfc_build_array_ref): Expand logic for setting span
>>>to include indirect references to character lengths.
>>>* trans-decl.c (gfc_get_symbol_decl): Ensure that deferred
>>>result char lengths that are PARM_DECLs are indirectly
>>>referenced both for directly passed and by reference.
>>>(create_function_arglist): If the length type is a pointer type
>>>then store the length as the 'passed_length' and make the char
>>>length an indirect reference to it.
>>>(gfc_trans_deferred_vars): If a character length has escaped
>>>being set as an indirect reference, return it via the 'passed
>>>length'.
>>>* trans-expr.c (gfc_conv_procedure_call): The length of
>>>deferred character length results is set TREE_STATIC and set to
>>>zero.
>>>(gfc_trans_assignment_1): Do not fix the rse string_length if
>>>it is a variable, a parameter or an indirect reference. Add the
>>>code to trap assignment of scalars to unallocated arrays.
>>>* trans-stmt.c (gfc_trans_allocate): Remove 'def_str_len' and
>>>all references to it. Instead, replicate the code to obtain an
>>>explicitly defined string length and provide a value before
>>>array allocation so that the dtype is correctly set.
>>>* trans-types.c (gfc_get_character_type): If the character length
>>>is a pointer, use the indirect reference.
>>>
>>> 2016-03-07  Paul Thomas  
>>>
>>>Backport from trunk.
>>>PR fortran/69423
>>>* gfortran.dg/deferred_character_15.f90 : New test.
>>>
>>> 2016-03-07  Paul Thomas  
>>>
>>>Backport from trunk.
>>>PR fortran/49630
>>>* gfortran.dg/deferred_character_13.f90: New test for the fix
>>>of comment 3 of the PR.
>>>
>>>Backport from trunk.
>>>PR fortran/54070
>>>* gfortran.dg/deferred_character_8.f90: New test
>>>* gfortran.dg/allocate_error_5.f90: New test
>>>
>>>Backport from trunk.
>>>PR fortran/60593
>>>* gfortran.dg/deferred_character_10.f90: New test
>>>
>>>Backport from trunk.
>>>PR fortran/60795
>>>* gfortran.dg/deferred_character_14.f90: New test
>>>
>>>Backport from trunk.
>>>PR fortran/61147
>>>* gfortran.dg/deferred_character_11.f90: New test
>>>
>>>Backport from 

Re: [PATCH] Require type compatible bases in DDR initialization (PR tree-optimization/70127)

2016-03-09 Thread Jakub Jelinek
On Tue, Mar 08, 2016 at 07:11:45PM +0100, Richard Biener wrote:
> I believe the safest fix is to re-instantiate the compatibility check by 
> refactoring operand_equal_p to perform it on the full ref (but not recursions 
> where it would be redundant and maybe too conservative).
> I've noticed this as well when doing the last operand_equal_p surgery, esp. 
> the incomplete and bogus half-way type checking done at its top.

I've tried to add types_compatible_p check to operand_equal_p for all
toplevel expressions, but that affected 25x more operand_equal_p calls
during x86_64 and i686-linux bootstrap/regtest than just doing it
for *MEM_REF only - details in the PR; after discussions on IRC with Richard
and Honza I've committed a reversion of the October change, and we'll need to
start with operand_equal_p changes early during stage1 next time, rather
than at the end of stage1.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2016-03-09  Jakub Jelinek  

PR tree-optimization/70127
* fold-const.c (operand_equal_p): Revert the 2015-10-28 change.

* gcc.c-torture/execute/pr70127.c: New test.

--- gcc/fold-const.c.jj 2016-03-09 15:06:21.0 +0100
+++ gcc/fold-const.c2016-03-09 18:25:07.429926750 +0100
@@ -3032,6 +3032,9 @@ operand_equal_p (const_tree arg0, const_
   TYPE_SIZE (TREE_TYPE (arg1)),
   flags)))
return 0;
+ /* Verify that access happens in similar types.  */
+ if (!types_compatible_p (TREE_TYPE (arg0), TREE_TYPE (arg1)))
+   return 0;
  /* Verify that accesses are TBAA compatible.  */
  if (!alias_ptr_types_compatible_p
(TREE_TYPE (TREE_OPERAND (arg0, 1)),
--- gcc/testsuite/gcc.c-torture/execute/pr70127.c.jj2016-03-08 
12:11:11.890835632 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr70127.c   2016-03-08 
12:10:58.0 +0100
@@ -0,0 +1,23 @@
+/* PR tree-optimization/70127 */
+
+struct S { int f; signed int g : 2; } a[1], c = {5, 1}, d;
+short b;
+
+__attribute__((noinline, noclone)) void
+foo (int x)
+{
+  if (x != 1)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  while (b++ <= 0)
+{
+  struct S e = {1, 1};
+  d = e = a[0] = c;
+}
+  foo (a[0].g);
+  return 0;
+}


Jakub


Re: [PATCH] Fix ICE with xmm{16-31} in *truncdfsf_fast_mixed with -mtune=barcelona (PR target/70086)

2016-03-09 Thread Uros Bizjak
On Wed, Mar 9, 2016 at 5:58 PM, Jakub Jelinek  wrote:
> On Wed, Mar 09, 2016 at 03:51:04PM +0100, Jakub Jelinek wrote:
>> Unfortunately, this really doesn't seem to work, I get ICEs on the
>> testcases.  I've tried to allow EXT_REX_SSE_REG_P for -mavx512f -mno-avx512vl
>> just for MEM_P (operands[1]), but even that ICEs.  Perhaps there are bugs
>> in other splitters.
>>
>> I'll bootstrap/regtest this then:
>>
>> 2016-03-04  Jakub Jelinek  
>>
>>   PR target/70086
>>   * config/i386/i386.md (truncdfsf2 splitter): Use gen_vec_concatv2df
>>   instead of gen_sse2_loadlpd.
>>   * config/i386/sse.md (*vec_concatv2df): Rename to...
>>   (vec_concatv2df): ... this.
>>
>>   * gcc.target/i386/pr70086-1.c: New test.
>>   * gcc.target/i386/pr70086-2.c: New test.
>>   * gcc.target/i386/pr70086-3.c: New test.
>
> Now successfully bootstrapped/regtested on x86_64-linux and i686-linux.
> Ok for trunk?

OK.

Thanks,
Uros.

>> --- gcc/config/i386/i386.md.jj2016-03-08 09:01:50.871475493 +0100
>> +++ gcc/config/i386/i386.md   2016-03-09 15:40:00.102942847 +0100
>> @@ -4393,8 +4393,8 @@ (define_split
>>emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
>>  }
>>else
>> -emit_insn (gen_sse2_loadlpd (operands[4],
>> -  CONST0_RTX (V2DFmode), operands[1]));
>> +emit_insn (gen_vec_concatv2df (operands[4], operands[1],
>> +CONST0_RTX (DFmode)));
>>  })
>>
>>  ;; It's more profitable to split and then extend in the same register.
>> --- gcc/config/i386/sse.md.jj 2016-03-09 15:08:17.0 +0100
>> +++ gcc/config/i386/sse.md2016-03-09 15:15:10.346223894 +0100
>> @@ -8951,7 +8951,7 @@ (define_insn "vec_dupv2df"
>> (set_attr "prefix" "orig,maybe_vex,evex")
>> (set_attr "mode" "V2DF,DF,DF")])
>>
>> -(define_insn "*vec_concatv2df"
>> +(define_insn "vec_concatv2df"
>>[(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
>>   (vec_concat:V2DF
>> (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
>> --- gcc/testsuite/gcc.target/i386/pr70086-1.c.jj  2016-03-09 
>> 15:12:55.177060382 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr70086-1.c 2016-03-09 15:12:55.177060382 
>> +0100
>> @@ -0,0 +1,11 @@
>> +/* PR target/70086 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-mtune=barcelona -mavx512vl -ffloat-store" } */
>> +
>> +float
>> +foo (float a, float b, double c, float d, double e, float f)
>> +{
>> +  e -= d;
>> +  d *= e;
>> +  return e + d;
>> +}
>> --- gcc/testsuite/gcc.target/i386/pr70086-2.c.jj  2016-03-09 
>> 15:12:55.177060382 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr70086-2.c 2016-03-09 15:35:52.0 
>> +0100
>> @@ -0,0 +1,21 @@
>> +/* PR target/70086 */
>> +/* { dg-do compile { target { ! ia32 } } } */
>> +/* { dg-options "-O2 -mtune=barcelona -mavx512vl" } */
>> +
>> +float
>> +foo (double *p)
>> +{
>> +  register float xmm16 __asm ("xmm16");
>> +  xmm16 = *p;
>> +  asm volatile ("" : "+v" (xmm16));
>> +  return xmm16;
>> +}
>> +
>> +float
>> +bar (double x)
>> +{
>> +  register float xmm16 __asm ("xmm16");
>> +  xmm16 = x;
>> +  asm volatile ("" : "+v" (xmm16));
>> +  return xmm16;
>> +}
>> --- gcc/testsuite/gcc.target/i386/pr70086-3.c.jj  2016-03-09 
>> 15:36:28.332831118 +0100
>> +++ gcc/testsuite/gcc.target/i386/pr70086-3.c 2016-03-09 15:35:33.0 
>> +0100
>> @@ -0,0 +1,21 @@
>> +/* PR target/70086 */
>> +/* { dg-do compile { target { ! ia32 } } } */
>> +/* { dg-options "-O2 -mtune=barcelona -mavx512f -mno-avx512vl" } */
>> +
>> +float
>> +foo (double *p)
>> +{
>> +  register float xmm16 __asm ("xmm16");
>> +  xmm16 = *p;
>> +  asm volatile ("" : "+v" (xmm16));
>> +  return xmm16;
>> +}
>> +
>> +float
>> +bar (double x)
>> +{
>> +  register float xmm16 __asm ("xmm16");
>> +  xmm16 = x;
>> +  asm volatile ("" : "+v" (xmm16));
>> +  return xmm16;
>> +}
>
> Jakub


Re: [PATCH, rs6000] Add support for xxpermr and vpermr instructions

2016-03-09 Thread David Edelsohn
On Tue, Mar 8, 2016 at 11:24 AM, Kelvin Nilsen
 wrote:
>
> This patch adds support for two new Power9 instructions, xxpermr and vpermr,
> providing more efficient vector permutation operations on little-endian
> configurations. These new instructions are described in the Power ISA 3.0
> document.  Selection of the new instructions is conditioned upon
> TARGET_P9_VECTOR and !VECTOR_ELT_ORDER_BIG.
>
> The patch has bootstrapped and tested on powerpc64le-unknown-linux-gnu and
> powerpc64-unknown-linux-gnu with no regressions.  Is this ok for GCC 7 when
> stage 1 opens?

gcc/ChangeLog:

2016-03-07  Kelvin Nilsen  

* config/rs6000/rs6000.c (rs6000_expand_vector_set): If
!BYTES_BIG_ENDIAN and TARGET_P9_VECTOR, expand using template that
translates into new xxpermr or vpermr instructions.
(altivec_expand_vec_perm_le): If TARGET_P9_VECTOR, expand using
template that translates into new xxpermr or vpermr instructions.
* config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec constant.
(*altivec_vpermr_<mode>_internal): New insn.

gcc/testsuite/ChangeLog:

2016-03-07  Kelvin Nilsen  

* gcc.target/powerpc/p9-permute.c: Generalize test to run on
big-endian Power9 in addition to little-endian Power9.
* gcc.target/powerpc/p9-vpermr.c: New test.

This patch is okay when GCC trunk re-opens for new features.

Thanks, David

P.S. In the future, please include the ChangeLog entry in the body of
the message, not a separate attachment.


[openacc] combined loop errors

2016-03-09 Thread Cesar Philippidis
This patch teaches the c and c++ FEs how to update the list of clauses
after calling c_finish_omp_clauses when parsing combined loop
constructs. The problem here is, if an invalid clause isn't removed by
the FE, the gimplifier will ICE because the tree node representing the
clause contains incomplete fields. So updating the split clauses allows
the compiler to gracefully error.

Is this patch ok for trunk? It's specific to openacc.

Thanks,
Cesar
2016-03-09  Cesar Philippidis  

	gcc/c/
	* c-parser.c (c_parser_oacc_loop): Update cclauses and clauses
	when calling c_finish_omp_clauses.

	gcc/cp/
	* parser.c (cp_parser_oacc_loop): Update cclauses and clauses
	when calling c_finish_omp_clauses.

	gcc/testsuite/
	* c-c++-common/goacc/combined-directives-2.c: New test.


diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index a7d5827..60ec996 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -13789,9 +13789,9 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name,
 {
   clauses = c_oacc_split_loop_clauses (clauses, cclauses);
   if (*cclauses)
-	c_finish_omp_clauses (*cclauses, false);
+	*cclauses = c_finish_omp_clauses (*cclauses, false);
   if (clauses)
-	c_finish_omp_clauses (clauses, false);
+	clauses = c_finish_omp_clauses (clauses, false);
 }
 
   tree block = c_begin_compound_stmt (true);
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 726d5fc..6ae45b0 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -35346,9 +35346,9 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name,
 {
   clauses = c_oacc_split_loop_clauses (clauses, cclauses);
   if (*cclauses)
-	finish_omp_clauses (*cclauses, false);
+	*cclauses = finish_omp_clauses (*cclauses, false);
   if (clauses)
-	finish_omp_clauses (clauses, false);
+	clauses = finish_omp_clauses (clauses, false);
 }
 
   tree block = begin_omp_structured_block ();
diff --git a/gcc/testsuite/c-c++-common/goacc/combined-directives-2.c b/gcc/testsuite/c-c++-common/goacc/combined-directives-2.c
new file mode 100644
index 0000000..c51e2f9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/combined-directives-2.c
@@ -0,0 +1,14 @@
+/* Ensure that bogus clauses aren't propagated in combined loop
+   constructs.  */
+
+int
+main ()
+{
+  int a, i;
+
+#pragma acc parallel loop vector copy(a[0:100]) reduction(+:a) /* { dg-error "'a' does not have pointer or array type" } */
+  for (i = 0; i < 100; i++)
+a++;
+
+  return a;
+}


Re: [openacc] combined loop errors

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 10:51:41AM -0800, Cesar Philippidis wrote:
> This patch teaches the c and c++ FEs how to update the list of clauses
> after calling c_finish_omp_clauses when parsing combined loop
> constructs. The problem here is, if an invalid clause isn't removed by
> the FE, the gimplifier will ICE because the tree node representing the
> clause contains incomplete fields. So updating the split clauses allows
> the compiler to gracefully error.
> 
> Is this patch ok for trunk? It's specific to openacc.
> 
> Thanks,
> Cesar

> 2016-03-09  Cesar Philippidis  
> 
>   gcc/c/
>   * c-parser.c (c_parser_oacc_loop): Update cclauses and clauses
>   when calling c_finish_omp_clauses.
> 
>   gcc/cp/
>   * parser.c (cp_parser_oacc_loop): Update cclauses and clauses
>   when calling c_finish_omp_clauses.
> 
>   gcc/testsuite/
>   * c-c++-common/goacc/combined-directives-2.c: New test.

Ok, thanks.

Jakub


[PATCH 2/2] PR c++/70105: prevent nonsensical underline spew for macro expansions

2016-03-09 Thread David Malcolm
diagnostic_show_locus can sometimes do the wrong thing when handling
expressions built up from macros.

PR c++/70105 (currently marked as a P3 regression) has an example of
a diagnostic where over 500 lines of irrelevant source are printed,
and underlined, giving >1000 lines of useless spew to stderr.

This patch adds extra sanitization to diagnostic-show-locus.c, so that
we only attempt to print underlines and secondary locations if such
locations are "sufficiently sane" relative to the primary location
of a diagnostic.

This "sufficiently sane" condition is implemented by a new helper
function compatible_locations_p, which requires such locations to
have the same macro expansion hierarchy as the primary location,
using linemap_macro_map_loc_unwind_toward_spelling, effectively
mimicking the expansion performed by LRK_SPELLING_LOCATION.

This may be too strong a condition, but it effectively fixes
PR c++/70105, without removing any underlines in my testing.

Successfully bootstrapped in combination with the previous
patch on x86_64-pc-linux-gnu; adds 15 new PASS results to g++.sum
and 4 new PASS results to gcc.sum.

Committed to trunk as r234088.

The new test cases contain lines > 2048 long in order to stress the
new code, hence I had to send the following "by hand" via an attachment, as
"git send-email" informs me that > 998 characters is too long to send
"inline", due to SMTP limits as described by
http://www.ietf.org/rfc/rfc2821.txt.

gcc/ChangeLog:
PR c/68473
PR c++/70105
* diagnostic-show-locus.c (compatible_locations_p): New function.
(layout::layout): Sanitize ranges using compatible_locations_p.

gcc/testsuite/ChangeLog:
PR c/68473
PR c++/70105
* g++.dg/diagnostic/pr70105.C: New test.
* gcc.dg/plugin/diagnostic-test-expressions-1.c (foo): New decl.
(test_multiple_ordinary_maps): New test function.

libcpp/ChangeLog:
PR c/68473
PR c++/70105
* line-map.c (linemap_macro_map_loc_unwind_toward_spelling): Move
decl...
* include/line-map.h
(linemap_macro_map_loc_unwind_toward_spelling): ...here,
converting from static to extern.
Index: gcc/ChangeLog
===
--- gcc/ChangeLog	(revision 234087)
+++ gcc/ChangeLog	(revision 234088)
@@ -2,6 +2,13 @@
 
 	PR c/68473
 	PR c++/70105
+	* diagnostic-show-locus.c (compatible_locations_p): New function.
+	(layout::layout): Sanitize ranges using compatible_locations_p.
+
+2016-03-09  David Malcolm  
+
+	PR c/68473
+	PR c++/70105
 	* diagnostic-show-locus.c (layout_range::layout_range): Replace
 	location_range param with three const expanded_locations * and a
 	bool.
Index: gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c
===
--- gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c	(revision 234087)
+++ gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c	(revision 234088)
@@ -635,3 +635,39 @@
^~~~
{ dg-end-multiline-output "" } */
 }
+
+/* Verify that we can underline expressions that span multiple
+   ordinary maps.  */
+
+extern int foo (int, ...);
+
+void test_multiple_ordinary_maps (void)
+{
+  /* The expression
+foo (0, "very long string...")
+ below contains a transition between ordinary maps due to a very long
+ line (>127 "columns", treating tab characters as 1 column).  */
+  __emit_expression_range (0, foo (0, /* { dg-warning "range" } */
+   "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"));
+
+/* { dg-begin-multiline-output "" }
+   __emit_expression_range (0, foo (0,
+   ^~~~
+"0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"));
+~
+   { dg-end-multiline-output "" } */
+
+  /* Another expression that transitions between ordinary maps; this
+ one due to an ordinary map for a very long line transitioning back to
+ one for a very short line.  The policy in linemap_line_start
+ means that we need a transition from >10 bits of column
+ (i.e. 2048 columns) to a line with <= 80 columns.  */
+  __emit_expression_range (0, foo (0, 

Re: [Patch, fortran] PR68241 - [meta-bug] Deferred-length character

2016-03-09 Thread Paul Richard Thomas
Dominique,

Many thanks for the verification. I will update my tree forthwith,
bootstrap, regtest and commit.

Thanks

Paul

On 9 March 2016 at 18:34, Dominique d'Humières  wrote:
> Dear Paul,
>
> As you said on IRC the patch needs -l to apply. After that the gcc-5 branch 
> bootstrapped and regtested without any problem.
>
> Thanks,
>
> Dominique
>
>> Le 7 mars 2016 à 11:22, Paul Richard Thomas  
>> a écrit :
>>
>> Dear All,
>>
>> I had promised to get the 5-branch up to date in respect of deferred
>> character patches after they had been in place on trunk for "a few
>> weeks". Well, I got pulled away by PR69423 and have only now come back
>> to the earlier patch.
>>
>> The attached patch corresponds to trunk revisions 232450 and 233589.
>> They did not apply cleanly to the 5-branch in one or two places but it was no
>> big deal to put them right.
>>
>> Bootstrapped and regtested on FC21/x86_64 - OK for 5-branch?
>>
>> Best regards
>>
>> Paul
>>
>> 2016-03-07  Paul Thomas  
>>
>>Backport from trunk.
>>PR fortran/69423
>>* trans-decl.c (create_function_arglist): Deferred character
>>length functions, with and without declared results, address
>>the passed reference type as '.result' and the local string
>>length as '..result'.
>>(gfc_null_and_pass_deferred_len): Helper function to null and
>>return deferred string lengths, as needed.
>>(gfc_trans_deferred_vars): Call it, thereby reducing repeated
>>code, add call for deferred arrays and reroute pointer function
>>results. Avoid using 'tmp' for anything other that a temporary
>>tree by introducing 'type_of_array' for the arrayspec type.
>>
>> 2016-03-07  Paul Thomas  
>>
>>Backport from trunk.
>>PR fortran/64324
>>* resolve.c (check_uop_procedure): Prevent deferred length
>>characters from being trapped by assumed length error.
>>
>>Backport from trunk.
>>PR fortran/49630
>>PR fortran/54070
>>PR fortran/60593
>>PR fortran/60795
>>PR fortran/61147
>>PR fortran/64324
>>* trans-array.c (gfc_conv_scalarized_array_ref): Pass decl for
>>function as well as variable expressions.
>>(gfc_array_init_size): Add 'expr' as an argument. Use this to
>>correctly set the descriptor dtype for deferred characters.
>>(gfc_array_allocate): Add 'expr' to the call to
>>'gfc_array_init_size'.
>>* trans.c (gfc_build_array_ref): Expand logic for setting span
>>to include indirect references to character lengths.
>>* trans-decl.c (gfc_get_symbol_decl): Ensure that deferred
>>result char lengths that are PARM_DECLs are indirectly
>>referenced both for directly passed and by reference.
>>(create_function_arglist): If the length type is a pointer type
>>then store the length as the 'passed_length' and make the char
>>length an indirect reference to it.
>>(gfc_trans_deferred_vars): If a character length has escaped
>>being set as an indirect reference, return it via the 'passed
>>length'.
>>* trans-expr.c (gfc_conv_procedure_call): The length of
>>deferred character length results is set TREE_STATIC and set to
>>zero.
>>(gfc_trans_assignment_1): Do not fix the rse string_length if
>>it is a variable, a parameter or an indirect reference. Add the
>>code to trap assignment of scalars to unallocated arrays.
>>* trans-stmt.c (gfc_trans_allocate): Remove 'def_str_len' and
>>all references to it. Instead, replicate the code to obtain a
>>explicitly defined string length and provide a value before
>>array allocation so that the dtype is correctly set.
>>trans-types.c (gfc_get_character_type): If the character length
>>is a pointer, use the indirect reference.
>>
>> 2016-03-07  Paul Thomas  
>>
>>Backport from trunk.
>>PR fortran/69423
>>* gfortran.dg/deferred_character_15.f90 : New test.
>>
>> 2016-03-07  Paul Thomas  
>>
>>Backport from trunk.
>>PR fortran/49630
>>* gfortran.dg/deferred_character_13.f90: New test for the fix
>>of comment 3 of the PR.
>>
>>Backport from trunk.
>>PR fortran/54070
>>* gfortran.dg/deferred_character_8.f90: New test
>>* gfortran.dg/allocate_error_5.f90: New test
>>
>>Backport from trunk.
>>PR fortran/60593
>>* gfortran.dg/deferred_character_10.f90: New test
>>
>>Backport from trunk.
>>PR fortran/60795
>>* gfortran.dg/deferred_character_14.f90: New test
>>
>>Backport from trunk.
>>PR fortran/61147
>>* gfortran.dg/deferred_character_11.f90: New test
>>
>>Backport from trunk.
>>PR fortran/64324
>>* gfortran.dg/deferred_character_9.f90: New test
>>
>>
>>
>>
>>
>> --
>> The difference between genius and stupidity is; genius has its limits.
>>
>> Albert Einstein
>> 
>



-- 
The difference between genius and stupidity is; genius has its limits.

Albert 

[PATCH 1/2] PR c++/70105: Defer location expansion until diagnostic_show_locus

2016-03-09 Thread David Malcolm
This patch is enabling work for the fix for PR c++/70105, so that
patch 2 in the kit can make use of location_t values when
sanitizing underlines.

This is an updated version of
  "[PATCH 1/3] Delay location expansion within rich_location until printing"
  https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01293.html
This was originally intended for a "deeper" fix for PR c/68473,
hence the ChangeLog entries also reference that PR.

Repeating part of the blurb from there:

> Previously, source_location/location_t values passed to
> rich_location were immediately expanded (to expanded_location
> instances stored inside the rich_location).

> This patch updates the insides of class rich_location to delay
> this expansion until the insides of diagnostic_show_locus.

Delaying the expansion means that rich_location's ctor and
rich_location::set_range no longer need access to the line_table.
In that earlier version of the patch I removed the redundant params,
but to minimize the scope of this patch, I've kept these parameters
in this iteration of the patch (to avoid touching every call site).
I'd prefer to remove them, but that seems like a stage 1 thing.

Successfully bootstrapped in combination with the followup
patch on x86_64-pc-linux-gnu.

Committed to trunk as r234087.

gcc/ChangeLog:
PR c/68473
PR c++/70105
* diagnostic-show-locus.c (layout_range::layout_range): Replace
location_range param with three const expanded_locations * and a
bool.
(layout::layout): Replace call to
rich_location::lazily_expand_location with get_expanded_location.
Extract the range and perform location expansion here, passing
the results to the layout_range ctor.
* diagnostic.c (source_range::debug): Delete.
* diagnostic.h (diagnostic_expand_location): Reimplement in terms
of rich_location::get_expanded_location.
* gcc-rich-location.c (get_range_for_expr): Delete.
(gcc_rich_location::add_expr): Reimplement to avoid the
rich_location::add_range overload that took a location_range,
passing a location_t instead.

gcc/testsuite/ChangeLog:
PR c/68473
PR c++/70105
* gcc.dg/plugin/diagnostic_plugin_show_trees.c (show_tree):
Drop range information from call to inform_at_rich_loc.
* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c (add_range):
New.
(test_show_locus): Replace calls to rich_location::add_range with
calls to add_range.  Rewrite the tests that used the now-defunct
rich_location ctor taking a source_range.  Simplify other tests
by replacing calls to COMBINE_LOCATION_DATA with calls to
make_location.

libcpp/ChangeLog:
PR c/68473
PR c++/70105
* include/line-map.h (source_range::debug): Delete.
(struct location_range): Update comment.  Replace
expanded_location fields "m_start", "m_finish", and "m_caret" with
a source_location field: "m_loc".
(class rich_location): Reword comment.
(rich_location::get_loc): Reimplement in terms of a new overloaded
variant which takes an unsigned int.
(rich_location::get_loc_addr): Delete.
(rich_location::add_range): Drop params "start" and "finish" in
favor of param "loc".  Drop overloaded variants taking a
source_range or location_range *.
(rich_location::lazily_expand_location): Delete in favor of...
(rich_location::get_expanded_location): New decl.
(rich_location::m_loc): Delete field.
(rich_location::m_column_override): New field.
* line-map.c (rich_location::rich_location):  Drop name of
line_maps * param.  Update initializations for deletion of field
"m_loc" and addition of field "m_column_override".  Reimplement
body as a call to add_range.  Delete overloaded variant taking a
source_range.
(rich_location::get_loc): New function.
(rich_location::lazily_expand_location): Delete in favor of...
(rich_location::get_expanded_location): New function.
(rich_location::override_column): Reimplement.
(rich_location::add_range): Drop params "start" and "finish" in
favor of param "loc".  Eliminate location expansion in favor of
simply storing loc.  Drop overloaded variants taking a
source_range or location_range *.
(rich_location::set_range): Eliminate location expansion.
---
 gcc/diagnostic-show-locus.c|  41 ---
 gcc/diagnostic.c   |  14 ---
 gcc/diagnostic.h   |   2 +-
 gcc/gcc-rich-location.c|  28 +
 .../gcc.dg/plugin/diagnostic_plugin_show_trees.c   |   8 +-
 .../plugin/diagnostic_plugin_test_show_locus.c |  97 -
 libcpp/include/line-map.h  |  51 +++--
 libcpp/line-map.c   

Re: [PATCH] Add -funconstrained-commons to work around PR/69368 (and others) in SPEC2006

2016-03-09 Thread Alan Lawrence

On 07/03/16 11:02, Alan Lawrence wrote:

On 04/03/16 13:27, Richard Biener wrote:

I think to make it work with LTO you need to mark it 'Optimization'.
Also it's about
arrays so maybe

'Assume common declarations may be overridden with ones with a larger
trailing array'

also if we document it here we should eventually document it in invoke.texi.

Not sure if "unknown commons" is a good term, maybe "unconstrained
commons" instead?


All done; I doubt there is really a good word, unconstrained seems as good as
any. I've reused much the same wording in invoke.texi, unless you think there
is more to add.

On 04/03/16 13:33, Jakub Jelinek wrote:

Also, isn't the *.opt description line supposed to end with a full stop?


Ah, yes, thanks.

Is this version OK for trunk?

gcc/ChangeLog:

DATE  Alan Lawrence  
   Jakub Jelinek  

 * common.opt (funconstrained-commons, flag_unconstrained_commons): New.
 * tree.c (array_at_struct_end_p): Do not limit to size of decl for
 DECL_COMMONS if flag_unconstrained_commons is set.
 * tree-dfa.c (get_ref_base_and_extent): Likewise.


And add to that
* doc/invoke.texi (Optimize Options): Add -funconstrained-commons.
(funconstrained-commons): Document.

Thanks,
Alan



gcc/testsuite/ChangeLog:

 * gfortran.dg/unconstrained_commons.f: New.
---
  gcc/common.opt|  5 +
  gcc/doc/invoke.texi   |  8 +++-
  gcc/testsuite/gfortran.dg/unconstrained_commons.f | 20 
  gcc/tree-dfa.c| 15 ++-
  gcc/tree.c|  6 --
  5 files changed, 50 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gfortran.dg/unconstrained_commons.f

diff --git a/gcc/common.opt b/gcc/common.opt
index 520fa9c..bbf79ef 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2451,6 +2451,11 @@ fsplit-paths
  Common Report Var(flag_split_paths) Init(0) Optimization
  Split paths leading to loop backedges.

+funconstrained-commons
+Common Var(flag_unconstrained_commons) Optimization
+Assume common declarations may be overridden with ones with a larger
+trailing array.
+
  funit-at-a-time
  Common Report Var(flag_unit_at_a_time) Init(1)
  Compile whole compilation unit at a time.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0a2a6f4..68933a1 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}.
  -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
  -ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
  -ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vectorize -ftree-vrp @gol
+-ftree-vectorize -ftree-vrp -funconstrained-commons @gol
  -funit-at-a-time -funroll-all-loops -funroll-loops @gol
  -funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
  -fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol
@@ -6659,6 +6659,12 @@ the loop optimizer itself cannot prove that these 
assumptions are valid.
  If you use @option{-Wunsafe-loop-optimizations}, the compiler warns you
  if it finds this kind of loop.

+@item -funconstrained-commons
+@opindex funconstrained-commons
+This option tells the compiler that variables declared in common blocks
+(e.g. Fortran) may later be overridden with longer trailing arrays. This
+prevents certain optimizations that depend on knowing the array bounds.
+
  @item -fcrossjumping
  @opindex fcrossjumping
  Perform cross-jumping transformation.
diff --git a/gcc/testsuite/gfortran.dg/unconstrained_commons.f 
b/gcc/testsuite/gfortran.dg/unconstrained_commons.f
new file mode 100644
index 000..f9fc471
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unconstrained_commons.f
@@ -0,0 +1,20 @@
+! { dg-do compile }
+! { dg-options "-O3 -funconstrained-commons -fdump-tree-dom2-details" }
+
+! Test for PR69368: a single-element array in a common block, which will be
+! overridden with a larger size at link time (contrary to language spec).
+! Dominator opts considers accesses to differently-computed elements of X as
+! equivalent, unless -funconstrained-commons is passed in.
+  SUBROUTINE FOO
+  IMPLICIT DOUBLE PRECISION (X)
+  INTEGER J
+  COMMON /MYCOMMON / X(1)
+  DO 10 J=1,1024
+ X(J+1)=X(J+7)
+  10  CONTINUE
+  RETURN
+  END
+! { dg-final { scan-tree-dump-not "FIND" "dom2" } }
+! We should retain both a read and write of mycommon.x.
+! { dg-final { scan-tree-dump-times "  _\[0-9\]+ = mycommon\\.x\\\[_\[0-9\]+\\\];" 1 
"dom2" } }
+! { dg-final { scan-tree-dump-times "  mycommon\\.x\\\[_\[0-9\]+\\\] = _\[0-9\]+;" 1 
"dom2" } }
diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
index 0e98056..f133abc 100644
--- a/gcc/tree-dfa.c
+++ b/gcc/tree-dfa.c
@@ -612,9 +612,22 @@ get_ref_base_and_extent (tree exp, HOST_WIDE_INT 

Re: [PATCH] Fix ab SSA_NAME handling in eipa_sra replace_removed_params_ssa_names (PR tree-optimization/70152)

2016-03-09 Thread Richard Biener
On March 9, 2016 6:02:26 PM GMT+01:00, Jakub Jelinek  wrote:
>Hi!
>
>If a parameter is unused, eipa_sra replaces the SSA_NAMEs with that
>PARM_DECL SSA_NAME_VAR with SSA_NAMEs with a VAR_DECL instead.
>IMHO it is fine to do that even when all or some of its SSA_NAMEs
>are (ab), but we need to copy that flag over.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

>2016-03-09  Jakub Jelinek  
>
>   PR tree-optimization/70152
>   * tree-sra.c (replace_removed_params_ssa_names): Copy over
>   SSA_NAME_OCCURS_IN_ABNORMAL_PHI from old_name to new_name.
>
>   * gcc.dg/pr70152.c: New test.
>
>--- gcc/tree-sra.c.jj  2016-02-26 20:30:21.0 +0100
>+++ gcc/tree-sra.c 2016-03-09 10:55:23.628924709 +0100
>@@ -4758,6 +4758,8 @@ replace_removed_params_ssa_names (tree o
> 
>   repl = get_replaced_param_substitute (adj);
>   new_name = make_ssa_name (repl, stmt);
>+  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_name)
>+= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (old_name);
> 
>   if (dump_file)
> {
>--- gcc/testsuite/gcc.dg/pr70152.c.jj  2016-03-09 11:04:33.704398525
>+0100
>+++ gcc/testsuite/gcc.dg/pr70152.c 2016-03-09 11:04:51.815150732 +0100
>@@ -0,0 +1,27 @@
>+/* PR tree-optimization/70152 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+
>+int a;
>+int foo (void);
>+int setjmp (char *);
>+char buf[64];
>+
>+static int
>+bar (int x)
>+{
>+  x = 0;
>+  setjmp (buf);
>+  for (;;)
>+{
>+switch (x)
>+  case 5:
>+  x = foo ();
>+}
>+}
>+
>+void
>+baz (void)
>+{
>+  bar (a);
>+}
>
>   Jakub




Re: [Patch, fortran] PR68241 - [meta-bug] Deferred-length character

2016-03-09 Thread Dominique d'Humières
Dear Paul,

As you said on IRC the patch needs -l to apply. After that the gcc-5 branch 
bootstrapped and regtested without any problem.

Thanks,

Dominique

> Le 7 mars 2016 à 11:22, Paul Richard Thomas  a 
> écrit :
> 
> Dear All,
> 
> I had promised to get the 5-branch up to date in respect of deferred
> character patches after they had been in place on trunk for "a few
> weeks". Well, I got pulled away by PR69423 and have only now come back
> to the earlier patch.
> 
> The attached patch corresponds to trunk revisions 232450 and 233589.
> They did not apply cleanly to the 5-branch in one or two places but it was no
> big deal to put them right.
> 
> Bootstrapped and regtested on FC21/x86_64 - OK for 5-branch?
> 
> Best regards
> 
> Paul
> 
> 2016-03-07  Paul Thomas  
> 
>Backport from trunk.
>PR fortran/69423
>* trans-decl.c (create_function_arglist): Deferred character
>length functions, with and without declared results, address
>the passed reference type as '.result' and the local string
>length as '..result'.
>(gfc_null_and_pass_deferred_len): Helper function to null and
>return deferred string lengths, as needed.
>(gfc_trans_deferred_vars): Call it, thereby reducing repeated
>code, add call for deferred arrays and reroute pointer function
>results. Avoid using 'tmp' for anything other that a temporary
>tree by introducing 'type_of_array' for the arrayspec type.
> 
> 2016-03-07  Paul Thomas  
> 
>Backport from trunk.
>PR fortran/64324
>* resolve.c (check_uop_procedure): Prevent deferred length
>characters from being trapped by assumed length error.
> 
>Backport from trunk.
>PR fortran/49630
>PR fortran/54070
>PR fortran/60593
>PR fortran/60795
>PR fortran/61147
>PR fortran/64324
>* trans-array.c (gfc_conv_scalarized_array_ref): Pass decl for
>function as well as variable expressions.
>(gfc_array_init_size): Add 'expr' as an argument. Use this to
>correctly set the descriptor dtype for deferred characters.
>(gfc_array_allocate): Add 'expr' to the call to
>'gfc_array_init_size'.
>* trans.c (gfc_build_array_ref): Expand logic for setting span
>to include indirect references to character lengths.
>* trans-decl.c (gfc_get_symbol_decl): Ensure that deferred
>result char lengths that are PARM_DECLs are indirectly
>referenced both for directly passed and by reference.
>(create_function_arglist): If the length type is a pointer type
>then store the length as the 'passed_length' and make the char
>length an indirect reference to it.
>(gfc_trans_deferred_vars): If a character length has escaped
>being set as an indirect reference, return it via the 'passed
>length'.
>* trans-expr.c (gfc_conv_procedure_call): The length of
>deferred character length results is set TREE_STATIC and set to
>zero.
>(gfc_trans_assignment_1): Do not fix the rse string_length if
>it is a variable, a parameter or an indirect reference. Add the
>code to trap assignment of scalars to unallocated arrays.
>* trans-stmt.c (gfc_trans_allocate): Remove 'def_str_len' and
>all references to it. Instead, replicate the code to obtain a
>explicitly defined string length and provide a value before
>array allocation so that the dtype is correctly set.
>trans-types.c (gfc_get_character_type): If the character length
>is a pointer, use the indirect reference.
> 
> 2016-03-07  Paul Thomas  
> 
>Backport from trunk.
>PR fortran/69423
>* gfortran.dg/deferred_character_15.f90 : New test.
> 
> 2016-03-07  Paul Thomas  
> 
>Backport from trunk.
>PR fortran/49630
>* gfortran.dg/deferred_character_13.f90: New test for the fix
>of comment 3 of the PR.
> 
>Backport from trunk.
>PR fortran/54070
>* gfortran.dg/deferred_character_8.f90: New test
>* gfortran.dg/allocate_error_5.f90: New test
> 
>Backport from trunk.
>PR fortran/60593
>* gfortran.dg/deferred_character_10.f90: New test
> 
>Backport from trunk.
>PR fortran/60795
>* gfortran.dg/deferred_character_14.f90: New test
> 
>Backport from trunk.
>PR fortran/61147
>* gfortran.dg/deferred_character_11.f90: New test
> 
>Backport from trunk.
>PR fortran/64324
>* gfortran.dg/deferred_character_9.f90: New test
> 
> 
> 
> 
> 
> -- 
> The difference between genius and stupidity is; genius has its limits.
> 
> Albert Einstein
> 



[C++ RFC] magic_varargs_p issues (PR c++/70144)

2016-03-09 Thread Jakub Jelinek
Hi!

The following testcase results in ICE in C++, while it is properly rejected in
C.  The problem is that the C++ FE treats some varargs builtins as magic and
doesn't perform any conversion on their args.
The first patch is just minimal, just ensures that we reject the builtins
without library implementation there.  But, as the second testcase shows,
e.g. for __builtin_classify_type there is disagreement between C and C++,
where the former applies function-to-pointer and array-to-pointer
conversions even for those magic builtins, but C++ does not.
Unfortunately the second patch breaks some Cilk+ tests, so I'd probably need
to tweak it slightly, e.g. by magic_varargs_p returning 0 / 1 / 2 levels,
0 would mean no magic, 1 would mean do decay_conversion, 2 would mean do
just mark_type_use + reject_gcc_builtin, and return 2 just for Cilk+
reductions and 1 for all other magic varargs functions (for type generic
I believe function-to-pointer and array-to-pointer are desirable, aren't
they).

So, what approach do you prefer?  I've so far bootstrapped/regtested the
second patch, which showed those
+FAIL: g++.dg/cilk-plus/AN/builtin_fn_custom_tplt.cc
+UNRESOLVED: g++.dg/cilk-plus/AN/builtin_fn_custom_tplt.cc
+FAIL: g++.dg/cilk-plus/AN/builtin_fn_mutating_tplt.cc
+UNRESOLVED: g++.dg/cilk-plus/AN/builtin_fn_mutating_tplt.cc
(for all opt/-g levels) regressions.

Jakub
2016-03-09  Jakub Jelinek  

PR c++/70144
* call.c (build_over_call): For magic_varargs_p arguments,
call reject_gcc_builtin if the argument is a FUNCTION_DECL.

* c-c++-common/pr70144.c: New test.

--- gcc/cp/call.c.jj2016-03-04 08:23:29.0 +0100
+++ gcc/cp/call.c   2016-03-09 13:04:34.280011774 +0100
@@ -7516,8 +7516,12 @@ build_over_call (struct z_candidate *can
 {
   tree a = (*args)[arg_index];
   if (magic_varargs_p (fn))
-   /* Do no conversions for magic varargs.  */
-   a = mark_type_use (a);
+   {
+ /* Do no conversions for magic varargs.  */
+ a = mark_type_use (a);
+ if (TREE_CODE (a) == FUNCTION_DECL && reject_gcc_builtin (a))
+   return error_mark_node;
+   }
   else if (DECL_CONSTRUCTOR_P (fn)
   && same_type_ignoring_top_level_qualifiers_p (DECL_CONTEXT (fn),
 TREE_TYPE (a)))
--- gcc/testsuite/c-c++-common/pr70144.c.jj 2016-03-09 13:10:58.246778355 
+0100
+++ gcc/testsuite/c-c++-common/pr70144.c2016-03-09 13:10:04.0 
+0100
@@ -0,0 +1,9 @@
+/* PR c++/70144 */
+/* { dg-do compile } */
+
+void
+foo ()
+{
+  __builtin_constant_p (__builtin_constant_p) ?: ({ unsigned t = 0; t; }); 
/* { dg-error "must be directly called" } */
+  __builtin_classify_type (__builtin_expect);  /* { dg-error "must be directly 
called" } */
+}
2016-03-09  Jakub Jelinek  

PR c++/70144
* call.c (build_over_call): For magic_varargs_p, call decay_conversion
instead of mark_type_use.  Don't store error_mark_node arguments to
argarray, instead return error_mark_node.

* c-c++-common/pr70144-1.c: New test.
* c-c++-common/pr70144-2.c: New test.

--- gcc/cp/call.c.jj2016-03-04 08:23:29.0 +0100
+++ gcc/cp/call.c   2016-03-09 13:29:40.674522135 +0100
@@ -7516,8 +7516,8 @@ build_over_call (struct z_candidate *can
 {
   tree a = (*args)[arg_index];
   if (magic_varargs_p (fn))
-   /* Do no conversions for magic varargs.  */
-   a = mark_type_use (a);
+   /* For magic varargs only do decay_conversion.  */
+   a = decay_conversion (a, complain);
   else if (DECL_CONSTRUCTOR_P (fn)
   && same_type_ignoring_top_level_qualifiers_p (DECL_CONTEXT (fn),
 TREE_TYPE (a)))
@@ -7530,6 +7530,8 @@ build_over_call (struct z_candidate *can
}
   else
a = convert_arg_to_ellipsis (a, complain);
+  if (a == error_mark_node)
+   return error_mark_node;
   argarray[j++] = a;
 }
 
--- gcc/testsuite/c-c++-common/pr70144-1.c.jj   2016-03-09 13:10:58.246778355 
+0100
+++ gcc/testsuite/c-c++-common/pr70144-1.c  2016-03-09 13:10:04.0 
+0100
@@ -0,0 +1,9 @@
+/* PR c++/70144 */
+/* { dg-do compile } */
+
+void
+foo ()
+{
+  __builtin_constant_p (__builtin_constant_p) ?: ({ unsigned t = 0; t; }); 
/* { dg-error "must be directly called" } */
+  __builtin_classify_type (__builtin_expect);  /* { dg-error "must be directly 
called" } */
+}
--- gcc/testsuite/c-c++-common/pr70144-2.c.jj   2016-03-09 13:31:28.354062276 
+0100
+++ gcc/testsuite/c-c++-common/pr70144-2.c  2016-03-09 13:31:49.673773235 
+0100
@@ -0,0 +1,12 @@
+/* PR c++/70144 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+int
+main ()
+{
+  if (__builtin_constant_p (__builtin_memset) != 0
+  || __builtin_classify_type (__builtin_memset) != 5)
+__builtin_abort ();
+  

Re: [PATCH 2/2][GCC][ARM] Fix testcases after introduction of Cortex-R8

2016-03-09 Thread Mike Stump
On Mar 9, 2016, at 8:57 AM, Andre Vieira (lists) 
 wrote:
>> I'm seeing a DejaGNU error while testing
>> RUNTESTFLAGS="arm.exp=pr45701-*.c":
>> ERROR: (DejaGnu) proc "^-" does not exist.

> 2016-03-09 Andre Vieira 
> 
> * gcc.target/arm/pr45701-1.c: Escape brackets.

Be sure to run test cases with dejagnu before check in.

Re: [PATCH] Fix ICE with xmm{16-31} in *truncdfsf_fast_mixed with -mtune=barcelona (PR target/70086)

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 03:51:04PM +0100, Jakub Jelinek wrote:
> Unfortunately, this really doesn't seem to work, I get ICEs on the
> testcases.  I've tried to allow EXT_REX_SSE_REG_P for -mavx512f -mno-avx512vl
> just for MEM_P (operands[1]), but even that ICEs.  Perhaps there are bugs
> in other splitters.
> 
> I'll bootstrap/regtest this then:
> 
> 2016-03-04  Jakub Jelinek  
> 
>   PR target/70086
>   * config/i386/i386.md (truncdfsf2 splitter): Use gen_vec_concatv2df
>   instead of gen_sse2_loadlpd.
>   * config/i386/sse.md (*vec_concatv2df): Rename to...
>   (vec_concatv2df): ... this.
> 
>   * gcc.target/i386/pr70086-1.c: New test.
>   * gcc.target/i386/pr70086-2.c: New test.
>   * gcc.target/i386/pr70086-3.c: New test.

Now successfully bootstrapped/regtested on x86_64-linux and i686-linux.
Ok for trunk?

> --- gcc/config/i386/i386.md.jj2016-03-08 09:01:50.871475493 +0100
> +++ gcc/config/i386/i386.md   2016-03-09 15:40:00.102942847 +0100
> @@ -4393,8 +4393,8 @@ (define_split
>emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
>  }
>else
> -emit_insn (gen_sse2_loadlpd (operands[4],
> -  CONST0_RTX (V2DFmode), operands[1]));
> +emit_insn (gen_vec_concatv2df (operands[4], operands[1],
> +CONST0_RTX (DFmode)));
>  })
>  
>  ;; It's more profitable to split and then extend in the same register.
> --- gcc/config/i386/sse.md.jj 2016-03-09 15:08:17.0 +0100
> +++ gcc/config/i386/sse.md2016-03-09 15:15:10.346223894 +0100
> @@ -8951,7 +8951,7 @@ (define_insn "vec_dupv2df"
> (set_attr "prefix" "orig,maybe_vex,evex")
> (set_attr "mode" "V2DF,DF,DF")])
>  
> -(define_insn "*vec_concatv2df"
> +(define_insn "vec_concatv2df"
>[(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
>   (vec_concat:V2DF
> (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
> --- gcc/testsuite/gcc.target/i386/pr70086-1.c.jj  2016-03-09 
> 15:12:55.177060382 +0100
> +++ gcc/testsuite/gcc.target/i386/pr70086-1.c 2016-03-09 15:12:55.177060382 
> +0100
> @@ -0,0 +1,11 @@
> +/* PR target/70086 */
> +/* { dg-do compile } */
> +/* { dg-options "-mtune=barcelona -mavx512vl -ffloat-store" } */
> +
> +float
> +foo (float a, float b, double c, float d, double e, float f)
> +{
> +  e -= d;
> +  d *= e;
> +  return e + d;
> +}
> --- gcc/testsuite/gcc.target/i386/pr70086-2.c.jj  2016-03-09 
> 15:12:55.177060382 +0100
> +++ gcc/testsuite/gcc.target/i386/pr70086-2.c 2016-03-09 15:35:52.0 
> +0100
> @@ -0,0 +1,21 @@
> +/* PR target/70086 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mtune=barcelona -mavx512vl" } */
> +
> +float
> +foo (double *p)
> +{
> +  register float xmm16 __asm ("xmm16");
> +  xmm16 = *p;
> +  asm volatile ("" : "+v" (xmm16));
> +  return xmm16;
> +}
> +
> +float
> +bar (double x)
> +{
> +  register float xmm16 __asm ("xmm16");
> +  xmm16 = x;
> +  asm volatile ("" : "+v" (xmm16));
> +  return xmm16;
> +}
> --- gcc/testsuite/gcc.target/i386/pr70086-3.c.jj  2016-03-09 
> 15:36:28.332831118 +0100
> +++ gcc/testsuite/gcc.target/i386/pr70086-3.c 2016-03-09 15:35:33.0 
> +0100
> @@ -0,0 +1,21 @@
> +/* PR target/70086 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mtune=barcelona -mavx512f -mno-avx512vl" } */
> +
> +float
> +foo (double *p)
> +{
> +  register float xmm16 __asm ("xmm16");
> +  xmm16 = *p;
> +  asm volatile ("" : "+v" (xmm16));
> +  return xmm16;
> +}
> +
> +float
> +bar (double x)
> +{
> +  register float xmm16 __asm ("xmm16");
> +  xmm16 = x;
> +  asm volatile ("" : "+v" (xmm16));
> +  return xmm16;
> +}

Jakub


[PATCH] Fix ab SSA_NAME handling in eipa_sra replace_removed_params_ssa_names (PR tree-optimization/70152)

2016-03-09 Thread Jakub Jelinek
Hi!

If a parameter is unused, eipa_sra replaces the SSA_NAMEs with that
PARM_DECL SSA_NAME_VAR with SSA_NAMEs with a VAR_DECL instead.
IMHO it is fine to do that even when all or some of its SSA_NAMEs
are (ab), but we need to copy that flag over.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-09  Jakub Jelinek  

PR tree-optimization/70152
* tree-sra.c (replace_removed_params_ssa_names): Copy over
SSA_NAME_OCCURS_IN_ABNORMAL_PHI from old_name to new_name.

* gcc.dg/pr70152.c: New test.

--- gcc/tree-sra.c.jj   2016-02-26 20:30:21.0 +0100
+++ gcc/tree-sra.c  2016-03-09 10:55:23.628924709 +0100
@@ -4758,6 +4758,8 @@ replace_removed_params_ssa_names (tree o
 
   repl = get_replaced_param_substitute (adj);
   new_name = make_ssa_name (repl, stmt);
+  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_name)
+= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (old_name);
 
   if (dump_file)
 {
--- gcc/testsuite/gcc.dg/pr70152.c.jj   2016-03-09 11:04:33.704398525 +0100
+++ gcc/testsuite/gcc.dg/pr70152.c  2016-03-09 11:04:51.815150732 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/70152 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int a;
+int foo (void);
+int setjmp (char *);
+char buf[64];
+
+static int
+bar (int x)
+{
+  x = 0;
+  setjmp (buf);
+  for (;;)
+{
+switch (x)
+  case 5:
+   x = foo ();
+}
+}
+
+void
+baz (void)
+{
+  bar (a);
+}

Jakub


Re: [PATCH 2/2][GCC][ARM] Fix testcases after introduction of Cortex-R8

2016-03-09 Thread Andre Vieira (lists)
On 08/03/16 14:56, Kyrill Tkachov wrote:
> Hi Andre,
> 
> On 08/03/16 11:05, Andre Vieira (lists) wrote:
>> On 03/03/16 11:28, Kyrill Tkachov wrote:
>>> Hi Andre,
>>>
>>> On 02/03/16 12:21, Andre Vieira (lists) wrote:
 Hi,

 Tests used to check for "r8" which will not work because cortex-r8
 string is now included in the assembly. Fixed by checking for
 "[^\-]r8".

 Is this Ok?

 Cheers,
 Andre

 gcc/testsuite/ChangeLog:

 2016-03-02  Andre Vieira  

* gcc.target/arm/pr45701-1.c: Change assembler scan to not
trigger for cortex-r8, when scanning for register r8.
* gcc.target/arm/pr45701-2.c: Likewise.
>>> Ok.
>>> Thanks,
>>> Kyrill
>>>
>> Thomas committed on my behalf at revision r234040.
>>
>> Had to rebase arm-tune.md and invoke.texi, these were all obvious
>> changes.
> 
> I'm seeing a DejaGNU error while testing
> RUNTESTFLAGS="arm.exp=pr45701-*.c":
> ERROR: (DejaGnu) proc "^-" does not exist.
> The error code is NONE
> The info on the error is:
> invalid command name "^-"
> while executing
> "::tcl_unknown ^-"
> ("uplevel" body line 1)
> invoked from within
> "uplevel 1 ::tcl_unknown $args"
> 
> That's due to the scan-assembler-not test:
> /* { dg-final { scan-assembler-not "[^\-]r8" } } */
> 
> The '[' and ']' need to be escaped by a backslash.
> Can you please post a patch to add the escapes.
> Sorry for missing this in the original review...
> 
> Kyrill
> 
>> Cheers,
>> Andre
>>
> 
Hi there,

Sorry for missing those too.

2016-03-09 Andre Vieira 

* gcc.target/arm/pr45701-1.c: Escape brackets.
* gcc.target/arm/pr45701-2.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index 
a5db56fc6f2f3cb334b514a72ff500308c361832..01db15abfd03eb916676e39b5db14a39596cbad6
 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
 /* { dg-options "-mthumb -Os" }  */
 /* { dg-final { scan-assembler "push\t\{r3" } } */
-/* { dg-final { scan-assembler-not "[^\-]r8" } } */
+/* { dg-final { scan-assembler-not "\[^\-\]r8" } } */
 
 extern int hist_verify;
 extern int a1;
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index 
765981b90db38f534e13e9e8a8f538c8408f798a..ce66d7509d1769fb96bb05e0d274be27e28a7188
 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
 /* { dg-options "-mthumb -Os" }  */
 /* { dg-final { scan-assembler "push\t\{r3" } } */
-/* { dg-final { scan-assembler-not "[^\-]r8" } } */
+/* { dg-final { scan-assembler-not "\[^\-\]r8" } } */
 
 extern int hist_verify;
 extern int a1;


[PATCH][ARM] Make Cortex-R8 use ARMv7 multilib

2016-03-09 Thread Andre Vieira (lists)
Hi there,

This patch adds cortex-r8 to the list of cores using the armv7 multilib.

This patch is based on Thomas' multilib patch series:
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01584.html
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01585.html
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01586.html

Is this OK?

2016-03-09  Andre Vieira  

  * gcc/config/arm/t-baremetal: Add cortex-r8.
diff --git a/gcc/config/arm/t-baremetal b/gcc/config/arm/t-baremetal
index 
ffd29815e6ec22c747e77747ed9b69e0ae21b63a..6794b1cc02e73ca5b53a6350f09f9ae3afd171dc
 100644
--- a/gcc/config/arm/t-baremetal
+++ b/gcc/config/arm/t-baremetal
@@ -33,6 +33,7 @@ MULTILIB_MATCHES  += march?armv7=mcpu?cortex-r4
 MULTILIB_MATCHES  += march?armv7=mcpu?cortex-r4f
 MULTILIB_MATCHES  += march?armv7=mcpu?cortex-r5
 MULTILIB_MATCHES  += march?armv7=mcpu?cortex-r7
+MULTILIB_MATCHES  += march?armv7=mcpu?cortex-r8
 MULTILIB_MATCHES  += march?armv7=mcpu?generic-armv7-a
 MULTILIB_MATCHES  += march?armv7=mcpu?cortex-a5
 MULTILIB_MATCHES  += march?armv7=mcpu?cortex-a7


Re: [ptx] debug info

2016-03-09 Thread Alexander Monakov
On Wed, 9 Mar 2016, Nathan Sidwell wrote:
> On 03/09/16 09:55, Alexander Monakov wrote:
> > The preceding code special-casing response to -gstabs can also be removed
> > after this patch.  Should I submit the (trivial) removal patch?
> 
> No.   I found that necessary to stop the testsuite testing stabs -- it expects
> an error, but we don't get one without that check.

Since you removed the unnecessary override, the specific check is no longer
necessary: toplevel code is capable of issuing the error for unsupported debug
info format like this:

:|x86_64-pc-linux-gnu-accel-nvptx-none-gcc -xc - -S -o- -gstabs
:1:0: sorry, unimplemented: stabs debug format not supported

cc1: error: target system does not support the ‘stabs’ debug format

:|x86_64-pc-linux-gnu-accel-nvptx-none-gcc -xc - -S -o- -gcoff
cc1: error: target system does not support the ‘coff’ debug format

So nvptx can use the generic mechanism that produces such errors for all targets
and all debug formats, rather than ad-hoc target-specific handling.

Alexander

[PATCH] Fix PR70138

2016-03-09 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-03-09  Richard Biener  
Jakub Jelinek  

PR tree-optimization/70138
* tree-vect-loop-manip.c (vect_update_ivs_after_vectorizer):
Also skip vect_double_reduction_def.

* gcc.dg/vect/pr70138-1.c: New testcase.
* gcc.dg/vect/pr70138-2.c: Likewise.

Index: gcc/tree-vect-loop-manip.c
===
*** gcc/tree-vect-loop-manip.c  (revision 234085)
--- gcc/tree-vect-loop-manip.c  (working copy)
*** vect_update_ivs_after_vectorizer (loop_v
*** 1692,1698 
  
/* Skip reduction phis.  */
stmt_info = vinfo_for_stmt (phi);
!   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
  {
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 1687,1694 
  
/* Skip reduction phis.  */
stmt_info = vinfo_for_stmt (phi);
!   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
! || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
  {
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
Index: gcc/testsuite/gcc.dg/vect/pr70138-1.c
===
*** gcc/testsuite/gcc.dg/vect/pr70138-1.c   (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr70138-1.c   (working copy)
***
*** 0 
--- 1,40 
+ /* { dg-do run } */
+ 
+ #include "tree-vect.h"
+ 
+ double u[33 * 33];
+ 
+ __attribute__((noinline, noclone)) static void
+ foo (int *x)
+ {
+   double c = 0.0;
+   int a, b;
+   for (a = 0; a < 33; a++)
+ {
+   for (b = 0; b < 33; b++)
+   c = c + u[34 * a];
+   u[34 * a] *= 2.0;
+ }
+   *x = c;
+ }
+ 
+ int
+ main ()
+ {
+   int d, e;
+   check_vect ();
+   for (d = 0; d < 33 * 33; d++)
+ {
+   u[d] = 499.0;
+   __asm__ volatile ("" : : : "memory");
+ }
+   for (d = 0; d < 33; d++)
+ {
+   u[d * 34] = (d + 2);
+   __asm__ volatile ("" : : : "memory");
+ }
+   foo (&e);
+   if (e != 33 * (2 + 34) / 2 * 33)
+ __builtin_abort ();
+   return 0;
+ }
Index: gcc/testsuite/gcc.dg/vect/pr70138-2.c
===
*** gcc/testsuite/gcc.dg/vect/pr70138-2.c   (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr70138-2.c   (working copy)
***
*** 0 
--- 1,35 
+ /* { dg-do run } */
+ 
+ #include "tree-vect.h"
+ 
+ double u[33];
+ 
+ __attribute__((noinline, noclone)) static void
+ foo (int *x)
+ {
+   double c = 0.0;
+   int a, b;
+   for (a = 0; a < 33; a++)
+ {
+   for (b = 0; b < 33; b++)
+   c = c + u[a];
+   u[a] *= 2.0;
+ }
+   *x = c;
+ }
+ 
+ int
+ main ()
+ {
+   int d, e;
+   check_vect ();
+   for (d = 0; d < 33; d++)
+ {
+   u[d] = (d + 2);
+   __asm__ volatile ("" : : : "memory");
+ }
+   foo (&e);
+   if (e != 33 * (2 + 34) / 2 * 33)
+ __builtin_abort ();
+   return 0;
+ }


Re: [ptx] debug info

2016-03-09 Thread Nathan Sidwell

On 03/09/16 09:55, Alexander Monakov wrote:

Hello Nathan,

On Wed, 9 Mar 2016, Nathan Sidwell wrote:

I've committed this to trunk, to remove the squashing of debug information.
It appears to function correctly.

I'd had this patch for a while, but forgot to commit it.


The preceding code special-casing response to -gstabs can also be removed
after this patch.  Should I submit the (trivial) removal patch?


No.   I found that necessary to stop the testsuite testing stabs -- it expects 
an error, but we don't get one without that check.



Furthermore, this is not useful without support in libgomp/plugin-nvptx.c
and nvptx-none-run.c (PTX JIT does not propagate lineinfo by default).  Would
you like me to submit patches for those?


please.

nathan



Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Marek Polacek
On Wed, Mar 09, 2016 at 04:31:37PM +0100, Jakub Jelinek wrote:
> No, I meant:
>   switch (n)
> {
>   struct S x;
> case 1:
>   fn ();
>   break;
> case 2:
>   fn2 ();
>   break;
> case 3:
>   x = fn ();
>   if (x.a[0] != 42)
>   __builtin_abort ();
>   break;
> case 4:
>   if (fn ().a[0] != 42)
>   __builtin_abort ();
>   break;
> ...
> 
> The reason is that anything after a noreturn call can be optimized away
> shortly afterwards.  Perhaps you want __attribute__((noinline, noclone)) on
> the function too just in case (I know you haven't included -O*).
 
Aha.  I couldn't do exactly this because of 
error: switch jumps into scope of identifier with variably modified type
so I moved the decl out of the switch.

> Otherwise LGTM.

Thanks.

Bootstrapped/regtested on x86_64-linux.

2016-03-09  Marek Polacek  

PR c/70093
* c-typeck.c (build_function_call_vec): Create a TARGET_EXPR for
nested functions returning VM types.

* cgraphunit.c (cgraph_node::expand_thunk): Also build call to the
function being thunked if the result type doesn't have fixed size.
* gimplify.c (gimplify_modify_expr): Also set LHS if the result type
doesn't have fixed size.

* gcc.dg/nested-func-10.c: New test.
* gcc.dg/nested-func-9.c: New test.

diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c
index 6aa0f03..de9d465 100644
--- gcc/c/c-typeck.c
+++ gcc/c/c-typeck.c
@@ -3068,6 +3068,16 @@ build_function_call_vec (location_t loc, vec<location_t> 
arg_loc,
 result = build_call_array_loc (loc, TREE_TYPE (fntype),
   function, nargs, argarray);
 
+  /* In this improbable scenario, a nested function returns a VM type.
+ Create a TARGET_EXPR so that the call always has a LHS, much as
+ what the C++ FE does for functions returning non-PODs.  */
+  if (variably_modified_type_p (TREE_TYPE (fntype), NULL_TREE))
+{
+  tree tmp = create_tmp_var_raw (TREE_TYPE (fntype));
+  result = build4 (TARGET_EXPR, TREE_TYPE (fntype), tmp, result,
+  NULL_TREE, NULL_TREE);
+}
+
   if (VOID_TYPE_P (TREE_TYPE (result)))
 {
   if (TYPE_QUALS (TREE_TYPE (result)) != TYPE_UNQUALIFIED)
diff --git gcc/cgraphunit.c gcc/cgraphunit.c
index 8b3fddc..4351ae4 100644
--- gcc/cgraphunit.c
+++ gcc/cgraphunit.c
@@ -1708,7 +1708,9 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool 
force_gimple_thunk)
 
   /* Build call to the function being thunked.  */
   if (!VOID_TYPE_P (restype)
- && (!alias_is_noreturn || TREE_ADDRESSABLE (restype)))
+ && (!alias_is_noreturn
+ || TREE_ADDRESSABLE (restype)
+ || TREE_CODE (TYPE_SIZE_UNIT (restype)) != INTEGER_CST))
{
  if (DECL_BY_REFERENCE (resdecl))
{
diff --git gcc/gimplify.c gcc/gimplify.c
index b331e41..692d168 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -4838,7 +4838,8 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p,
}
   notice_special_calls (call_stmt);
   if (!gimple_call_noreturn_p (call_stmt)
- || TREE_ADDRESSABLE (TREE_TYPE (*to_p)))
+ || TREE_ADDRESSABLE (TREE_TYPE (*to_p))
+ || TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (*to_p))) != INTEGER_CST)
gimple_call_set_lhs (call_stmt, *to_p);
   assign = call_stmt;
 }
diff --git gcc/testsuite/gcc.dg/nested-func-10.c 
gcc/testsuite/gcc.dg/nested-func-10.c
index e69de29..ac6f76f 100644
--- gcc/testsuite/gcc.dg/nested-func-10.c
+++ gcc/testsuite/gcc.dg/nested-func-10.c
@@ -0,0 +1,56 @@
+/* PR c/70093 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void __attribute__((noinline, noclone))
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S __attribute__((noreturn))
+  fn (void)
+  {
+__builtin_abort ();
+  }
+
+  auto struct S __attribute__((noreturn))
+  fn2 (void)
+  {
+__builtin_abort ();
+  }
+
+  struct S x;
+  __typeof__ (fn ()) *p = &x;
+  switch (n)
+{
+case 1:
+  fn ();
+  break;
+case 2:
+  fn2 ();
+  break;
+case 3:
+  x = fn ();
+  if (x.a[0] != 42)
+   __builtin_abort ();
+  break;
+case 4:
+  if (fn ().a[0] != 42)
+   __builtin_abort ();
+  break;
+case 5:
+  if (p->a[0] != 42)
+   __builtin_abort ();
+  break;
+case 6:
+  if (fn2 ().a[0] != 42)
+   __builtin_abort ();
+  break;
+}
+}
+
+int
+main (void)
+{
+  foo (1);
+}
diff --git gcc/testsuite/gcc.dg/nested-func-9.c 
gcc/testsuite/gcc.dg/nested-func-9.c
index e69de29..902c258 100644
--- gcc/testsuite/gcc.dg/nested-func-9.c
+++ gcc/testsuite/gcc.dg/nested-func-9.c
@@ -0,0 +1,47 @@
+/* PR c/70093 */
+/* { dg-do run } */
+/* { dg-options "" } */
+
+void
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S
+  fn (void)
+  {
+struct S s;
+s.a[0] = 42;
+return s;
+  }
+
+  auto struct S
+  fn2 (void)
+  {
+ 

Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 04:20:24PM +0100, Marek Polacek wrote:
> --- gcc/testsuite/gcc.dg/nested-func-10.c
> +++ gcc/testsuite/gcc.dg/nested-func-10.c
> @@ -0,0 +1,49 @@
> +/* PR c/70093 */
> +/* { dg-do compile } */
> +/* { dg-options "" } */
> +
> +void
> +foo (int n)
> +{
> +  struct S { int a[n]; };
> +
> +  struct S __attribute__((noreturn))
> +  fn (void)
> +  {
> +__builtin_abort ();
> +  }
> +
> +  auto struct S __attribute__((noreturn))
> +  fn2 (void)
> +  {
> +__builtin_abort ();
> +  }
> +
> +  switch (n)
> +{
> +case 42:;
> +  struct S x;
> +  fn ();
> +  fn2 ();
> +  x = fn ();
> +
> +  if (x.a[0] != 42)
> + __builtin_abort ();
> +
> +  if (fn ().a[0] != 42)
> + __builtin_abort ();
> +
> +  __typeof__ (fn ()) *p = &x;
> +  if (p->a[0] != 42)
> + __builtin_abort ();
> +
> +  if (fn2 ().a[0] != 42)
> + __builtin_abort ();

No, I meant:
  switch (n)
{
  struct S x;
case 1:
  fn ();
  break;
case 2:
  fn2 ();
  break;
case 3:
  x = fn ();
  if (x.a[0] != 42)
__builtin_abort ();
  break;
case 4:
  if (fn ().a[0] != 42)
__builtin_abort ();
  break;
...

The reason is that anything after a noreturn call can be optimized away
shortly afterwards.  Perhaps you want __attribute__((noinline, noclone)) on
the function too just in case (I know you haven't included -O*).

Otherwise LGTM.

Jakub


Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Marek Polacek
On Wed, Mar 09, 2016 at 03:45:45PM +0100, Jakub Jelinek wrote:
> Instead of the expecting warnings, wouldn't it be better to simply call
> __builtin_abort () in fn ()?
 
Maybe.  Done.

> > +  struct S x;
> > +  x = fn ();
> > +
> > +  if (x.a[0] != 42)
> > +__builtin_abort ();
> > +
> > +  if (fn ().a[0] != 42)
> > +__builtin_abort ();
> > +
> > +  __typeof__ (fn ()) *p = &x;
> > +  if (p->a[0] != 42)
> > +__builtin_abort ();
> > +
> > +  if (fn2 ().a[0] != 42)
> > +__builtin_abort ();
> 
> And do these all just conditionally, say in a big switch on foo's parameter?
 
Like in the following?

> And, I'm really surprised that you haven't included the case of a call
> without lhs at the source level, so just
>   fn ();
> and
>   fn2 ();
> somewhere.
 
Uhm, yes.  Dunno why they're gone.  So I've added them:

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2016-03-09  Marek Polacek  

PR c/70093
* c-typeck.c (build_function_call_vec): Create a TARGET_EXPR for
nested functions returning VM types.

* cgraphunit.c (cgraph_node::expand_thunk): Also build call to the
function being thunked if the result type doesn't have fixed size.
* gimplify.c (gimplify_modify_expr): Also set LHS if the result type
doesn't have fixed size.

* gcc.dg/nested-func-10.c: New test.
* gcc.dg/nested-func-9.c: New test.

diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c
index 6aa0f03..de9d465 100644
--- gcc/c/c-typeck.c
+++ gcc/c/c-typeck.c
@@ -3068,6 +3068,16 @@ build_function_call_vec (location_t loc, vec<location_t> 
arg_loc,
 result = build_call_array_loc (loc, TREE_TYPE (fntype),
   function, nargs, argarray);
 
+  /* In this improbable scenario, a nested function returns a VM type.
+ Create a TARGET_EXPR so that the call always has a LHS, much as
+ what the C++ FE does for functions returning non-PODs.  */
+  if (variably_modified_type_p (TREE_TYPE (fntype), NULL_TREE))
+{
+  tree tmp = create_tmp_var_raw (TREE_TYPE (fntype));
+  result = build4 (TARGET_EXPR, TREE_TYPE (fntype), tmp, result,
+  NULL_TREE, NULL_TREE);
+}
+
   if (VOID_TYPE_P (TREE_TYPE (result)))
 {
   if (TYPE_QUALS (TREE_TYPE (result)) != TYPE_UNQUALIFIED)
diff --git gcc/cgraphunit.c gcc/cgraphunit.c
index 8b3fddc..4351ae4 100644
--- gcc/cgraphunit.c
+++ gcc/cgraphunit.c
@@ -1708,7 +1708,9 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool 
force_gimple_thunk)
 
   /* Build call to the function being thunked.  */
   if (!VOID_TYPE_P (restype)
- && (!alias_is_noreturn || TREE_ADDRESSABLE (restype)))
+ && (!alias_is_noreturn
+ || TREE_ADDRESSABLE (restype)
+ || TREE_CODE (TYPE_SIZE_UNIT (restype)) != INTEGER_CST))
{
  if (DECL_BY_REFERENCE (resdecl))
{
diff --git gcc/gimplify.c gcc/gimplify.c
index b331e41..692d168 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -4838,7 +4838,8 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p,
}
   notice_special_calls (call_stmt);
   if (!gimple_call_noreturn_p (call_stmt)
- || TREE_ADDRESSABLE (TREE_TYPE (*to_p)))
+ || TREE_ADDRESSABLE (TREE_TYPE (*to_p))
+ || TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (*to_p))) != INTEGER_CST)
gimple_call_set_lhs (call_stmt, *to_p);
   assign = call_stmt;
 }
diff --git gcc/testsuite/gcc.dg/nested-func-10.c 
gcc/testsuite/gcc.dg/nested-func-10.c
index e69de29..c12ff3f 100644
--- gcc/testsuite/gcc.dg/nested-func-10.c
+++ gcc/testsuite/gcc.dg/nested-func-10.c
@@ -0,0 +1,49 @@
+/* PR c/70093 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S __attribute__((noreturn))
+  fn (void)
+  {
+__builtin_abort ();
+  }
+
+  auto struct S __attribute__((noreturn))
+  fn2 (void)
+  {
+__builtin_abort ();
+  }
+
+  switch (n)
+{
+case 42:;
+  struct S x;
+  fn ();
+  fn2 ();
+  x = fn ();
+
+  if (x.a[0] != 42)
+   __builtin_abort ();
+
+  if (fn ().a[0] != 42)
+   __builtin_abort ();
+
+  __typeof__ (fn ()) *p = &x;
+  if (p->a[0] != 42)
+   __builtin_abort ();
+
+  if (fn2 ().a[0] != 42)
+   __builtin_abort ();
+}
+}
+
+int
+main (void)
+{
+  foo (1);
+}
diff --git gcc/testsuite/gcc.dg/nested-func-9.c 
gcc/testsuite/gcc.dg/nested-func-9.c
index e69de29..902c258 100644
--- gcc/testsuite/gcc.dg/nested-func-9.c
+++ gcc/testsuite/gcc.dg/nested-func-9.c
@@ -0,0 +1,47 @@
+/* PR c/70093 */
+/* { dg-do run } */
+/* { dg-options "" } */
+
+void
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S
+  fn (void)
+  {
+struct S s;
+s.a[0] = 42;
+return s;
+  }
+
+  auto struct S
+  fn2 (void)
+  {
+return fn ();
+  }
+
+  struct S x;
+  fn ();
+  fn2 ();
+  x = fn ();
+
+  if (x.a[0] != 42)
+__builtin_abort ();
+
+  if (fn 

Re: [ptx] debug info

2016-03-09 Thread Alexander Monakov
Hello Nathan,

On Wed, 9 Mar 2016, Nathan Sidwell wrote:
> I've committed this to trunk, to remove the squashing of debug information.
> It appears to function correctly.
> 
> I'd had this patch for a while, but forgot to commit it.

The preceding code special-casing response to -gstabs can also be removed
after this patch.  Should I submit the (trivial) removal patch?

Furthermore, this is not useful without support in libgomp/plugin-nvptx.c
and nvptx-none-run.c (PTX JIT does not propagate lineinfo by default).  Would
you like me to submit patches for those?

Thanks.
Alexander


Re: [PATCH] Fix ICE with xmm{16-31} in *truncdfsf_fast_mixed with -mtune=barcelona (PR target/70086)

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 02:06:03PM +0100, Uros Bizjak wrote:
> Let's go with the option 2) and always generate vec_concatv2df, as we
> only need it for [v,m,C] alternative. In the long term, we should
> enhance all patterns with new alternatives, but not in stage-4.

Ok, see patch below.

> Attached (lightly tested) patch that implements option 2) also allows
> us to simplify splitter enable condition a bit.

> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index cb8bcec..ef80d6a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4362,9 +4362,8 @@
> (match_operand:DF 1 "nonimmediate_operand")))]
>"TARGET_USE_VECTOR_FP_CONVERTS
> && optimize_insn_for_speed_p ()
> -   && reload_completed && SSE_REG_P (operands[0])
> -   && (!EXT_REX_SSE_REG_P (operands[0])
> -   || TARGET_AVX512VL)"
> +   && reload_completed
> +   && SSE_REG_P (operands[0])"
> [(set (match_dup 2)
>(vec_concat:V4SF
>  (float_truncate:V2SF

Unfortunately, this really doesn't seem to work, I get ICEs on the
testcases.  I've tried to allow EXT_REX_SSE_REG_P for -mavx512f -mno-avx512vl
just for MEM_P (operands[1]), but even that ICEs.  Perhaps there are bugs
in other splitters.

I'll bootstrap/regtest this then:

2016-03-04  Jakub Jelinek  

PR target/70086
* config/i386/i386.md (truncdfsf2 splitter): Use gen_vec_concatv2df
instead of gen_sse2_loadlpd.
* config/i386/sse.md (*vec_concatv2df): Rename to...
(vec_concatv2df): ... this.

* gcc.target/i386/pr70086-1.c: New test.
* gcc.target/i386/pr70086-2.c: New test.
* gcc.target/i386/pr70086-3.c: New test.

--- gcc/config/i386/i386.md.jj  2016-03-08 09:01:50.871475493 +0100
+++ gcc/config/i386/i386.md 2016-03-09 15:40:00.102942847 +0100
@@ -4393,8 +4393,8 @@ (define_split
   emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
 }
   else
-emit_insn (gen_sse2_loadlpd (operands[4],
-CONST0_RTX (V2DFmode), operands[1]));
+emit_insn (gen_vec_concatv2df (operands[4], operands[1],
+  CONST0_RTX (DFmode)));
 })
 
 ;; It's more profitable to split and then extend in the same register.
--- gcc/config/i386/sse.md.jj   2016-03-09 15:08:17.0 +0100
+++ gcc/config/i386/sse.md  2016-03-09 15:15:10.346223894 +0100
@@ -8951,7 +8951,7 @@ (define_insn "vec_dupv2df"
(set_attr "prefix" "orig,maybe_vex,evex")
(set_attr "mode" "V2DF,DF,DF")])
 
-(define_insn "*vec_concatv2df"
+(define_insn "vec_concatv2df"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
(vec_concat:V2DF
  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
--- gcc/testsuite/gcc.target/i386/pr70086-1.c.jj2016-03-09 
15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-1.c   2016-03-09 15:12:55.177060382 
+0100
@@ -0,0 +1,11 @@
+/* PR target/70086 */
+/* { dg-do compile } */
+/* { dg-options "-mtune=barcelona -mavx512vl -ffloat-store" } */
+
+float
+foo (float a, float b, double c, float d, double e, float f)
+{
+  e -= d;
+  d *= e;
+  return e + d;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-2.c.jj2016-03-09 
15:12:55.177060382 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-2.c   2016-03-09 15:35:52.0 
+0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
--- gcc/testsuite/gcc.target/i386/pr70086-3.c.jj2016-03-09 
15:36:28.332831118 +0100
+++ gcc/testsuite/gcc.target/i386/pr70086-3.c   2016-03-09 15:35:33.0 
+0100
@@ -0,0 +1,21 @@
+/* PR target/70086 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=barcelona -mavx512f -mno-avx512vl" } */
+
+float
+foo (double *p)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = *p;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}
+
+float
+bar (double x)
+{
+  register float xmm16 __asm ("xmm16");
+  xmm16 = x;
+  asm volatile ("" : "+v" (xmm16));
+  return xmm16;
+}

Jakub


Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 03:34:40PM +0100, Marek Polacek wrote:
> --- gcc/testsuite/gcc.dg/nested-func-10.c
> +++ gcc/testsuite/gcc.dg/nested-func-10.c
> @@ -0,0 +1,45 @@
> +/* PR c/70093 */
> +/* { dg-do compile } */
> +/* { dg-options "" } */
> +
> +void
> +foo (int n)
> +{
> +  struct S { int a[n]; };
> +
> +  struct S __attribute__((noreturn))
> +  fn (void)
> +  {
> +struct S s;
> +s.a[0] = 42;
> +return s; /* { dg-warning "function declared .noreturn.|.noreturn. 
> function does return" } */
> +  }
> +
> +  auto struct S __attribute__((noreturn))
> +  fn2 (void)
> +  {
> +return fn (); /* { dg-warning "function declared .noreturn." } */
> +  }

Instead of the expecting warnings, wouldn't it be better to simply call
__builtin_abort () in fn ()?

> +  struct S x;
> +  x = fn ();
> +
> +  if (x.a[0] != 42)
> +__builtin_abort ();
> +
> +  if (fn ().a[0] != 42)
> +__builtin_abort ();
> +
> +  __typeof__ (fn ()) *p = &x;
> +  if (p->a[0] != 42)
> +__builtin_abort ();
> +
> +  if (fn2 ().a[0] != 42)
> +__builtin_abort ();

And do these all just conditionally, say in a big switch on foo's parameter?

And, I'm really surprised that you haven't included the case of a call
without lhs at the source level, so just
  fn ();
and
  fn2 ();
somewhere.

> --- gcc/testsuite/gcc.dg/nested-func-9.c
> +++ gcc/testsuite/gcc.dg/nested-func-9.c
> @@ -0,0 +1,45 @@
> +/* PR c/70093 */
> +/* { dg-do run } */
> +/* { dg-options "" } */
> +
> +void
> +foo (int n)
> +{
> +  struct S { int a[n]; };
> +
> +  struct S
> +  fn (void)
> +  {
> +struct S s;
> +s.a[0] = 42;
> +return s;
> +  }
> +
> +  auto struct S
> +  fn2 (void)
> +  {
> +return fn ();
> +  }
> +
> +  struct S x;
> +  x = fn ();
> +
> +  if (x.a[0] != 42)
> +__builtin_abort ();
> +
> +  if (fn ().a[0] != 42)
> +__builtin_abort ();
> +
> +  __typeof__ (fn ()) *p = &x;
> +  if (p->a[0] != 42)
> +__builtin_abort ();
> +
> +  if (fn2 ().a[0] != 42)
> +__builtin_abort ();

Similarly here, I miss calls that don't use the return value.

Jakub


Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Marek Polacek
On Wed, Mar 09, 2016 at 12:24:42PM +0100, Jakub Jelinek wrote:
> On Wed, Mar 09, 2016 at 12:05:51PM +0100, Marek Polacek wrote:
> > This PR points out that nested functions returning VM types don't work as
> > expected (yeah, go figure).  We got an ICE on the testcase because we were
> > trying to allocate variable-sized temporary instead of using 
> > __builtin_alloca
> > or its kin.  Jakub suggested to follow what the C++ front end does here.  It
> > seems to be the case that it creates a TARGET_EXPR if the call doesn't have
> > a LHS.  That seems to work out well.  The run-time testcase sanity-checks 
> > that
> > we do something reasonable.
> > 
> > Not a regression, but on the other hand the patch doesn't change anything 
> > for
> > 99.9% programs out there.
> 
> Wonder if you still can get an ICE if you add __attribute__((noreturn)) to
> such nested function.  Quick grep shows that there are some suspicious spots
> and others are fine:
[...]

Wow, indeed it ICEs with __attribute__((noreturn)).  Technically, only the
gimplify.c part is needed to fix the new ICE, but I've also fixed the
cgraphunit.c spot for good measure.  New compile test added.

Bootstrapped/regtested on x86_64-linux, ok for trunk or gcc-7?

2016-03-09  Marek Polacek  

PR c/70093
* c-typeck.c (build_function_call_vec): Create a TARGET_EXPR for
nested functions returning VM types.

* cgraphunit.c (cgraph_node::expand_thunk): Also build call to the
function being thunked if the result type doesn't have fixed size.
* gimplify.c (gimplify_modify_expr): Also set LHS if the result type
doesn't have fixed size.

* gcc.dg/nested-func-10.c: New test.
* gcc.dg/nested-func-9.c: New test.

diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c
index 6aa0f03..de9d465 100644
--- gcc/c/c-typeck.c
+++ gcc/c/c-typeck.c
@@ -3068,6 +3068,16 @@ build_function_call_vec (location_t loc, vec<location_t> 
arg_loc,
 result = build_call_array_loc (loc, TREE_TYPE (fntype),
   function, nargs, argarray);
 
+  /* In this improbable scenario, a nested function returns a VM type.
+ Create a TARGET_EXPR so that the call always has a LHS, much as
+ what the C++ FE does for functions returning non-PODs.  */
+  if (variably_modified_type_p (TREE_TYPE (fntype), NULL_TREE))
+{
+  tree tmp = create_tmp_var_raw (TREE_TYPE (fntype));
+  result = build4 (TARGET_EXPR, TREE_TYPE (fntype), tmp, result,
+  NULL_TREE, NULL_TREE);
+}
+
   if (VOID_TYPE_P (TREE_TYPE (result)))
 {
   if (TYPE_QUALS (TREE_TYPE (result)) != TYPE_UNQUALIFIED)
diff --git gcc/cgraphunit.c gcc/cgraphunit.c
index 8b3fddc..4351ae4 100644
--- gcc/cgraphunit.c
+++ gcc/cgraphunit.c
@@ -1708,7 +1708,9 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool 
force_gimple_thunk)
 
   /* Build call to the function being thunked.  */
   if (!VOID_TYPE_P (restype)
- && (!alias_is_noreturn || TREE_ADDRESSABLE (restype)))
+ && (!alias_is_noreturn
+ || TREE_ADDRESSABLE (restype)
+ || TREE_CODE (TYPE_SIZE_UNIT (restype)) != INTEGER_CST))
{
  if (DECL_BY_REFERENCE (resdecl))
{
diff --git gcc/gimplify.c gcc/gimplify.c
index b331e41..692d168 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -4838,7 +4838,8 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p,
}
   notice_special_calls (call_stmt);
   if (!gimple_call_noreturn_p (call_stmt)
- || TREE_ADDRESSABLE (TREE_TYPE (*to_p)))
+ || TREE_ADDRESSABLE (TREE_TYPE (*to_p))
+ || TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (*to_p))) != INTEGER_CST)
gimple_call_set_lhs (call_stmt, *to_p);
   assign = call_stmt;
 }
diff --git gcc/testsuite/gcc.dg/nested-func-10.c 
gcc/testsuite/gcc.dg/nested-func-10.c
index e69de29..1b869ac 100644
--- gcc/testsuite/gcc.dg/nested-func-10.c
+++ gcc/testsuite/gcc.dg/nested-func-10.c
@@ -0,0 +1,45 @@
+/* PR c/70093 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S __attribute__((noreturn))
+  fn (void)
+  {
+struct S s;
+s.a[0] = 42;
+return s; /* { dg-warning "function declared .noreturn.|.noreturn. 
function does return" } */
+  }
+
+  auto struct S __attribute__((noreturn))
+  fn2 (void)
+  {
+return fn (); /* { dg-warning "function declared .noreturn." } */
+  }
+
+  struct S x;
+  x = fn ();
+
+  if (x.a[0] != 42)
+__builtin_abort ();
+
+  if (fn ().a[0] != 42)
+__builtin_abort ();
+
+  __typeof__ (fn ()) *p = &x;
+  if (p->a[0] != 42)
+__builtin_abort ();
+
+  if (fn2 ().a[0] != 42)
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  foo (1);
+}
diff --git gcc/testsuite/gcc.dg/nested-func-9.c 
gcc/testsuite/gcc.dg/nested-func-9.c
index e69de29..b703f3a 100644
--- gcc/testsuite/gcc.dg/nested-func-9.c
+++ gcc/testsuite/gcc.dg/nested-func-9.c

[PATCH] Fix PR70143

2016-03-09 Thread Richard Biener

The following fixes PR70143.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-03-09  Richard Biener  

c-family/
PR c/70143
* c-common.c (strict_aliasing_warning): Add back
alias_sets_conflict_p check.

* gcc.dg/Wstrict-aliasing-bogus-upcast.c: New testcase.
* gcc.dg/Wstrict-aliasing-struct-with-char-member.c: Likewise.
* gcc.dg/Wstrict-aliasing-struct-member.c: Remove again.

Index: gcc/c-family/c-common.c
===
*** gcc/c-family/c-common.c (revision 234025)
--- gcc/c-family/c-common.c (working copy)
*** strict_aliasing_warning (tree otype, tre
*** 1568,1574 
alias_set_type set2 = get_alias_set (TREE_TYPE (type));
  
if (set1 != set2 && set2 != 0
! && (set1 == 0 || !alias_set_subset_of (set2, set1)))
{
  warning (OPT_Wstrict_aliasing, "dereferencing type-punned "
   "pointer will break strict-aliasing rules");
--- 1568,1576 
alias_set_type set2 = get_alias_set (TREE_TYPE (type));
  
if (set1 != set2 && set2 != 0
! && (set1 == 0
! || (!alias_set_subset_of (set2, set1)
! && !alias_sets_conflict_p (set1, set2))))
{
  warning (OPT_Wstrict_aliasing, "dereferencing type-punned "
   "pointer will break strict-aliasing rules");
Index: gcc/testsuite/gcc.dg/Wstrict-aliasing-bogus-upcast.c
===
*** gcc/testsuite/gcc.dg/Wstrict-aliasing-bogus-upcast.c(revision 0)
--- gcc/testsuite/gcc.dg/Wstrict-aliasing-bogus-upcast.c(working copy)
***
*** 0 
--- 1,17 
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -Wall" } */
+ 
+ struct a {
+ int i;
+ };
+ struct b {
+ struct a a;
+ int j;
+ };
+ int main(void)
+ {
+   static struct b b;
+   struct a *ap=(struct a *)&b;
+   return ((struct b *)&ap->i)->j; /* { dg-bogus "will break strict-aliasing" 
} */
+ }
+ 
Index: gcc/testsuite/gcc.dg/Wstrict-aliasing-struct-with-char-member.c
===
--- gcc/testsuite/gcc.dg/Wstrict-aliasing-struct-with-char-member.c 
(revision 0)
+++ gcc/testsuite/gcc.dg/Wstrict-aliasing-struct-with-char-member.c 
(working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wall" } */
+
+struct a {
+int i;
+char c;
+};
+struct b {
+float f;
+float g;
+};
+int main(void)
+{
+  static struct b b;
+  return ((struct a *)&b)->i; /* { dg-warning "will break strict-aliasing" } */
+}


[ptx] debug info

2016-03-09 Thread Nathan Sidwell
I've committed this to trunk, to remove the squashing of debug information.  It 
 appears to function correctly.


I'd had this patch for a while, but forgot to commit it.

nathan
2016-03-09  Nathan Sidwell  

	* config/nvptx/nvptx.c (nvptx_option_override): Don't kill debug level.

Index: config/nvptx/nvptx.c
===
--- config/nvptx/nvptx.c	(revision 234081)
+++ config/nvptx/nvptx.c	(working copy)
@@ -164,11 +164,6 @@ nvptx_option_override (void)
 /* The stabs testcases want to know stabs isn't supported.  */
 sorry ("stabs debug format not supported");
 
-  /* Actually we don't have any debug format, but don't be
- unneccesarily noisy.  */
-  write_symbols = NO_DEBUG;
-  debug_info_level = DINFO_LEVEL_NONE;
-
   if (nvptx_optimize < 0)
 nvptx_optimize = optimize > 0;
 


[gomp4] ptx debug

2016-03-09 Thread Nathan Sidwell
I've committed this to gomp4 branch, to remove the squashing of debug 
information.  It  appears to function correctly.


I'd had this patch for a while, but forgot to commit it.

nathan
2016-03-09  Nathan Sidwell  

	* config/nvptx/nvptx.c (nvptx_option_override): Don't kill debug level.

Index: config/nvptx/nvptx.c
===
--- config/nvptx/nvptx.c	(revision 234081)
+++ config/nvptx/nvptx.c	(working copy)
@@ -164,11 +164,6 @@ nvptx_option_override (void)
 /* The stabs testcases want to know stabs isn't supported.  */
 sorry ("stabs debug format not supported");
 
-  /* Actually we don't have any debug format, but don't be
- unneccesarily noisy.  */
-  write_symbols = NO_DEBUG;
-  debug_info_level = DINFO_LEVEL_NONE;
-
   if (nvptx_optimize < 0)
 nvptx_optimize = optimize > 0;
 


Re: [Patch testsuite] Change xfail conditions for bb-slp-34.c

2016-03-09 Thread Rainer Orth
Hi James,

>> Unfortunately, the patch broke the test on sparc:
>> 
>> FAIL: gcc.dg/vect/bb-slp-34.c -flto -ffat-lto-objects scan-tree-dump slp2
>> "basic block vectorized"
>> FAIL: gcc.dg/vect/bb-slp-34.c scan-tree-dump slp2 "basic block vectorized"
>> 
>> While before the scan-tree-dump wasn't run due to missing vect_perm
>> support, it now fails.
>> 
>> I believe you meant to just not xfail the test on aarch64* and arm*,
>
> Indeed.
>
>> which is what the following patch does.  James, could you please test
>> it?
>
> Yes, that still does the right thing for me on aarch64-none-elf, and I think
> I now understand why.
>
> Before I touched it that said:
>
>   /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target
> vect_perm xfail *-*-* } } } */
>
> Which means "only run this statement for vect_perm targets, but expect it
> to fail on all targets".
>
> Then I changed it to say :
>
>   /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { xfail {
> { vect_perm } && { ! { aarch64*-*-* arm*-*-* } } } } } } */
>
> Which means "always run this, but expect it to fail on vect_perm targets that
> are not arm/aarch64".
>
> Now you've changed it to say:
>
>   /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target
> vect_perm xfail { ! { aarch64*-*-* arm*-*-* } } } } } */
>
> Which means "only run this for vect perm targets, and expect it to fail
> if the target is not arm/aarch64.
>
> Thanks for the patch, it looks right to me and is what I was originally
> trying to write. Sorry for the sparc (and presumably other
> vect_int && !vect_perm targets) break.

no worries, and thanks for the confirmation.  I know this
effective-target keyword business can be hard to get right ;-(

I've now installed the patch.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Fix ICE with xmm{16-31} in *truncdfsf_fast_mixed with -mtune=barcelona (PR target/70086)

2016-03-09 Thread Uros Bizjak
On Sat, Mar 5, 2016 at 7:39 AM, Jakub Jelinek  wrote:
> Hi!
>
> The r222470 commit changed =x into =v constraint in *truncdfsf_fast_mixed.
> The problem is that for some tunings we have a splitter
> /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
>cvtsd2ss:
>   unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
>   cvtpd2ps xmm2,xmm1
> If the input operand is memory, it attempts to emit sse2_loadlpd
> instruction.  But, that define_insn doesn't have any v constraints and so we
> fail to recognize it.  For the vmovsd 2 operand m -> v instruction
> *vec_concatv2df implements that too.
> So I see 3 options for this:
> 1) as the patch does, emit *vec_concatv2df manually
> 2) rename *vec_concatv2df to vec_concatv2df and use gen_vec_concatv2df
>in the splitter; possibly use it instead of sse2_loadlpd there, because
>that insn has uglier/more complex pattern
> 3) tweak sse2_loadlpd - add various v alternatives to it, guard them with
>avx512vl isa, etc.
>
> I bet the 3) treatment is desirable and likely many other instructions need
> it, but that doesn't sound like stage4 material to me, I find it quite
> risky, do you agree?  If yes, the following patch can work temporarily
> (bootstrapped/regtested on x86_64-linux and i686-linux), or I can do 2),
> but in that case I'd like to know your preferences about the suboption
> (whether to replace gen_sse2_loadlpd with gen_vec_concatv2df or whether
> to use it only for the EXT_REX_SSE_REG_P regs).

Let's go with the option 2) and always generate vec_concatv2df, as we
only need it for [v,m,C] alternative. In the long term, we should
enhance all patterns with new alternatives, but not in stage-4.

Attached (lightly tested) patch that implements option 2) also allows
us to simplify splitter enable condition a bit.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cb8bcec..ef80d6a 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4362,9 +4362,8 @@
  (match_operand:DF 1 "nonimmediate_operand")))]
   "TARGET_USE_VECTOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
-   && reload_completed && SSE_REG_P (operands[0])
-   && (!EXT_REX_SSE_REG_P (operands[0])
-   || TARGET_AVX512VL)"
+   && reload_completed
+   && SSE_REG_P (operands[0])"
[(set (match_dup 2)
 (vec_concat:V4SF
   (float_truncate:V2SF
@@ -4393,8 +4392,10 @@
   emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
 }
   else
-emit_insn (gen_sse2_loadlpd (operands[4],
-CONST0_RTX (V2DFmode), operands[1]));
+/* Emit *vec_concatv2df.  */
+emit_insn (gen_rtx_SET (operands[4],
+   gen_rtx_VEC_CONCAT (V2DFmode, operands[1],
+   CONST0_RTX (DFmode;
 })
 
 ;; It's more profitable to split and then extend in the same register.


[PATCH][ARM] Use proper output modifier for DImode register in store exclusive patterns

2016-03-09 Thread Kyrill Tkachov

Hi all,

I notice that the output code for our store exclusive patterns accesses 
unallocated memory.
It wants to output an strexd instruction with a pair of consecutive registers 
corresponding
to a DImode value. For that it creates the SImode top half of the DImode 
register and puts it
into operands[3]. But the pattern only defines entries up to operands[2], 
with no match_dup 3
or the like, so operands[3] should technically be out of bounds.

We already have a mechanism for printing the top half of a DImode register, 
that's the 'H' output modifier.
So this patch changes those patterns to use that, eliminating the out of bounds 
access and making
the code a bit simpler as well.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

2016-03-09  Kyrylo Tkachov  

* config/arm/sync.md (arm_store_exclusive):
Use 'H' output modifier on operands[2] rather than creating a new
entry in out-of-bounds memory of the operands array.
(arm_store_release_exclusivedi): Likewise.
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 6dd2dc396210bc45374d13e1a20f124cc490b630..8158f53025400045569533a1e8c6583025d490c8 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -422,14 +422,13 @@ (define_insn "arm_store_exclusive"
   {
 if (mode == DImode)
   {
-	rtx value = operands[2];
 	/* The restrictions on target registers in ARM mode are that the two
 	   registers are consecutive and the first one is even; Thumb is
 	   actually more flexible, but DI should give us this anyway.
-	   Note that the 1st register always gets the lowest word in memory.  */
-	gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
-	operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
-	return "strexd%?\t%0, %2, %3, %C1";
+	   Note that the 1st register always gets the
+	   lowest word in memory.  */
+	gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2);
+	return "strexd%?\t%0, %2, %H2, %C1";
   }
 return "strex%?\t%0, %2, %C1";
   }
@@ -445,11 +444,9 @@ (define_insn "arm_store_release_exclusivedi"
 	  VUNSPEC_SLX))]
   "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
   {
-rtx value = operands[2];
 /* See comment in arm_store_exclusive above.  */
-gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
-operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
-return "stlexd%?\t%0, %2, %3, %C1";
+gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2);
+return "stlexd%?\t%0, %2, %H2, %C1";
   }
   [(set_attr "predicable" "yes")
(set_attr "predicable_short_it" "no")])


Re: [PATCH] Fix pextr{b,w} with -masm=intel (PR target/70049)

2016-03-09 Thread Uros Bizjak
On Wed, Mar 2, 2016 at 8:12 PM, Jakub Jelinek  wrote:
> Hi!
>
> Like the recent patch for kmovw, vpextr{b,w} is another instruction
> with output being r32/m{8,16} rather than r32/m32, so we shouldn't use %k0
> for "m" constraint.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2016-03-02  Jakub Jelinek  
>
> PR target/70049
> * config/i386/sse.md (*vec_extract): Use %k0 instead of %0
> if the operand is "m".

Probably you want to say "Don't use k if the operand is "m". At least
this is what the patch does.

> * gcc.target/i386/pr70049.c: New test.

OK with the updated ChangeLog.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2016-02-19 14:42:07.0 +0100
> +++ gcc/config/i386/sse.md  2016-03-02 18:15:51.950982743 +0100
> @@ -13307,7 +13307,9 @@ (define_insn "*vec_extract"
>   (parallel
> [(match_operand:SI 2 "const_0_to__operand")])))]
>"TARGET_SSE2"
> -  "%vpextr\t{%2, %1, %k0|%k0, %1, %2}"
> +  "@
> +   %vpextr\t{%2, %1, %k0|%k0, %1, %2}
> +   %vpextr\t{%2, %1, %0|%0, %1, %2}"
>[(set_attr "isa" "*,sse4")
> (set_attr "type" "sselog1")
> (set_attr "prefix_data16" "1")
> --- gcc/testsuite/gcc.target/i386/pr70049.c.jj  2016-03-02 18:18:36.413730956 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr70049.c 2016-03-02 18:20:08.194474318 
> +0100
> @@ -0,0 +1,20 @@
> +/* PR target/70049 */
> +/* { dg-do assemble { target avx } } */
> +/* { dg-require-effective-target masm_intel } */
> +/* { dg-options "-Og -mavx -masm=intel" } */
> +
> +typedef unsigned short A;
> +typedef unsigned short B __attribute__ ((vector_size (32)));
> +typedef unsigned int C;
> +typedef unsigned int D __attribute__ ((vector_size (32)));
> +typedef unsigned long long E;
> +typedef unsigned long long F __attribute__ ((vector_size (32)));
> +
> +C
> +foo(A a, C b, E c, F d, B e, D f, F g)
> +{
> +  b <<= 28;
> +  e[1] += b;
> +  d %= (F) { 0, f[4] } | 1;
> +  return a + b + c + d[3] + e[1] + g[3];
> +}
>
> Jakub


Re: [Patch testsuite] Change xfail conditions for bb-slp-34.c

2016-03-09 Thread James Greenhalgh
On Wed, Mar 09, 2016 at 12:53:02PM +0100, Rainer Orth wrote:
> Richard Biener  writes:
> 
> > On Thu, 3 Mar 2016, James Greenhalgh wrote:
> >
> >> 
> >> Hi,
> >> 
> >> ARM and AArch64 will still vectorize bb-slp-34.c - we're not operating
> >> with a cost model so we vectorize to a 64-bit vector of two ints, and the
> >> permutes are just element swaps.
> >> 
> >> So, don't mark this test xfail for arm/aarch64.
> >> 
> >> Checked on x86_64-none-linux-gnu, arm-none-eabi and aarch64-none-elf with
> >> no issues.
> >> 
> >> OK?
> >
> > Ok.  Indeed with using V2SI vectors the vectorization is valid. 
> 
> Unfortunately, the patch broke the test on sparc:
> 
> FAIL: gcc.dg/vect/bb-slp-34.c -flto -ffat-lto-objects  scan-tree-dump slp2 
> "basic block vectorized"
> FAIL: gcc.dg/vect/bb-slp-34.c scan-tree-dump slp2 "basic block vectorized"
> 
> While before the scan-tree-dump wasn't run due to missing vect_perm
> support, it now fails.
> 
> I believe you meant to just not xfail the test on aarch64* and arm*,

Indeed.

> which is what the following patch does.  James, could you please test
> it?

Yes, that still does the right thing for me on aarch64-none-elf, and I think
I now understand why.

Before I touched it that said:

  /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target 
vect_perm xfail *-*-* } } } */

Which means "only run this statement for vect_perm targets, but expect it
to fail on all targets".

Then I changed it to say :

  /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { xfail { { 
vect_perm } && { ! { aarch64*-*-* arm*-*-* } } } } } } */

Which means "always run this, but expect it to fail on vect_perm targets that
are not arm/aarch64".

Now you've changed it to say:

  /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target 
vect_perm xfail { ! { aarch64*-*-* arm*-*-* } } } } } */

Which means "only run this for vect perm targets, and expect it to fail
if the target is not arm/aarch64".

Thanks for the patch, it looks right to me and is what I was originally
trying to write. Sorry for the sparc (and presumably other
vect_int && !vect_perm targets) break.

Thanks,
James




Re: [Patch testsuite] Change xfail conditions for bb-slp-34.c

2016-03-09 Thread Rainer Orth
Richard Biener  writes:

> On Thu, 3 Mar 2016, James Greenhalgh wrote:
>
>> 
>> Hi,
>> 
>> ARM and AArch64 will still vectorize bb-slp-34.c - we're not operating
>> with a cost model so we vectorize to a 64-bit vector of two ints, and the
>> permutes are just element swaps.
>> 
>> So, don't mark this test xfail for arm/aarch64.
>> 
>> Checked on x86_64-none-linux-gnu, arm-none-eabi and aarch64-none-elf with
>> no issues.
>> 
>> OK?
>
> Ok.  Indeed with using V2SI vectors the vectorization is valid. 

Unfortunately, the patch broke the test on sparc:

FAIL: gcc.dg/vect/bb-slp-34.c -flto -ffat-lto-objects  scan-tree-dump slp2 
"basic block vectorized"
FAIL: gcc.dg/vect/bb-slp-34.c scan-tree-dump slp2 "basic block vectorized"

While before the scan-tree-dump wasn't run due to missing vect_perm
support, it now fails.

I believe you meant to just not xfail the test on aarch64* and arm*,
which is what the following patch does.  James, could you please test
it?

Thanks.
Rainer


2016-03-09  Rainer Orth  

* gcc.dg/vect/bb-slp-34.c: Really don't xfail on aarch64-*-*,
arm-*-*.

# HG changeset patch
# Parent  2b37d3d00ad8084dfecd2fc2165d20fffc47e5bf
Really don't xfail gcc.dg/vect/bb-slp-34.c on aarch64, arm

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-34.c b/gcc/testsuite/gcc.dg/vect/bb-slp-34.c
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-34.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-34.c
@@ -33,4 +33,4 @@ int main()
 }
 
 /* ??? XFAILed because we access "excess" elements with the permutation.  */
-/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { xfail { { vect_perm } && { ! { aarch64*-*-* arm*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm xfail { ! { aarch64*-*-* arm*-*-* } } } } } */

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Jakub Jelinek
On Wed, Mar 09, 2016 at 12:05:51PM +0100, Marek Polacek wrote:
> This PR points out that nested functions returning VM types don't work as
> expected (yeah, go figure).  We got an ICE on the testcase because we were
> trying to allocate variable-sized temporary instead of using __builtin_alloca
> or its kin.  Jakub suggested to follow what the C++ front end does here.  It
> seems to be the case that it creates a TARGET_EXPR if the call doesn't have
> a LHS.  That seems to work out well.  The run-time testcase sanity-checks that
> we do something reasonable.
> 
> Not a regression, but on the other hand the patch doesn't change anything for
> 99.9% programs out there.

Wonder if you still can get an ICE if you add __attribute__((noreturn)) to
such nested function.  Quick grep shows that there are some suspicious spots
and others are fine:
cgraphunit.c-  /* Build call to the function being thunked.  */
cgraphunit.c-  if (!VOID_TYPE_P (restype)
cgraphunit.c: && (!alias_is_noreturn || TREE_ADDRESSABLE (restype)))
cgraphunit.c-   {
^^^ needs checking
gimplify.c:  if (!gimple_call_noreturn_p (call_stmt)
gimplify.c-   || TREE_ADDRESSABLE (TREE_TYPE (*to_p)))
gimplify.c- gimple_call_set_lhs (call_stmt, *to_p);
^^^ likewise
tree-cfg.c-  if (lhs
tree-cfg.c-  && gimple_call_ctrl_altering_p (stmt)
tree-cfg.c:  && gimple_call_noreturn_p (stmt)
tree-cfg.c-  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (lhs))) == INTEGER_CST
tree-cfg.c-  && !TREE_ADDRESSABLE (TREE_TYPE (lhs)))
tree-cfg.c-{
tree-cfg.c:  error ("LHS in noreturn call");
tree-cfg.c-  return true;
tree-cfg.c-}
^^^ looks fine
tree-cfgcleanup.c-  /* If there is an LHS, remove it, but only if its type has 
fixed size.
tree-cfgcleanup.c- The LHS will need to be recreated during RTL expansion 
and creating
tree-cfgcleanup.c- temporaries of variable-sized types is not supported.  
Also don't
tree-cfgcleanup.c- do this with TREE_ADDRESSABLE types, as assign_temp will 
abort.  */
tree-cfgcleanup.c-  tree lhs = gimple_call_lhs (stmt);
tree-cfgcleanup.c-  if (lhs && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (lhs))) == 
INTEGER_CST
tree-cfgcleanup.c-  && !TREE_ADDRESSABLE (TREE_TYPE (lhs)))
tree-cfgcleanup.c-{
tree-cfgcleanup.c-  gimple_call_set_lhs (stmt, NULL_TREE);
^^^ likewise

Jakub


C PATCH for c/70093 (ICE with nested-function returning VM type)

2016-03-09 Thread Marek Polacek
This PR points out that nested functions returning VM types don't work as
expected (yeah, go figure).  We got an ICE on the testcase because we were
trying to allocate a variable-sized temporary instead of using __builtin_alloca
or its kin.  Jakub suggested to follow what the C++ front end does here.  It
seems to be the case that it creates a TARGET_EXPR if the call doesn't have
a LHS.  That seems to work out well.  The run-time testcase sanity-checks that
we do something reasonable.

Not a regression, but on the other hand the patch doesn't change anything for
99.9% programs out there.

Bootstrapped/regtested on x86_64-linux.

2016-03-08  Marek Polacek  

PR c/70093
* c-typeck.c (build_function_call_vec): Create a TARGET_EXPR for
nested functions returning VM types.

* gcc.dg/nested-func-9.c: New test.

diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c
index 6aa0f03..de9d465 100644
--- gcc/c/c-typeck.c
+++ gcc/c/c-typeck.c
@@ -3068,6 +3068,16 @@ build_function_call_vec (location_t loc, vec 
arg_loc,
 result = build_call_array_loc (loc, TREE_TYPE (fntype),
   function, nargs, argarray);
 
+  /* In this improbable scenario, a nested function returns a VM type.
+ Create a TARGET_EXPR so that the call always has a LHS, much as
+ what the C++ FE does for functions returning non-PODs.  */
+  if (variably_modified_type_p (TREE_TYPE (fntype), NULL_TREE))
+{
+  tree tmp = create_tmp_var_raw (TREE_TYPE (fntype));
+  result = build4 (TARGET_EXPR, TREE_TYPE (fntype), tmp, result,
+  NULL_TREE, NULL_TREE);
+}
+
   if (VOID_TYPE_P (TREE_TYPE (result)))
 {
   if (TYPE_QUALS (TREE_TYPE (result)) != TYPE_UNQUALIFIED)
diff --git gcc/testsuite/gcc.dg/nested-func-9.c 
gcc/testsuite/gcc.dg/nested-func-9.c
index e69de29..b703f3a 100644
--- gcc/testsuite/gcc.dg/nested-func-9.c
+++ gcc/testsuite/gcc.dg/nested-func-9.c
@@ -0,0 +1,45 @@
+/* PR c/70093 */
+/* { dg-do run } */
+/* { dg-options "" } */
+
+void
+foo (int n)
+{
+  struct S { int a[n]; };
+
+  struct S
+  fn (void)
+  {
+struct S s;
+s.a[0] = 42;
+return s;
+  }
+
+  auto struct S
+  fn2 (void)
+  {
+return fn ();
+  }
+
+  struct S x;
+  x = fn ();
+
+  if (x.a[0] != 42)
+__builtin_abort ();
+
+  if (fn ().a[0] != 42)
+__builtin_abort ();
+
+  __typeof__ (fn ()) *p = 
+  if (p->a[0] != 42)
+__builtin_abort ();
+
+  if (fn2 ().a[0] != 42)
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  foo (1);
+}

Marek


Re: [PATCH][SPARC] sparc: switch -fasynchronous-unwind-tables on by default.

2016-03-09 Thread Eric Botcazou
> Consider the attached test program.  When built with -g in sparc64-*-*
> the resulting binary contains:
> 
> - A .eh_frame segment containing CFA information for __libc_csu_init and
>   __libc_csu_fini.
> 
> - A .debug_frame segment containing CFA information for func2, func1 and
>   main.
> 
> The backtrace(3) implementation for sparc contains a simple unwinder
> that works well in most cases, but that unwinder is not used if
> libgcc_s.so can be dlopened and it provides _Unwind_Backtrace.  Now,
> _Unwind_Backtrace uses .eh_frame but not .debug_frame.  Thus,
> backtrace(3) is only useful in programs built with
> -fasynchronous-unwind-tables even if -g provides CFA info in
> .debug_frame.

How does that work for e.g. PowerPC or MIPS?  Why not do the same for SPARC?

-- 
Eric Botcazou


Re: [PATCH][ARM] Split out armv7ve effective target check

2016-03-09 Thread Kyrill Tkachov

Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00162.html

Thanks,
Kyrill
On 02/03/16 13:32, Kyrill Tkachov wrote:

Hi all,

I'm seeing the fails:
FAIL: gcc.target/arm/atomic_loaddi_2.c scan-assembler-times ldrd\tr[0-9]+, 
r[0-9]+, \\[r[0-9]+\\] 1
FAIL: gcc.target/arm/atomic_loaddi_5.c scan-assembler-times ldrd\tr[0-9]+, 
r[0-9]+, \\[r[0-9]+\\] 1
FAIL: gcc.target/arm/atomic_loaddi_8.c scan-assembler-times ldrd\tr[0-9]+, 
r[0-9]+, \\[r[0-9]+\\] 1

when testing an arm multilib with /-march=armv7-a.

The tests have an effective target check for armv7ve but it doesn't work because
under the hood the check is the same as for armv7-a, that is it checks for the 
__ARM_ARCH_7A__
predefine which is set for both march values.

To check for armv7ve using predefines we need to check for both __ARM_ARCH_7A__ 
and for the hardware
integer division predefine, making armv7ve special.

So this patch separates the effective target check definition from the rest of 
the architectures
and defines it appropriately.

With this patch the aforementioned tests appear UNSUPPORTED when testing the 
/-march=armv7-a multilib.

Ok for trunk?

Thanks,
Kyrill

2016-03-02  Kyrylo Tkachov  

* lib/target-supports.exp: Remove v7ve entry from loop
creating effective target checks.
(check_effective_target_arm_arch_v7ve_ok): New procedure.
(add_options_for_arm_arch_v7ve): Likewise.




Re: [PATCH][ARM][RFC] PR target/65578 Fix gcc.dg/torture/stackalign/builtin-apply-4.c for single-precision fpus

2016-03-09 Thread Kyrill Tkachov

Ping*4.

Thanks,
Kyrill
On 02/03/16 13:46, Kyrill Tkachov wrote:

Ping*3.

Thanks,
Kyrill
On 24/02/16 13:48, Kyrill Tkachov wrote:

Ping*2

Thanks,
Kyrill

On 17/02/16 10:12, Kyrill Tkachov wrote:

Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00634.html

As mentioned before, this is actually a fix for PR target/69538.
I got confused when writing the cover letter and ChangeLog...

Thanks,
Kyrill

On 09/02/16 17:24, Kyrill Tkachov wrote:


On 09/02/16 17:21, Kyrill Tkachov wrote:

Hi all,

In this wrong-code PR the builtin-apply-4.c test fails with -flto but only when 
targeting an fpu
with only single-precision capabilities.

bar is a function returning a double. For non-LTO compilation the caller of bar 
reads the return value
from the s0 and s1 VFP registers as expected, but for -flto the 
caller seems to expect the
return value from the r0 and r1 regs.  The RTL dumps show that too.

Debugging the calls to arm_function_value shows that in the -flto compilation 
the function bar is deemed
to be a local function call and assigned the ARM_PCS_AAPCS_LOCAL PCS variant, 
whereas for the non-LTO (and non-breaking)
compilation it uses the ARM_PCS_AAPCS_VFP variant.

Further down in use_vfp_abi when deciding whether to use VFP registers for the 
result there is a bit of
logic that rejects VFP registers when handling the ARM_PCS_AAPCS_LOCAL variant 
with a double precision value
on an FPU that is not TARGET_VFP_DOUBLE.

This seems wrong for ARM_PCS_AAPCS_LOCAL to me. ARM_PCS_AAPCS_LOCAL means that 
the function doesn't escape
the translation unit and we can thus use whatever variant we want. From what I 
understand we want to use the
VFP regs when possible for FP values.

So this patch removes that restriction and for the testcase the caller of bar 
correctly reads the return
value of bar from the VFP registers and everything works.

This patch has been bootstrapped and tested on arm-none-linux-gnueabihf 
configured with --with-fpu=fpv4-sp-d16.
The bootstrap was performed with LTO.
I didn't see any regressions.

It seems that this logic was put there in 2009 with r154034 as part of a large 
patch to enable support for half-precision
floating point.

I'm not very familiar with this part of the code, so is this a safe patch to do?
The patch should only ever change behaviour for single-precision-only fpus and 
only for static functions
that don't get called outside their translation units (or during LTO I suppose) 
so there shouldn't
be any ABI problems, I think.

Is this ok for trunk?

Thanks,
Kyrill



Huh, I just realised I wrote completely the wrong PR number on this.
The PR I'm talking about here is PR target/69538

Sorry for the confusion.

Kyrill



2016-02-09 Kyrylo Tkachov 

PR target/65578
* config/arm/arm.c (use_vfp_abi): Remove is_double argument.
Don't check for is_double and TARGET_VFP_DOUBLE.
(aapcs_vfp_is_call_or_return_candidate): Update callsite.
(aapcs_vfp_is_return_candidate): Likewise.
(aapcs_vfp_is_call_candidate): Likewise.
(aapcs_vfp_allocate_return_reg): Likewise.












Re: [PATCH] Fix PR67278

2016-03-09 Thread Jakub Jelinek
On Tue, Mar 08, 2016 at 11:27:28PM +0100, Andreas Schwab wrote:
> On powerpc -m32:
> 
> FAIL: gcc.dg/simd-7.c (test for excess errors)
> Excess errors:
> /daten/gcc/gcc-20160307/gcc/testsuite/gcc.dg/simd-7.c:8:1: warning: GCC 
> vector returned by reference: non-standard ABI extension with no 
> compatibility guarantee
> /daten/gcc/gcc-20160307/gcc/testsuite/gcc.dg/simd-7.c:7:1: warning: GCC 
> vector passed by reference: non-standard ABI extension with no compatibility 
> guarantee

Fixed thusly, committed as obvious:

2016-03-09  Jakub Jelinek  

PR middle-end/67278
* gcc.dg/simd-7.c: Add -w -Wno-psabi to dg-options.

--- gcc/testsuite/gcc.dg/simd-7.c   (revision 234078)
+++ gcc/testsuite/gcc.dg/simd-7.c   (working copy)
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-w -Wno-psabi" } */
 
 #if __SIZEOF_LONG_DOUBLE__ == 16 || __SIZEOF_LONG_DOUBLE__ == 8
 typedef long double a __attribute__((vector_size (16)));

Jakub


Re: [PATCH] Fix ICE with vector types in X % -Y pattern (PR middle-end/70050)

2016-03-09 Thread Jakub Jelinek
On Tue, Mar 08, 2016 at 11:16:28PM +0100, Andreas Schwab wrote:
> Marek Polacek  writes:
> 
> > diff --git gcc/testsuite/gcc.dg/pr70050.c gcc/testsuite/gcc.dg/pr70050.c
> > index e69de29..610456f 100644
> > --- gcc/testsuite/gcc.dg/pr70050.c
> > +++ gcc/testsuite/gcc.dg/pr70050.c
> > @@ -0,0 +1,11 @@
> > +/* PR middle-end/70025 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-Wno-psabi" } */
> > +
> > +typedef int v8si __attribute__ ((vector_size (32)));
> > +
> > +v8si
> > +foo (v8si v)
> 
> On powerpc:
> 
> FAIL: gcc.dg/pr70050.c (test for excess errors)
> Excess errors:
> /daten/gcc/gcc-20160307/gcc/testsuite/gcc.dg/pr70050.c:9:1: warning: GCC 
> vector returned by reference: non-standard ABI extension with no 
> compatibility guarantee
> /daten/gcc/gcc-20160307/gcc/testsuite/gcc.dg/pr70050.c:8:1: warning: GCC 
> vector passed by reference: non-standard ABI extension with no compatibility 
> guarantee

Fixed thusly, committed as obvious:

2016-03-09  Jakub Jelinek  

PR target/67278
* gcc.dg/simd-8.c: Add -w -Wno-psabi to dg-options.

PR middle-end/70050
* gcc.dg/pr70050.c: Add -w to dg-options.  Fix up PR number
in testcase comment.

--- gcc/testsuite/gcc.dg/simd-8.c.jj2016-03-02 14:07:58.0 +0100
+++ gcc/testsuite/gcc.dg/simd-8.c   2016-03-09 10:11:15.726261498 +0100
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-options "-w -Wno-psabi" } */
 
 #if __SIZEOF_LONG_DOUBLE__ == 16 || __SIZEOF_LONG_DOUBLE__ == 8
 typedef long double a __attribute__((vector_size (32)));
--- gcc/testsuite/gcc.dg/pr70050.c.jj   2016-03-03 15:28:40.0 +0100
+++ gcc/testsuite/gcc.dg/pr70050.c  2016-03-09 10:10:17.591060300 +0100
@@ -1,6 +1,6 @@
-/* PR middle-end/70025 */
+/* PR middle-end/70050 */
 /* { dg-do compile } */
-/* { dg-options "-Wno-psabi" } */
+/* { dg-options "-w -Wno-psabi" } */
 
 typedef int v8si __attribute__ ((vector_size (32)));
 


Jakub


Re: [PATCH, 16/16] Add libgomp.oacc-fortran/kernels-*.f95

2016-03-09 Thread Tom de Vries

On 09/11/15 21:12, Tom de Vries wrote:

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1Insert new exit block only when needed in
 transform_to_exit_first_loop_alt
  2Make create_parallel_loop return void
  3Ignore reduction clause on kernels directive
  4Implement -foffload-alias
  5Add in_oacc_kernels_region in struct loop
  6Add pass_oacc_kernels
  7Add pass_dominator_oacc_kernels
  8Add pass_ch_oacc_kernels
  9Add pass_parallelize_loops_oacc_kernels
 10Add pass_oacc_kernels pass group in passes.def
 11Update testcases after adding kernels pass group
 12Handle acc loop directive
 13Add c-c++-common/goacc/kernels-*.c
 14Add gfortran.dg/goacc/kernels-*.f95
 15Add libgomp.oacc-c-c++-common/kernels-*.c
 16Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch adds Fortran oacc kernels execution tests.


Retested on current trunk.

Committed, minus the kernels-parallel-loop-data-enter-exit.f95 test.

Thanks,
- Tom


0016-Add-libgomp.oacc-fortran-kernels-.f95.patch


Add libgomp.oacc-fortran/kernels-*.f95

2015-11-09  Tom de Vries  

* testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test.
* testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95: Same.
* testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95:
Same.
* testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95: Same.
* testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95: Same.
* testsuite/libgomp.oacc-fortran/kernels-loop-data.f95: Same.
* testsuite/libgomp.oacc-fortran/kernels-loop.f95: Same.
* 
testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95:
Same.
---
  .../libgomp.oacc-fortran/kernels-loop-2.f95| 32 ++
  .../libgomp.oacc-fortran/kernels-loop-data-2.f95   | 38 ++
  .../kernels-loop-data-enter-exit-2.f95 | 38 ++
  .../kernels-loop-data-enter-exit.f95   | 36 
  .../kernels-loop-data-update.f95   | 36 
  .../libgomp.oacc-fortran/kernels-loop-data.f95 | 36 
  .../libgomp.oacc-fortran/kernels-loop.f95  | 28 
  .../kernels-parallel-loop-data-enter-exit.f95  | 37 +
  8 files changed, 281 insertions(+)
  create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data.f95
  create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop.f95
  create mode 100644 
libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95

diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95 
b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95
new file mode 100644
index 000..1fb40ee
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-2.f95
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-ftree-parallelize-loops=32" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer:: i, ii
+
+  !$acc kernels copyout (a(0:n-1))
+  do i = 0, n - 1
+ a(i) = i * 2
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyout (b(0:n-1))
+  do i = 0, n -1
+ b(i) = i * 4
+  end do
+  !$acc end kernels
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do ii = 0, n - 1
+ c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  do i = 0, n - 1
+ if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95
new file mode 100644
index 000..7b52253
--- /dev/null
+++ 

Re: [PATCH, 15/16] Add libgomp.oacc-c-c++-common/kernels-*.c

2016-03-09 Thread Tom de Vries

On 09/11/15 21:10, Tom de Vries wrote:

On 09/11/15 16:35, Tom de Vries wrote:

Hi,

this patch series for stage1 trunk adds support to:
- parallelize oacc kernels regions using parloops, and
- map the loops onto the oacc gang dimension.

The patch series contains these patches:

  1  Insert new exit block only when needed in
     transform_to_exit_first_loop_alt
  2  Make create_parallel_loop return void
  3  Ignore reduction clause on kernels directive
  4  Implement -foffload-alias
  5  Add in_oacc_kernels_region in struct loop
  6  Add pass_oacc_kernels
  7  Add pass_dominator_oacc_kernels
  8  Add pass_ch_oacc_kernels
  9  Add pass_parallelize_loops_oacc_kernels
 10  Add pass_oacc_kernels pass group in passes.def
 11  Update testcases after adding kernels pass group
 12  Handle acc loop directive
 13  Add c-c++-common/goacc/kernels-*.c
 14  Add gfortran.dg/goacc/kernels-*.f95
 15  Add libgomp.oacc-c-c++-common/kernels-*.c
 16  Add libgomp.oacc-fortran/kernels-*.f95

The first 9 patches are more or less independent, but patches 10-16 are
intended to be committed at the same time.

Bootstrapped and reg-tested on x86_64.

Built and reg-tested with nvidia accelerator, in combination with a
patch that enables accelerator testing (which is submitted at
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg01771.html ).

I'll post the individual patches in reply to this message.


This patch adds C/C++ oacc kernels execution tests.



Retested on current trunk.

Committed, minus the kernels-parallel-loop-data-enter-exit.f95 test.

Thanks,
- Tom


0015-Add-libgomp.oacc-c-c-common-kernels-.c.patch


Add libgomp.oacc-c-c++-common/kernels-*.c

2015-11-09  Tom de Vries  

* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-3.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-3.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-4.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-5.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq-6.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-and-seq.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-collapse.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-g.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-mod-not-zero.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-n.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop.c: Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
Same.
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction.c: Same.
---
  .../libgomp.oacc-c-c++-common/kernels-loop-2.c | 47 ++
  .../libgomp.oacc-c-c++-common/kernels-loop-3.c | 34 +
  .../kernels-loop-and-seq-2.c   | 36 ++
  .../kernels-loop-and-seq-3.c   | 37 ++
  .../kernels-loop-and-seq-4.c   | 36 ++
  .../kernels-loop-and-seq-5.c   | 37 ++
  .../kernels-loop-and-seq-6.c   | 36 ++
  .../kernels-loop-and-seq.c | 37 ++
  .../kernels-loop-collapse.c| 40 
  .../kernels-loop-data-2.c  | 56 ++
  .../kernels-loop-data-enter-exit-2.c   | 54 +
  .../kernels-loop-data-enter-exit.c | 51 
  .../kernels-loop-data-update.c | 53 
  .../libgomp.oacc-c-c++-common/kernels-loop-data.c  | 50 +++
  .../libgomp.oacc-c-c++-common/kernels-loop-g.c |  5 ++
  .../kernels-loop-mod-not-zero.c| 41 
  .../libgomp.oacc-c-c++-common/kernels-loop-n.c | 47 ++
  .../libgomp.oacc-c-c++-common/kernels-loop-nest.c  | 26 ++
  .../libgomp.oacc-c-c++-common/kernels-loop.c   | 41 
  .../kernels-parallel-loop-data-enter-exit.c| 52 
  .../libgomp.oacc-c-c++-common/kernels-reduction.c