Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread Alan Modra
On Wed, Apr 06, 2016 at 06:49:19PM +0930, Alan Modra wrote:
> On Wed, Apr 06, 2016 at 10:46:48AM +0200, Richard Biener wrote:
> > Can you add a testcase or two for the isnormal () case?
> 
> Sure.  I'll adapt the testcase I was using to verify the output,

Revised testcase - target fixed, compiled at -O2 with volatile vars so
we're testing optimized builtins with non-constant data.

diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c 
b/gcc/testsuite/gcc.target/powerpc/pr70117.c
new file mode 100644
index 000..f1fdedb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
@@ -0,0 +1,92 @@
+/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* 
rs6000-*-* } } } */
+/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
+
+#include <float.h>
+
+union gl_long_double_union
+{
+  struct { double hi; double lo; } dd;
+  long double ld;
+};
+
+/* This is gnulib's LDBL_MAX which, being 107 bits in precision, is
+   slightly larger than gcc's 106 bit precision LDBL_MAX.  */
+volatile union gl_long_double_union gl_LDBL_MAX =
+  { { DBL_MAX, DBL_MAX / (double)134217728UL / (double)134217728UL } };
+
+volatile double min_denorm = 0x1p-1074;
+volatile double ld_low = 0x1p-969;
+volatile double dinf = 1.0/0.0;
+volatile double dnan = 0.0/0.0;
+
+int
+main (void)
+{
+  long double ld;
+
+  ld = gl_LDBL_MAX.ld;
+  if (__builtin_isinfl (ld))
+__builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (__builtin_isinfl (ld))
+__builtin_abort ();
+
+  ld = gl_LDBL_MAX.ld;
+  if (!__builtin_isfinite (ld))
+__builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (!__builtin_isfinite (ld))
+__builtin_abort ();
+
+  ld = ld_low;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -ld_low;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = -min_denorm;
+  ld += ld_low;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = min_denorm;
+  ld -= ld_low;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = 0.0;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -0.0;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = LDBL_MAX;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -LDBL_MAX;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = gl_LDBL_MAX.ld;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -gl_LDBL_MAX.ld;
+  if (!__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = dinf;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -dinf;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+
+  ld = dnan;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+  ld = -dnan;
+  if (__builtin_isnormal (ld))
+__builtin_abort ();
+  return 0;
+}

> >  What does XLC do here?
> 
> Not sure, sorry.  I don't have xlc handy.  Will try later.

It seems that to compile 128-bit long double with xlc, I need xlc128,
and I don't have that..  For a double, xlc implements isnormal() on
power8 by moving the fpr argument to a gpr followed by a bunch of bit
twiddling to test the exponent.  xlc's sequence isn't as good as it
could be, 15 insns.  The ideal ought to be the following, I think,
which gcc compiles to 8 insns on power8 (and could be 7 insns if a
useless sign extension was eliminated).

/* Return 1 if the exponent field of X is neither all-zeros nor all-ones,
   else 0, by inspecting the bits of X directly.  Assumes double is
   IEEE 754 binary64, where an all-zeros exponent means zero/denormal and
   an all-ones exponent means infinity/NaN.  */
int
bit_isnormal (double x)
{
  /* Reinterpret the bits of the double as a 64-bit integer.  */
  union { double d; uint64_t l; } val;
  val.d = x;
  /* Bits 52..62 hold the 11-bit exponent field; the mask drops the sign.  */
  uint64_t exp = (val.l >> 52) & 0x7ff;
  /* True only for exp in [1, 0x7fe]: exp == 0 wraps around to the maximum
     uint64_t value, and exp == 0x7ff gives 0x7fe, so both compare false.  */
  return exp - 1 < 0x7fe;
}

The above is around twice as fast as fold_builtin_interclass_mathfn
implementation of isnormal() for double, on power8.  I expect a bit
twiddling implementation for IBM extended would show similar or better
improvement.

However I'm not inclined to pursue this, especially for gcc-6.  The
patch I posted for isnormal() IBM extended is already faster (about
65% average timing on power8) than what existed previously.

-- 
Alan Modra
Australia Development Lab, IBM


Re: [RFA 1/2]: Don't ignore target_header_dir when deciding inhibit_libc

2016-04-07 Thread Andre Vieira (lists)
On 17/03/16 16:33, Andre Vieira (lists) wrote:
> On 23/10/15 12:31, Bernd Schmidt wrote:
>> On 10/12/2015 11:58 AM, Ulrich Weigand wrote:
>>>
>>> Index: gcc/configure.ac
>>> ===
>>> --- gcc/configure.ac(revision 228530)
>>> +++ gcc/configure.ac(working copy)
>>> @@ -1993,7 +1993,7 @@ elif test "x$TARGET_SYSTEM_ROOT" != x; t
>>>   fi
>>>
>>>   if test x$host != x$target || test "x$TARGET_SYSTEM_ROOT" != x; then
>>> -  if test "x$with_headers" != x; then
>>> +  if test "x$with_headers" != x && test "x$with_headers" != xyes; then
>>>   target_header_dir=$with_headers
>>> elif test "x$with_sysroot" = x; then
>>>  
>>> target_header_dir="${test_exec_prefix}/${target_noncanonical}/sys-include"
>>>
>>
>> I'm missing the beginning of this conversation, but this looks like a
>> reasonable change (avoiding target_header_dir=yes for --with-headers).
>> So, approved.
>>
>>
>> Bernd
>>
> Hi there,
> 
> I was wondering why this never made it to trunk. I am currently running
> into an issue that this patch would fix.
> 
> Cheers,
> Andre
> 
Ping.


Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread Richard Biener
On April 7, 2016 10:03:54 AM GMT+02:00, Alan Modra  wrote:
>On Wed, Apr 06, 2016 at 06:49:19PM +0930, Alan Modra wrote:
>> On Wed, Apr 06, 2016 at 10:46:48AM +0200, Richard Biener wrote:
>> > Can you add a testcase or two for the isnormal () case?
>> 
>> Sure.  I'll adapt the testcase I was using to verify the output,
>
>Revised testcase - target fixed, compiled at -O2 with volatile vars so
>we're testing optimized builtins with non-constant data.
>
>diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c
>b/gcc/testsuite/gcc.target/powerpc/pr70117.c
>new file mode 100644
>index 000..f1fdedb
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
>@@ -0,0 +1,92 @@
>+/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin*
>powerpc*-*-aix* rs6000-*-* } } } */
>+/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
>+
>+#include <float.h>
>+
>+union gl_long_double_union
>+{
>+  struct { double hi; double lo; } dd;
>+  long double ld;
>+};
>+
>+/* This is gnulib's LDBL_MAX which, being 107 bits in precision, is
>+   slightly larger than gcc's 106 bit precision LDBL_MAX.  */
>+volatile union gl_long_double_union gl_LDBL_MAX =
>+  { { DBL_MAX, DBL_MAX / (double)134217728UL / (double)134217728UL }
>};
>+
>+volatile double min_denorm = 0x1p-1074;
>+volatile double ld_low = 0x1p-969;
>+volatile double dinf = 1.0/0.0;
>+volatile double dnan = 0.0/0.0;
>+
>+int
>+main (void)
>+{
>+  long double ld;
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (__builtin_isinfl (ld))
>+__builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (__builtin_isinfl (ld))
>+__builtin_abort ();
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (!__builtin_isfinite (ld))
>+__builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (!__builtin_isfinite (ld))
>+__builtin_abort ();
>+
>+  ld = ld_low;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -ld_low;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = -min_denorm;
>+  ld += ld_low;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = min_denorm;
>+  ld -= ld_low;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = 0.0;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -0.0;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = LDBL_MAX;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -LDBL_MAX;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = gl_LDBL_MAX.ld;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -gl_LDBL_MAX.ld;
>+  if (!__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = dinf;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -dinf;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+
>+  ld = dnan;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+  ld = -dnan;
>+  if (__builtin_isnormal (ld))
>+__builtin_abort ();
>+  return 0;
>+}
>
>> >  What does XLC do here?
>> 
>> Not sure, sorry.  I don't have xlc handy.  Will try later.
>
>It seems that to compile 128-bit long double with xlc, I need xlc128,
>and I don't have that..  For a double, xlc implements isnormal() on
>power8 by moving the fpr argument to a gpr followed by a bunch of bit
>twiddling to test the exponent.  xlc's sequence isn't as good as it
>could be, 15 insns.  The ideal ought to be the following, I think,
>which gcc compiles to 8 insns on power8 (and could be 7 insns if a
>useless sign extension was eliminated).
>
>int
>bit_isnormal (double x)
>{
>  union { double d; uint64_t l; } val;
>  val.d = x;
>  uint64_t exp = (val.l >> 52) & 0x7ff;
>  return exp - 1 < 0x7fe;
>}
>
>The above is around twice as fast as fold_builtin_interclass_mathfn
>implementation of isnormal() for double, on power8.  I expect a bit
>twiddling implementation for IBM extended would show similar or better
>improvement.
>
>However I'm not inclined to pursue this, especially for gcc-6.  The
>patch I posted for isnormal() IBM extended is already faster (about
>65% average timing on power8) than what existed previously.

That's good to know.  I think the patch is OK but please seek approval from a 
ppc maintainer as well

Thanks,
Richard.




Re: Add some C++17 items to gcc-6/changes.html

2016-04-07 Thread Jonathan Wakely

On 06/04/16 10:02 -0400, Jason Merrill wrote:

On 04/04/2016 06:22 AM, Jonathan Wakely wrote:

I plan to commit this to wwwdocs CVS. Have I missed anything that
should be listed?


I'd mention that TM requires the -fgnu-tm flag.


Done, the attached patch is what I committed.


Index: htdocs/gcc-6/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
retrieving revision 1.72
diff -u -r1.72 changes.html
--- htdocs/gcc-6/changes.html	22 Mar 2016 15:41:09 -	1.72
+++ htdocs/gcc-6/changes.html	7 Apr 2016 09:37:18 -
@@ -239,6 +239,13 @@
 aggressive in dead-store elimination in situations where
 a memory store to a location precedes a constructor to the
 memory location.
+G++ now supports
+https://gcc.gnu.org/projects/cxx-status.html#cxx1z.html";>C++17
+fold expressions, u8 character literals,
+extended static_assert, and nested namespace definitions.
+G++ now allows constant evaluation for all non-type template arguments.
+G++ now supports C++ Transactional Memory when compiling with
+-fgnu-tm. 
   
 
 Runtime Library (libstdc++)


Re: openacc reference reductions

2016-04-07 Thread Jakub Jelinek
On Wed, Apr 06, 2016 at 01:21:30PM -0700, Cesar Philippidis wrote:
> That's a good idea. I went ahead and combined this patch with the data
> map reduction fix for PR70289 that I posted on Monday,
> , because I'm
> already scanning for parallel reduction data clauses in there. As you
> suggested, I introduced an OMP_CLAUSE_MAP_IN_REDUCTION macro to the data
> clauses associated with acc parallel reductions.
> 
> Is this patch OK for trunk? It fixes PR70289, PR70348, PR70373, PR70533,
> PR70535 and PR70537.

> 2016-04-06  Cesar Philippidis  
> 
>   PR lto/70289

Then please use
PR lto/70289
PR ipa/70348
PR tree-optimization/70373
PR middle-end/70533
PR middle-end/70535
PR70537 sounds like a typo to me, did you mean some other PR?

>   gcc/
>   * gimplify.c (gimplify_adjust_acc_parallel_reductions): New function.
...
>   * gcc/tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.

No gcc/ prefix please.

>   * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Increate test
>   coverage.

Increase?

>   * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
>   * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
>   * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
>   * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
>   * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
>   * testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
>   * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
>   * testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
>   * testsuite/libgomp.oacc-fortran/reduction-1.f90: Increate test

Ditto.

> +   if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
> +   || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
> + {
> +   error_at (OMP_CLAUSE_LOCATION (c), "invalid private reduction "
> +   "on %qE", DECL_NAME (decl));

This looks wrongly formatted, "on is not below OMP.

> +  /* Scan 3: Add a present_or_copy clause for any reduction variable which
> + doens't have a data clause already.  */

doesn't

> +  for (hash_set<tree>::iterator iter = reduction_decls->begin ();
> +   iter != reduction_decls->end (); ++iter)
> +{
> +  tree decl = *iter;
> +
> +  tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP);
> +  OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_TOFROM);
> +  OMP_CLAUSE_DECL (nc) = decl;
> +  if (!POINTER_TYPE_P (TREE_TYPE (decl)))
> + OMP_CLAUSE_MAP_IN_REDUCTION (nc) = 1;
> +  TREE_CHAIN (nc) = list;
> +  list = nc;
> +}
> +
> + cleanup:
> +  delete reduction_decls;
> +  delete pointer_decls;
> +
> +  return list;
> +}

But more importantly, do you really have to do this separately?
I admit I haven't stepped through your testcases in the debugger, so
correct me if I'm missing something:
I mean, gimplify_scan_omp_clauses should omp_add_variable for the
OMP_CLAUSE_REDUCTION with
GOVD_REDUCTION | GOVD_SEEN | GOVD_EXPLICIT
and OMP_CLAUSE_MAP with GOVD_MAP | GOVD_EXPLICIT or so, similarly
GOVD_PRIVATE and/or GOVD_FIRSTPRIVATE flags from
OMP_CLAUSE_PRIVATE/OMP_CLAUSE_FIRSTPRIVATE.
So I believe you should have all the info you need in
(gimplify_adjust_omp_clauses) ,
you have the CODE of the construct this is on (so check OACC_PARALLEL
or whatever you need), and you should be able to check if there is
explicit map/private/firstprivate clause together with OMP_CLAUSE_REDUCTION
or not, and then you can add the extra implicit clause and set
OMP_CLAUSE_MAP_IN_REDUCTION on it as appropriate.

Jakub


Re: [PATCH] Fix PR31096

2016-04-07 Thread Hurugalawadi, Naveen
Hi,

Thanks for the review, views and comments on the issue.

>> -1 is an integer constant, so that's still invalid. It is also invalid for
>> unsigned. The :s are useless since the output is a single insn.

The patch is modified as per your review comments.

Currently the following conditions have been taken care of in the patch
as per your suggestions:-

a * c op b * c -> Optimizes for all cases
a * c op b * c -> Does not optimize when  c = 0

a * -c eq/ne b * -c -> Optimizes for all cases
a * -c lt/gt/ge/le b * -c -> Optimizes when c is positive
a * -c lt/gt/ge/le b * -c -> Optimizes and becomes b lt/gt/ge/le when c is 
negative

Have added a minimal testcase which covers all the above instances.
Please review the patch and let me know if it's okay.

Thanks,
Naveen

diff --git a/gcc/match.pd b/gcc/match.pd
index 75aa601..9386172 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -894,7 +894,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && tree_nop_conversion_p (type, TREE_TYPE (@1)))
   (convert (bit_and (bit_not @1) @0
 
+/* Fold A * 10 == B * 10 into A == B.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (mult:c @0 @1) (mult:c @2 @1))
+  (if (TYPE_OVERFLOW_UNDEFINED (type)
+   && !integer_zerop (@1))
+   (cmp @0 @2
 
+/* Fold A * 10 < B * 10 into A < B.  */
+(for cmp (lt gt le ge)
+ (simplify
+  (cmp (mult:c @0 @1) (mult:c @2 @1))
+  (if (TYPE_OVERFLOW_UNDEFINED (type)
+   && !integer_zerop (@1))
+   (if (tree_expr_nonnegative_p (@1))
+(cmp @0 @2))
+   (if (!tree_expr_nonnegative_p (@1))
+(cmp @2 @0)
 
 /* ((X inner_op C0) outer_op C1)
With X being a tree where value_range has reasoned certain bits to always be
diff --git a/gcc/testsuite/gcc.dg/pr31096-1.c b/gcc/testsuite/gcc.dg/pr31096-1.c
new file mode 100644
index 000..8489724
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr31096-1.c
@@ -0,0 +1,29 @@
+/* PR middle-end/31096 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" }  */
+
+int
+f (int a, int b)
+{
+  return a > b;
+}
+
+int
+f1 (int a, int b)
+{
+  return a * 10 >= b * 10;
+}
+
+int
+f2 (int a, int b)
+{
+  return a * -42 <  b * -42;
+}
+
+int
+f3 (int a, int b)
+{
+  return a * 0 <= b * 0;
+}
+
+/* { dg-final { scan-tree-dump-not "\(D\) * " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr31096.c b/gcc/testsuite/gcc.dg/pr31096.c
new file mode 100644
index 000..05536ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr31096.c
@@ -0,0 +1,29 @@
+/* PR middle-end/31096 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" }  */
+
+int
+f (int a, int b)
+{
+  return a == b;
+}
+
+int
+f1 (int a, int b)
+{
+  return a * 10 == b * 10;
+}
+
+int
+f2 (int a, int b)
+{
+  return a * -42 !=  b * -42;
+}
+
+int
+f3 (int a, int b)
+{
+  return a * 0 != b * 0;
+}
+
+/* { dg-final { scan-tree-dump-not "\(D\) * " "optimized" } } */


Re: [PATCH] Fix PR31096

2016-04-07 Thread Marc Glisse

On Thu, 7 Apr 2016, Hurugalawadi, Naveen wrote:


+/* Fold A * 10 == B * 10 into A == B.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (mult:c @0 @1) (mult:c @2 @1))
+  (if (TYPE_OVERFLOW_UNDEFINED (type)


type is the return type of the comparison. The relevant type here is
TREE_TYPE (@0). Maybe add a testcase with unsigned, to check that it
does not transform?


+   && !integer_zerop (@1))
+   (cmp @0 @2


!integer_zerop is not a promise that the variable is not zero, it just
says that we don't know for sure that it is zero. integer_nonzerop would
work. Or writing (cmp (mult @0 INTEGER_CST@1) (mult @2 @1)), but
then !wi::eq_p (@1, 0) would be a better test.

To make it more general (not limited to constants), you could probably
use tree_expr_nonzero_p, and at some point someone would enhance
tree_single_nonzero_warnv_p by checking VRP information for SSA_NAME.

The other transformation has similar issues.

--
Marc Glisse


[gcc-5] Fix handling of subarrays with update directive

2016-04-07 Thread Thomas Schwinge
Hi Jakub!

On Wed, 23 Mar 2016 14:10:31 +0100, Jakub Jelinek  wrote:
> On Wed, Mar 23, 2016 at 08:05:19AM -0500, James Norris wrote:
> > On 03/23/2016 05:24 AM, Jakub Jelinek wrote:
> > 2016-03-23  James Norris  
> > Daichi Fukuoka 

>   PR libgomp/69414
> > * oacc-mem.c (delete_copyout, update_dev_host): Fix device address.
> > * testsuite/libgomp.oacc-c-c++-common/update-1.c: Additional tests.
> > * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
> > * testsuite/libgomp.oacc-fortran/update-1.f90: New file.
> 
> Ok with that change.

OK to backport that commit to gcc-5-branch (which it has been reported
against)?


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [gcc-5] Fix handling of subarrays with update directive

2016-04-07 Thread Jakub Jelinek
On Thu, Apr 07, 2016 at 01:28:48PM +0200, Thomas Schwinge wrote:
> Hi Jakub!
> 
> On Wed, 23 Mar 2016 14:10:31 +0100, Jakub Jelinek  wrote:
> > On Wed, Mar 23, 2016 at 08:05:19AM -0500, James Norris wrote:
> > > On 03/23/2016 05:24 AM, Jakub Jelinek wrote:
> > > 2016-03-23  James Norris  
> > > Daichi Fukuoka 
> 
> > PR libgomp/69414
> > > * oacc-mem.c (delete_copyout, update_dev_host): Fix device 
> > > address.
> > > * testsuite/libgomp.oacc-c-c++-common/update-1.c: Additional 
> > > tests.
> > > * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
> > > * testsuite/libgomp.oacc-fortran/update-1.f90: New file.
> > 
> > Ok with that change.
> 
> OK to backport that commit to gcc-5-branch (which it has been reported
> against)?

Ok.

Jakub


Re: [gcc-5] Fix handling of subarrays with update directive

2016-04-07 Thread Thomas Schwinge
Hi!

On Thu, 7 Apr 2016 13:29:58 +0200, Jakub Jelinek  wrote:
> On Thu, Apr 07, 2016 at 01:28:48PM +0200, Thomas Schwinge wrote:
> > On Wed, 23 Mar 2016 14:10:31 +0100, Jakub Jelinek  wrote:
> > > On Wed, Mar 23, 2016 at 08:05:19AM -0500, James Norris wrote:
> > > > On 03/23/2016 05:24 AM, Jakub Jelinek wrote:
> > > > 2016-03-23  James Norris  
> > > > Daichi Fukuoka 
> > 
> > >   PR libgomp/69414
> > > > * oacc-mem.c (delete_copyout, update_dev_host): Fix device 
> > > > address.
> > > > * testsuite/libgomp.oacc-c-c++-common/update-1.c: Additional 
> > > > tests.
> > > > * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
> > > > * testsuite/libgomp.oacc-fortran/update-1.f90: New file.
> > > 
> > > Ok with that change.
> > 
> > OK to backport that commit to gcc-5-branch (which it has been reported
> > against)?
> 
> Ok.

In r234806 committed to gcc-5-branch (without changes):

commit 09222d2f8af5e1d4b07d56a56b6806b674af2952
Author: tschwinge 
Date:   Thu Apr 7 11:43:30 2016 +

[PR libgomp/69414] Fix handling of subarrays with update directive

libgomp/
Backport trunk r234428:

2016-03-23  James Norris  
Daichi Fukuoka 

PR libgomp/69414
* oacc-mem.c (delete_copyout, update_dev_host): Fix device address.
* testsuite/libgomp.oacc-c-c++-common/update-1.c: Additional tests.
* testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
* testsuite/libgomp.oacc-fortran/update-1.f90: New file.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-5-branch@234806 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog  |   13 ++
 libgomp/oacc-mem.c |6 +-
 .../libgomp.oacc-c-c++-common/update-1-2.c |   85 ++-
 .../testsuite/libgomp.oacc-c-c++-common/update-1.c |   87 ++-
 .../testsuite/libgomp.oacc-fortran/update-1.f90|  242 
 5 files changed, 424 insertions(+), 9 deletions(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index 3da9fa1..ed890e0 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,16 @@
+2016-04-07  Thomas Schwinge  
+
+   Backport trunk r234428:
+
+   2016-03-23  James Norris  
+   Daichi Fukuoka 
+
+   PR libgomp/69414
+   * oacc-mem.c (delete_copyout, update_dev_host): Fix device address.
+   * testsuite/libgomp.oacc-c-c++-common/update-1.c: Additional tests.
+   * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
+   * testsuite/libgomp.oacc-fortran/update-1.f90: New file.
+
 2016-02-16  Tom de Vries  
 
backport from trunk:
diff --git libgomp/oacc-mem.c libgomp/oacc-mem.c
index 89ef5fc..c3e12fa 100644
--- libgomp/oacc-mem.c
+++ libgomp/oacc-mem.c
@@ -447,7 +447,8 @@ delete_copyout (unsigned f, void *h, size_t s)
   if (!n)
 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
 
-  d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+  d = (void *) (n->tgt->tgt_start + n->tgt_offset
+   + (uintptr_t) h - n->host_start);
 
   host_size = n->host_end - n->host_start;
 
@@ -490,7 +491,8 @@ update_dev_host (int is_dev, void *h, size_t s)
   if (!n)
 gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
 
-  d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+  d = (void *) (n->tgt->tgt_start + n->tgt_offset
+   + (uintptr_t) h - n->host_start);
 
   if (is_dev)
 acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c 
libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
index c7e7257..82c3192 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
@@ -13,6 +13,7 @@ int
 main (int argc, char **argv)
 {
 int N = 8;
+int NDIV2 = N / 2;
 float *a, *b, *c;
 float *d_a, *d_b, *d_c;
 int i;
@@ -242,7 +243,7 @@ main (int argc, char **argv)
 a[i] = 6.0;
 }
 
-#pragma acc update device (a[0:N >> 1])
+#pragma acc update device (a[0:NDIV2])
 
 #pragma acc parallel present (a[0:N], b[0:N])
 {
@@ -254,7 +255,7 @@ main (int argc, char **argv)
 
 #pragma acc update self (a[0:N], b[0:N])
 
-for (i = 0; i < (N >> 1); i++)
+for (i = 0; i < NDIV2; i++)
 {
 if (a[i] != 6.0)
 abort ();
@@ -263,7 +264,7 @@ main (int argc, char **argv)
 abort ();
 }
 
-for (i = (N >> 1); i < N; i++)
+for (i = NDIV2; i < N; i++)
 {
 if (a[i] != 5.0)
 abort ();
@@ -278,5 +279,83 @@ main (int argc, char **argv)
 if (!acc_is_present (&b[0], (N * sizeof (float
   abort ();
 
+for (i = 0; i < N; i++)
+{
+a[i] = 0.0;
+}
+
+#pragma acc update device (a[0:4])
+
+#pragma acc parallel present (a[0:N])
+{
+int ii;
+
+for (ii = 0; ii < N; ii++)
+a[ii] = a[

[Patch] Fix PR 60040

2016-04-07 Thread Senthil Kumar Selvaraj
Hi,

  The below patch fixes PR 60040 by not halting with a hard error on
  a spill failure, if reload knows that it has to run again anyway.
  It also fixes two reload related ICEs on trunk
  (gcc.c-torture/compile/920625-1.c and gcc.dg/tree-ssa/pr70232.c)
  for the AVR target. I've slightly reworked the patch - the original
  patch by Joern Rennecke did not skip the setting of failure to 1; it
  never gets reset afterwards.

  Bootstrapped and regtested on/for x86_64-linux with no regressions.
  The avr target shows additional PASSes for the above two testcases
  and no other regressions.

  If ok, could someone commit please? I don't have commit access.

Regards
Senthil

gcc/ChangeLog

2016-04-07  Joern Rennecke  
Senthil Kumar Selvaraj  

PR target/60040
* reload1.c (find_reload_regs): Add tentative parameter.
and don't report spill failure if param set.
(reload): Propagate something_changed to
select_reload_regs.
(select_reload_regs): Add tentative parameter.

gcc/testsuite/ChangeLog

2016-04-07  Sebastian Huber  
Matthijs Kooijman  
Senthil Kumar Selvaraj  

PR target/60040
* gcc.target/avr/pr60040-1.c: New.
* gcc.target/avr/pr60040-2.c: Likewise.


diff --git gcc/reload1.c gcc/reload1.c
index c2800f8..58f58a9 100644
--- gcc/reload1.c
+++ gcc/reload1.c
@@ -346,8 +346,8 @@ static void maybe_fix_stack_asms (void);
 static void copy_reloads (struct insn_chain *);
 static void calculate_needs_all_insns (int);
 static int find_reg (struct insn_chain *, int);
-static void find_reload_regs (struct insn_chain *);
-static void select_reload_regs (void);
+static void find_reload_regs (struct insn_chain *, bool);
+static void select_reload_regs (bool);
 static void delete_caller_save_insns (void);
 
 static void spill_failure (rtx_insn *, enum reg_class);
@@ -1022,7 +1022,7 @@ reload (rtx_insn *first, int global)
  something_changed = 1;
}
 
-  select_reload_regs ();
+  select_reload_regs (something_changed);
   if (failure)
goto failed;
 
@@ -1960,10 +1960,13 @@ find_reg (struct insn_chain *chain, int order)
is given by CHAIN.
Do it by ascending class number, since otherwise a reg
might be spilled for a big class and might fail to count
-   for a smaller class even though it belongs to that class.  */
+   for a smaller class even though it belongs to that class.
+   TENTATIVE means that we had some changes that might have invalidated
+   the reloads and that we are going to loop again anyway, so don't give
+   a hard error on failure to find a reload reg. */
 
 static void
-find_reload_regs (struct insn_chain *chain)
+find_reload_regs (struct insn_chain *chain, bool tentative)
 {
   int i;
 
@@ -2012,9 +2015,12 @@ find_reload_regs (struct insn_chain *chain)
  {
if (dump_file)
  fprintf (dump_file, "reload failure for reload %d\n", r);
-   spill_failure (chain->insn, rld[r].rclass);
-   failure = 1;
-   return;
+   if (!tentative)
+   {
+   spill_failure (chain->insn, rld[r].rclass);
+   failure = 1;
+   return;
+   }
  }
 }
 
@@ -2025,14 +2031,14 @@ find_reload_regs (struct insn_chain *chain)
 }
 
 static void
-select_reload_regs (void)
+select_reload_regs (bool tentative)
 {
   struct insn_chain *chain;
 
   /* Try to satisfy the needs for each insn.  */
   for (chain = insns_need_reload; chain != 0;
chain = chain->next_need_reload)
-find_reload_regs (chain);
+find_reload_regs (chain, tentative);
 }
 
 /* Delete all insns that were inserted by emit_caller_save_insns during
diff --git gcc/testsuite/gcc.target/avr/pr60040-1.c 
gcc/testsuite/gcc.target/avr/pr60040-1.c
new file mode 100644
index 000..4fe296b
--- /dev/null
+++ gcc/testsuite/gcc.target/avr/pr60040-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+float dhistory[10];
+float test;
+
+float getSlope(float history[]) {
+  float sumx = 0;
+  float sumy = 0;
+  float sumxy = 0;
+  float sumxsq = 0;
+  float rate = 0;
+  int n = 10;
+
+  int i;
+  for (i=1; i< 11; i++) {
+sumx = sumx + i;
+sumy = sumy + history[i-1];
+sumy = sumy + history[i-1];
+sumxsq = sumxsq + (i*i);
+  }
+
+  rate = sumy+sumx+sumxsq;
+  return rate;
+}
+
+void loop() {
+  test = getSlope(dhistory);
+}
diff --git gcc/testsuite/gcc.target/avr/pr60040-2.c 
gcc/testsuite/gcc.target/avr/pr60040-2.c
new file mode 100644
index 000..c40d49f
--- /dev/null
+++ gcc/testsuite/gcc.target/avr/pr60040-2.c
@@ -0,0 +1,112 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned char __uint8_t;
+typedef short unsigned int __uint16_t;
+typedef long unsigned int __uint32_t;
+typedef __uint8_t uint8_t ;
+typedef __uint16_t uint16_t ;
+typedef __uint32_t uint32_t ;
+typedef __builtin_va_lis

Re: [Patch AArch64 3/3] Fix up for pr70133

2016-04-07 Thread Kyrill Tkachov

Hi all,

On 06/04/16 11:10, James Greenhalgh wrote:

Hi,

Having updated the way we parse and output extension strings, now we just
need to wire up the native detection to use these new features.

In doing some cleanup and rename I ended up fixing 8-spaces to tabs in
about half the file. I've done the rest while I'm here to save us from
a mixed-style file.

Bootstrapped on aarch64-none-linux-gnu, then tested with defaults and
an explicit -march=native passed (on a system detected as
cortex-a57+crypto, and again on a system detected as
cortex-a72.cortex-a53+crypto). I also set up a dummy /proc/cpuinfo and
used that to manually check the input data in pr70133.

OK?


This looks good to me (but I can't approve).

Thanks,
Kyrill


Thanks,
James

---
2016-04-06  James Greenhalgh  

PR target/70133

* config/aarch64/driver-aarch64.c
(aarch64_get_extension_string_for_isa_flags): New.
(arch_extension): Rename to...
(aarch64_arch_extension): ...This.
(ext_to_feat_string): Rename to...
(aarch64_extensions): ...This.
(aarch64_core_data): Keep track of architecture extension flags.
(cpu_data): Rename to...
(aarch64_cpu_data): ...This.
(aarch64_arch_driver_info): Keep track of architecture extension
flags.
(get_arch_name_from_id): Rename to...
(get_arch_from_id): ...This, change return type.
(host_detect_local_cpu): Update and reformat for renames, handle
extensions through common infrastructure.





Re: [PATCH][ARM][RFC] PR target/65578 Fix gcc.dg/torture/stackalign/builtin-apply-4.c for single-precision fpus

2016-04-07 Thread Kyrill Tkachov

Hi Ramana,

On 23/03/16 12:09, Ramana Radhakrishnan wrote:

On Tue, Feb 9, 2016 at 5:21 PM, Kyrill Tkachov
 wrote:

Hi all,

In this wrong-code PR the builtin-apply-4.c test fails with -flto but only
when targeting an fpu
with only single-precision capabilities.

bar is a function returning a double. For non-LTO compilation the caller of
bar reads the return value
from it from the s0 and s1 VFP registers like expected, but for -flto the
caller seems to expect the
return value from the r0 and r1 regs.  The RTL dumps show that too.

Debugging the calls to arm_function_value show that in the -flto compilation
the function bar is deemed
to be a local function call and assigned the ARM_PCS_AAPCS_LOCAL PCS
variant, whereas for the non-LTO (and non-breaking)
compilation it uses the ARM_PCS_AAPCS_VFP variant.

Further down in use_vfp_abi when deciding whether to use VFP registers for
the result there is a bit of
logic that rejects VFP registers when handling the ARM_PCS_AAPCS_LOCAL
variant with a double precision value
on an FPU that is not TARGET_VFP_DOUBLE.

This seems wrong for ARM_PCS_AAPCS_LOCAL to me. ARM_PCS_AAPCS_LOCAL means
that the function doesn't escape
the translation unit and we can thus use whatever variant we want. From what
I understand we want to use the
VFP regs when possible for FP values.

So this patch removes that restriction and for the testcase the caller of
bar correctly reads the return
value of bar from the VFP registers and everything works.

This patch has been bootstrapped and tested on arm-none-linux-gnueabihf
configured with --with-fpu=fpv4-sp-d16.
The bootstrap was performed with LTO.
I didn't see any regressions.

It seems that this logic was put there in 2009 with r154034 as part of a
large patch to enable support for half-precision
floating point.

I'm not very familiar with this part of the code, so is this a safe patch to
do?
The patch should only ever change behaviour for single-precision-only fpus
and only for static functions
that don't get called outside their translation units (or during LTO I
suppose) so there shouldn't
be any ABI problems, I think.

Is this ok for trunk?

I spent some time this morning reading through this patch and it does
look reasonably ok. The AAPCS tests if run for hardfloat should catch
any regressions. However given the stage we are in I'd like this
tested through compat.exp and struct-layout.exp across the range of
ABIs and FPU options to ensure we haven't missed anything. Richard,
could you also give this a once over?


I've run compat.exp and struct-layout-1.exp against GCC 5
and a trunk compiler without this patch and it didn't expose
any failures with any /-mfpu options that I tried.

Thanks,
Kyrill



Ramana








Thanks,
Kyrill

2016-02-09  Kyrylo Tkachov  

 PR target/65578
 * config/arm/arm.c (use_vfp_abi): Remove is_double argument.
 Don't check for is_double and TARGET_VFP_DOUBLE.
 (aapcs_vfp_is_call_or_return_candidate): Update callsite.
 (aapcs_vfp_is_return_candidate): Likewise.
 (aapcs_vfp_is_call_candidate): Likewise.
 (aapcs_vfp_allocate_return_reg): Likewise.




Re: [PATCH] Avoid needless unsharing during constexpr evaluation (PR c++/70452)

2016-04-07 Thread Jason Merrill

OK.

Jason


Re: [patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-07 Thread Tom de Vries

Hi,

FYI, this testcase fails for me as follows:
...
/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only 
handles version 2 and 3 information.^M

/tmp/ccMBMHB5.o: In function `main':^M
70503.cc:(.text.startup.main+0x5): undefined reference to 
`execute_native_thread_routine'^M
/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only 
handles version 2 and 3 information.^M
70503.cc:(.text.startup.main+0xa): undefined reference to 
`execute_native_thread_routine_compat'^M

collect2: error: ld returned 1 exit status^M
compiler exited with status 1
output is:
/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only 
handles version 2 and 3 information.^M

/tmp/ccMBMHB5.o: In function `main':^M
70503.cc:(.text.startup.main+0x5): undefined reference to 
`execute_native_thread_routine'^M
/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only 
handles version 2 and 3 information.^M
70503.cc:(.text.startup.main+0xa): undefined reference to 
`execute_native_thread_routine_compat'^M

collect2: error: ld returned 1 exit status^M

FAIL: 30_threads/thread/70503.cc  (test for errors, line 31)
FAIL: 30_threads/thread/70503.cc  (test for errors, line 32)
FAIL: 30_threads/thread/70503.cc (test for excess errors)
...

$ /usr/bin/ld -v
GNU ld (GNU Binutils for Ubuntu) 2.20.1-system.20100303

$ cat /etc/issue
Ubuntu 10.04.4 LTS \n \l

Thanks,
- Tom

On 05/04/16 19:23, Jonathan Wakely wrote:

On 05/04/16 14:37 +0100, Jonathan Wakely wrote:

   Ensure std::thread helpers have internal linkage
   PR libstdc++/70503
   * src/c++11/thread.cc (execute_native_thread_routine,
   execute_native_thread_routine_compat): Give internal linkage.
   * testsuite/30_threads/thread/70503.cc: New test.


This small tweak to the new test checks that -static works before
trying to run it.

Tested x86_64-linux, with and without static glibc installed.

Committed to trunk.

patch.txt


commit 1fdc9bbe912281ba6d573fc28c53817402656e57
Author: Jonathan Wakely 
Date:   Tue Apr 5 18:18:36 2016 +0100

* testsuite/30_threads/thread/70503.cc: Require -static to work.

diff --git a/libstdc++-v3/testsuite/30_threads/thread/70503.cc 
b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
index 1763ac2..950d754 100644
--- a/libstdc++-v3/testsuite/30_threads/thread/70503.cc
+++ b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
@@ -19,6 +19,7 @@
  // { dg-options " -std=gnu++11 -static" { target *-*-*gnu* } }
  // { dg-require-cstdint "" }
  // { dg-require-gthreads "" }
+// { dg-require-effective-target static }

  extern "C" {
void execute_native_thread_routine(void);





[lto-partition.c] move assignment to refs_node, last_visited_node outside if-else

2016-04-07 Thread Prathamesh Kulkarni
Hi,
This is a silly patch that moves the following assignments outside
if-else conditionals,
refs_node = node;
last_visited_node++;
resulting in empty else block.
OK for trunk ?

Thanks,
Prathamesh
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..dca59af 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -568,14 +568,13 @@ lto_balanced_map (int n_lto_partitions)
  symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
last_visited_node);
 
+ refs_node = snode;
+ last_visited_node++;
+
 if (cgraph_node *node = dyn_cast <cgraph_node *> (snode))
{
  struct cgraph_edge *edge;
 
- refs_node = node;
-
- last_visited_node++;
-
  gcc_assert (node->definition || node->weakref);
 
  /* Compute boundary cost of callgraph edges.  */
@@ -614,11 +613,6 @@ lto_balanced_map (int n_lto_partitions)
cost += edge_cost;
}
}
- else
-   {
- refs_node = snode;
- last_visited_node++;
-   }
 
  /* Compute boundary cost of IPA REF edges and at the same time look 
into
 variables referenced from current partition and try to add them.  
*/


ChangeLog
Description: Binary data


Re: [patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-07 Thread Jonathan Wakely

On 07/04/16 15:27 +0200, Tom de Vries wrote:

Hi,

FYI, this testcase fails for me as follows:
...
/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only 
handles version 2 and 3 information.^M

/tmp/ccMBMHB5.o: In function `main':^M
70503.cc:(.text.startup.main+0x5): undefined reference to 
`execute_native_thread_routine'^M
/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only 
handles version 2 and 3 information.^M
70503.cc:(.text.startup.main+0xa): undefined reference to 
`execute_native_thread_routine_compat'^M

collect2: error: ld returned 1 exit status^M
compiler exited with status 1
output is:
/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only 
handles version 2 and 3 information.^M

/tmp/ccMBMHB5.o: In function `main':^M
70503.cc:(.text.startup.main+0x5): undefined reference to 
`execute_native_thread_routine'^M
/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only 
handles version 2 and 3 information.^M
70503.cc:(.text.startup.main+0xa): undefined reference to 
`execute_native_thread_routine_compat'^M

collect2: error: ld returned 1 exit status^M

FAIL: 30_threads/thread/70503.cc  (test for errors, line 31)
FAIL: 30_threads/thread/70503.cc  (test for errors, line 32)
FAIL: 30_threads/thread/70503.cc (test for excess errors)


Hmm, I'm not sure whether it's better to use -gdwarf-2 or add
{ dg-prune-output "dwarf version" }


Re: [Patch] Avoid deadlock in guality tests.

2016-04-07 Thread Yvan Roux
Hi,

On 6 April 2016 at 17:29, Yvan Roux  wrote:
> On 6 April 2016 at 17:24, Pedro Alves  wrote:
>> On 04/06/2016 04:13 PM, Yvan Roux wrote:
>>> On 6 April 2016 at 17:09, Pedro Alves  wrote:
 On 04/06/2016 03:53 PM, Yvan Roux wrote:
> Dejagnu cleanup mechanism needs to be enhanced, but I think that it
> would also be better if guality tests don't get stuck and/or can be
> killed easily.  This patch changes GDB signals handling to nostop for
> SIGSEGV, SIGINT, SIGTERM and SIGBUS.  I am not sure if we need to
> increase the list of signals to all the stop ones (which are not used
> by GDB) or to restrict it just to SIGSEGV.

 I'd suggest:

  handle all pass nostop
  handle SIGINT pass nostop

 That would make gdb pass _all_ signals except SIGTRAP
>>>
>>> I've committed it already :/
>>>
>>> I can make the change, but isn't there cases where SIGILL is used for
>>> breakpoints in GDB (I think I've seen that somewhere).
>>
>> True, and SIGSEGV and SIGEMT too.  But GDB handles that transparently
>> and won't pass such a breakpoint signal to the program, even with
>> "handle pass".  Only "handle SIGTRAP pass" passes a
>> breakpoint/step/etc. trap to the program.
>
> Ah ok, thanks for the explanations Pedro, I'll prepare a new patch and
> validate it.

Here is the new patch which makes GDB pass all signals except SIGTRAP.
Validated on native armv8l target without regressions. OK for trunk?

Cheers,
Yvan

2016-04-07  Yvan Roux  
Pedro Alves  

* gcc.dg/guality/guality.h (main): Make GDB pass all signals except
SIGTRAP.
diff --git a/gcc/testsuite/gcc.dg/guality/guality.h 
b/gcc/testsuite/gcc.dg/guality/guality.h
index d5867d8..acf977f 100644
--- a/gcc/testsuite/gcc.dg/guality/guality.h
+++ b/gcc/testsuite/gcc.dg/guality/guality.h
@@ -252,10 +252,8 @@ main (int argc, char *argv[])
   if (!guality_gdb_input
  || fprintf (guality_gdb_input, "\
 set height 0\n\
+handle all pass nostop\n\
 handle SIGINT pass nostop\n\
-handle SIGTERM pass nostop\n\
-handle SIGSEGV pass nostop\n\
-handle SIGBUS pass nostop\n\
 attach %i\n\
 set guality_attached = 1\n\
 b %i\n\


Re: [patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-07 Thread Jakub Jelinek
On Thu, Apr 07, 2016 at 02:34:51PM +0100, Jonathan Wakely wrote:
> On 07/04/16 15:27 +0200, Tom de Vries wrote:
> >FYI, this testcase fails for me as follows:
> >...
> >/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only
> >handles version 2 and 3 information.^M
> >/tmp/ccMBMHB5.o: In function `main':^M
> >70503.cc:(.text.startup.main+0x5): undefined reference to
> >`execute_native_thread_routine'^M
> >/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only
> >handles version 2 and 3 information.^M
> >70503.cc:(.text.startup.main+0xa): undefined reference to
> >`execute_native_thread_routine_compat'^M
> >collect2: error: ld returned 1 exit status^M
> >compiler exited with status 1
> >output is:
> >/usr/bin/ld: Dwarf Error: found dwarf version '4', this reader only
> >handles version 2 and 3 information.^M
> >/tmp/ccMBMHB5.o: In function `main':^M
> >70503.cc:(.text.startup.main+0x5): undefined reference to
> >`execute_native_thread_routine'^M
> >/usr/bin/ld: Dwarf Error: found dwarf version '0', this reader only
> >handles version 2 and 3 information.^M
> >70503.cc:(.text.startup.main+0xa): undefined reference to
> >`execute_native_thread_routine_compat'^M
> >collect2: error: ld returned 1 exit status^M
> >
> >FAIL: 30_threads/thread/70503.cc  (test for errors, line 31)
> >FAIL: 30_threads/thread/70503.cc  (test for errors, line 32)
> >FAIL: 30_threads/thread/70503.cc (test for excess errors)
> 
> Hmm, I'm not sure whether it's better to use -gdwarf-2 or add
> { dg-prune-output "dwarf version" }

Or perhaps instead change the test, so that instead of testing for
undefined symbols it provides some definition of those 2 symbols
and makes sure libstdc++.a(thread.o) is linked in too (such as
using std::thread::detach or similar somewhere).
Then in the buggy libstdc++ case, because those old definitions
were strong rather than weak, it should fail to link, while with
fixed libstdc++ succeed.

Jakub


Re: [PATCH][ARM] Add deprecation warning on pre-v4t architecture revisions

2016-04-07 Thread Kyrill Tkachov

Ping.

Thanks,
Kyrill
On 31/03/16 13:28, Kyrill Tkachov wrote:

Ping.

Thanks,
Kyrill

On 23/03/16 10:11, Kyrill Tkachov wrote:

Ping.

Thanks,
Kyrill

On 07/03/16 15:40, Kyrill Tkachov wrote:

Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00046.html

Thanks,
Kyrill

On 01/03/16 16:17, Kyrill Tkachov wrote:

Hi all,

For GCC 6 we want to deprecate architecture revisions prior to ARMv4T.
This patch implements this by documenting the deprecation in invoke.texi and 
adding
a warning whenever the user specifies an -march or -mcpu option that selects 
such
an architecture revision.

Bootstrapped and tested on arm.

Ok for trunk?

Thanks,
Kyrill

P.S. I'll add a note to changes.html to that effect separately.

2016-03-01  Kyrylo Tkachov  

* config/arm/arm.c (arm_option_override): Warn on pre-ARMv4T
architecture revisions.
* doc/invoke.texi (ARM Options): Add note on deprecation of pre-ARMv4T
architecture revisions.

2016-03-01  Kyrylo Tkachov  

* gcc.target/arm/ftest-armv4-arm.c: Add dg-warning for deprecation
warning.
* gcc.target/arm/pr62554.c: Likewise.
* gcc.target/arm/pr69610-1.c: Likewise.
* gcc.target/arm/pr69610-2.c: Likewise.










Re: [lto-partition.c] move assignment to refs_node, last_visited_node outside if-else

2016-04-07 Thread Marek Polacek
On Thu, Apr 07, 2016 at 07:05:09PM +0530, Prathamesh Kulkarni wrote:
> Hi,
> This is a silly patch that moves the following assignments outside
> if-else conditionals,
> refs_node = node;
> last_visited_node++;
> resulting in empty else block.
> OK for trunk ?
> 
> Thanks,
> Prathamesh

> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> index 9eb63c2..dca59af 100644
> --- a/gcc/lto/lto-partition.c
> +++ b/gcc/lto/lto-partition.c
> @@ -568,14 +568,13 @@ lto_balanced_map (int n_lto_partitions)
> symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
>   last_visited_node);
>  
> +   refs_node = snode;
> +   last_visited_node++;
> +
> if (cgraph_node *node = dyn_cast  (snode))
>   {
> struct cgraph_edge *edge;
>  
> -   refs_node = node;
> -
> -   last_visited_node++;
> -
> gcc_assert (node->definition || node->weakref);
>  
> /* Compute boundary cost of callgraph edges.  */
> @@ -614,11 +613,6 @@ lto_balanced_map (int n_lto_partitions)
>   cost += edge_cost;
>   }
>   }
> -   else
> - {
> -   refs_node = snode;

But this is snode, not node.

> -   last_visited_node++;
> - }
>  
> /* Compute boundary cost of IPA REF edges and at the same time look 
> into
>variables referenced from current partition and try to add them.  
> */

Marek


Re: [lto-partition.c] move assignment to refs_node, last_visited_node outside if-else

2016-04-07 Thread Prathamesh Kulkarni
On 7 April 2016 at 19:11, Marek Polacek  wrote:
> On Thu, Apr 07, 2016 at 07:05:09PM +0530, Prathamesh Kulkarni wrote:
>> Hi,
>> This is a silly patch that moves the following assignments outside
>> if-else conditionals,
>> refs_node = node;
>> last_visited_node++;
>> resulting in empty else block.
>> OK for trunk ?
>>
>> Thanks,
>> Prathamesh
>
>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
>> index 9eb63c2..dca59af 100644
>> --- a/gcc/lto/lto-partition.c
>> +++ b/gcc/lto/lto-partition.c
>> @@ -568,14 +568,13 @@ lto_balanced_map (int n_lto_partitions)
>> symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
>>   last_visited_node);
>>
>> +   refs_node = snode;
>> +   last_visited_node++;
>> +
>> if (cgraph_node *node = dyn_cast  (snode))
>>   {
>> struct cgraph_edge *edge;
>>
>> -   refs_node = node;
>> -
>> -   last_visited_node++;
>> -
>> gcc_assert (node->definition || node->weakref);
>>
>> /* Compute boundary cost of callgraph edges.  */
>> @@ -614,11 +613,6 @@ lto_balanced_map (int n_lto_partitions)
>>   cost += edge_cost;
>>   }
>>   }
>> -   else
>> - {
>> -   refs_node = snode;
>
> But this is snode, not node.
node is snode dyn_cast to cgraph_node *.

 symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
last_visited_node);

 if (cgraph_node *node = dyn_cast <cgraph_node *> (snode))
   {
  struct cgraph_edge *edge;
  refs_node = node;
  last_visited_node++;
  
   }
else
  {
refs_node = snode;
last_visited_node++;
  }

Thanks,
Prathamesh
>
>> -   last_visited_node++;
>> - }
>>
>> /* Compute boundary cost of IPA REF edges and at the same time look 
>> into
>>variables referenced from current partition and try to add them.  
>> */
>
> Marek


Re: [PATCH] PR47040 - Make error message for empty array constructor more helpful/correct

2016-04-07 Thread Steve Kargl
The latter is obvious as this "fixes" neither a regression
nor documentation.  For the former, see Fortran 95, section 4.5.

-- 
steve

On Thu, Apr 07, 2016 at 07:51:14AM +0200, Dominique d'Humières wrote:
> Could you please elaborate.
> 
> Dominique
> 
> > Le 7 avr. 2016 à 07:48, Steve Kargl  a 
> > écrit :
> > 
> > On Wed, Apr 06, 2016 at 05:44:55PM +0200, Dominique d'Humières wrote:
> >> Is the following patch OK (regtested on x86_64-apple-darwin15)? Should it 
> >> be back ported to the gcc-5 branch?
> > 
> > No and No.
> > 
> > -- 
> > Steve
> 

-- 
Steve


Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread Alan Modra
On Thu, Apr 07, 2016 at 11:32:58AM +0200, Richard Biener wrote:
> That's good to know.  I think the patch is OK but please seek approval from a 
> ppc maintainer as well

There's only one of those.  David?  Thread starts here
https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00213.html

-- 
Alan Modra
Australia Development Lab, IBM


Re: [patch] libstdc++/70503 Ensure std::thread helpers have internal linkage

2016-04-07 Thread Jonathan Wakely

On 07/04/16 15:42 +0200, Jakub Jelinek wrote:

Or perhaps instead change the test, so that instead of testing for
undefined symbols it provides some definition of those 2 symbols
and makes sure libstdc++.a(thread.o) is linked in too (such as
using std::thread::detach or similar somewhere).
Then in the buggy libstdc++ case, because those old definitions
were strong rather than weak, it should fail to link, while with
fixed libstdc++ succeed.


That'll work. Tested x86_64-linux, committed to trunk.



commit 0f47894e015b05f43391a353777a2ad5b74c05fa
Author: Jonathan Wakely 
Date:   Thu Apr 7 15:25:47 2016 +0100

	* testsuite/30_threads/thread/70503.cc: Adjust from xfail to pass.

diff --git a/libstdc++-v3/testsuite/30_threads/thread/70503.cc b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
index 950d754..3b64ef8 100644
--- a/libstdc++-v3/testsuite/30_threads/thread/70503.cc
+++ b/libstdc++-v3/testsuite/30_threads/thread/70503.cc
@@ -16,19 +16,23 @@
 // .
 
 // { dg-do link }
-// { dg-options " -std=gnu++11 -static" { target *-*-*gnu* } }
+// { dg-options "-std=gnu++11 -static" { target *-*-*gnu* } }
 // { dg-require-cstdint "" }
 // { dg-require-gthreads "" }
 // { dg-require-effective-target static }
 
+#include <thread>
+
 extern "C" {
-  void execute_native_thread_routine(void);
-  void execute_native_thread_routine_compat(void);
+  // Should not get multiple definition errors from libstdc++.a(thread.o)
+  void execute_native_thread_routine(void) { }
+  void execute_native_thread_routine_compat(void) { }
 }
 
 int main()
 {
-  execute_native_thread_routine(); // { dg-error "undefined reference" }
-  execute_native_thread_routine_compat(); // { dg-error "undefined reference" }
+  execute_native_thread_routine();
+  execute_native_thread_routine_compat();
+
+  std::thread{}.detach();  // ensure libstdc++.a(thread.o) is linked in
 }
-// { dg-prune-output "collect2: error: ld returned" }


Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread David Edelsohn
On Thu, Apr 7, 2016 at 10:17 AM, Alan Modra  wrote:
> On Thu, Apr 07, 2016 at 11:32:58AM +0200, Richard Biener wrote:
>> That's good to know.  I think the patch is OK but please seek approval from 
>> a ppc maintainer as well
>
> There's only one of those.  David?  Thread starts here
> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00213.html

Yes, I have been following this entertaining thread.

This is okay.

By the way, xlc -qldbl128 should enable 128 bit.

Thanks, David


[PATCH][ARM] PR target/70566 Check that condition register is dead in tst-imm -> lsls-imm Thumb2 peepholes

2016-04-07 Thread Kyrill Tkachov

Hi all,

In this wrong-code PR we have a Thumb2 peephole transforming:
    tst     r3, #2
    bne     .L3
    beq     .L6

into:
    lsls    r3, r3, #30  // LSLS is shorter than TST in Thumb2
    bmi     .L3
    beq     .L6

that is, the branch following the extract+compare has its condition properly 
changed but the
following branch doesn't get updated to check the opposite condition of MI (PL).
Since the peepholes in thumb2.md only see the compare and a single branch the 
solution,
suggested by Richard, is to guard those peepholes on the condition that the 
condition register
is dead after the first branch. This patch does that and with it we no longer 
perform the transformation
on the testcase. I've checked manually that we still perform the peephole when 
the condition register
is indeed dead after the sequence.

Bootstrapped and tested on arm-none-linux-gnueabihf with --with-mode=thumb 
as this affects only
Thumb2 codegen.

Ok for trunk?

This PR also affects GCC 5 and 4.9 so I'll be testing the patch there as well.

Thanks,
Kyrill


2016-04-07  Kyrylo Tkachov  

PR target/70566
* config/arm/thumb2.md (tst + branch-> lsls + branch
peephole below *orsi_not_shiftsi_si): Require that condition
register is dead after the peephole.
(second peephole after the above): Likewise.

2016-04-07  Kyrylo Tkachov  

PR target/70566
* gcc.c-torture/execute/pr70566.c: New test.
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 992536593d6c0a8b8fe5a324f32e279c69746157..ab08288413c3e64911e8d7a8199b9809e0282d8e 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1550,7 +1550,8 @@ (define_peephole2
 		  (match_operand 5 "" "")
 		  (match_operand 6 "" "")))]
   "TARGET_THUMB2
-   && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)"
+   && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)
+   && peep2_reg_dead_p (2, operands[0])"
   [(parallel [(set (match_dup 0)
 		   (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
 (const_int 0)))
@@ -1578,7 +1579,8 @@ (define_peephole2
 		  (match_operand 5 "" "")
 		  (match_operand 6 "" "")))]
   "TARGET_THUMB2
-   && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)"
+   && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)
+   && peep2_reg_dead_p (2, operands[0])"
   [(parallel [(set (match_dup 0)
 		   (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
 (const_int 0)))
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr70566.c b/gcc/testsuite/gcc.c-torture/execute/pr70566.c
new file mode 100644
index ..f47106e70c7d4d7f3623f9505c02445a63332a9d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr70566.c
@@ -0,0 +1,47 @@
+/* PR target/70566.  */
+
+#define NULL 0
+
+struct mystruct
+{
+  unsigned int f1 : 1;
+  unsigned int f2 : 1;
+  unsigned int f3 : 1;
+};
+
+__attribute__ ((noinline)) void
+myfunc (int a, void *b)
+{
+}
+__attribute__ ((noinline)) int
+myfunc2 (void *a)
+{
+  return 0;
+}
+
+static void
+set_f2 (struct mystruct *user, int f2)
+{
+  if (user->f2 != f2)
+myfunc (myfunc2 (NULL), NULL);
+  else
+__builtin_abort ();
+}
+
+__attribute__ ((noinline)) void
+foo (void *data)
+{
+  struct mystruct *user = data;
+  if (!user->f2)
+set_f2 (user, 1);
+}
+
+int
+main (void)
+{
+  struct mystruct a;
+  a.f1 = 1;
+  a.f2 = 0;
+  foo (&a);
+  return 0;
+}


Re: Do not give realistic estimates for loop with array accesses

2016-04-07 Thread Tom de Vries

On 30/03/16 14:36, Richard Biener wrote:

On Wed, 30 Mar 2016, Jan Hubicka wrote:


> >
> >You are only changing one place in this file.

>
>You are right. I am attaching the updated patch which I am re-testing now.

> >
> >The vectorizer already checks this (albeit indirectly):
> >
> >   HOST_WIDE_INT max_niter
> > = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
> >   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> >&& (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
> >   || (max_niter != -1
> >   && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
> > {
> >   if (dump_enabled_p ())
> > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >  "not vectorized: iteration count smaller than "
> >  "vectorization factor.\n");
> >   return false;
> > }

>
>Yes, but one tests only vectorization_factor and other min_profitable_estimate
>which probably should be greater than vectorization_factor.
>
>The check above should therefore become redundant.  My reading of the code is
>that min_profiltable_estimate is computed after the check above, so it is
>probably an useful shortcut and the message is also bit more informative.
>I updated the later test to use max_niter variable once it is computed.
>
>OK with those changes assuming testing passes?

Ok.


This patch caused PR70577 - 'tree-ssa/prefetch-5.c scan-tree-dump-times 
aprefetch failures' ( https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70577 ).


Thanks,
- Tom


Re: [C++ PATCH] PR 70501, ICE in verify ctor sanity

2016-04-07 Thread Nathan Sidwell

On 04/06/16 07:49, Jason Merrill wrote:


Sure, but that also seems unnecessary; vector rvalues don't have object identity
the way class and array rvalues do.


I attach 2 patches.

70501-2.patch fixes the ICE by treating VECTOR_TYPEs the same as PMFs (in 
cxx_eval_bare_aggregate).


70501-other.patch stops finish_compound_literal wrapping VECTOR_TYPEs in a 
TARGET_EXPR.  And also moves the comments  around, as I found them a little 
confusing.  We might want to wait until 7.0 to apply that patch, as it's not a 
regression.


For avoidance of doubt I tested the first patch both with and without the second 
patch.


ok?

nathan

2016-04-06  Nathan Sidwell  

	PR c++/70501
	* constexpr.c (cxx_eval_bare_aggregate): Handle VECTOR_TYPE
	similarly to PMF.

	* g++.dg/init/pr70501.C: New.

Index: cp/constexpr.c
===
--- cp/constexpr.c	(revision 234768)
+++ cp/constexpr.c	(working copy)
@@ -2370,10 +2370,10 @@ cxx_eval_bare_aggregate (const constexpr
   tree type = TREE_TYPE (t);
 
   constexpr_ctx new_ctx;
-  if (TYPE_PTRMEMFUNC_P (type))
+  if (TYPE_PTRMEMFUNC_P (type) || VECTOR_TYPE_P (type))
 {
-  /* We don't really need the ctx->ctor business for a PMF, but it's
-	 simpler to use the same code.  */
+  /* We don't really need the ctx->ctor business for a PMF or
+	 vector, but it's simpler to use the same code.  */
   new_ctx = *ctx;
   new_ctx.ctor = build_constructor (type, NULL);
   new_ctx.object = NULL_TREE;
Index: testsuite/g++.dg/init/pr70501.C
===
--- testsuite/g++.dg/init/pr70501.C	(nonexistent)
+++ testsuite/g++.dg/init/pr70501.C	(working copy)
@@ -0,0 +1,11 @@
+/* { dg-options "" } Not pedantic */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+struct S { v4si v; };
+
+void
+fn2 (int i, int j)
+{
+  struct S s = { .v = i <= j + (v4si){(1, 2)} };
+}
2016-04-06  Nathan Sidwell  

	* semantics.c (finish_compound_literal): Don't wrap VECTOR_TYPEs in a
	TARGET_EXPR.

Index: cp/semantics.c
===
--- cp/semantics.c	(revision 234768)
+++ cp/semantics.c	(working copy)
@@ -2732,8 +2732,8 @@ finish_compound_literal (tree type, tree
   compound_literal = digest_init (type, compound_literal, complain);
   if (TREE_CODE (compound_literal) == CONSTRUCTOR)
 TREE_HAS_CONSTRUCTOR (compound_literal) = true;
-  /* Put static/constant array temporaries in static variables, but always
- represent class temporaries with TARGET_EXPR so we elide copies.  */
+
+  /* Put static/constant array temporaries in static variables.  */
   if ((!at_function_scope_p () || CP_TYPE_CONST_P (type))
   && TREE_CODE (type) == ARRAY_TYPE
   && !TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type)
@@ -2763,8 +2763,13 @@ finish_compound_literal (tree type, tree
 	return error_mark_node;
   return decl;
 }
-  else
-return get_target_expr_sfinae (compound_literal, complain);
+
+  /* Represent other compound literals with TARGET_EXPR so we produce
+ an lvalue, but can elide copies.  */
+  if (!VECTOR_TYPE_P (type))
+compound_literal = get_target_expr_sfinae (compound_literal, complain);
+
+  return compound_literal;
 }
 
 /* Return the declaration for the function-name variable indicated by


Re: [Patch AArch64 1/3] Enable CRC by default for armv8.1-a

2016-04-07 Thread Christophe Lyon
On 6 April 2016 at 12:10, James Greenhalgh  wrote:
>
> Hi,
>
> This change reflects binutils support for CRC, where it is always enabled
> for armv8.1-a.
>

Does v8.1 always enable CRC?

If not, then don't you want to change the binutils default instead?

Christophe.

> OK?
>
> Thanks,
> James
>
> ---
> 2016-04-06  James Greenhalgh  
>
> * config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8_1): Also add
> AARCH64_FL_CRC.
>


Re: Scan for parallelization of the oacc kernels test-cases in gfortran.dg/goacc

2016-04-07 Thread Thomas Schwinge
Hi!

On Tue, 5 Apr 2016 11:12:44 +0200, Tom de Vries  wrote:
> On 18/03/16 13:37, Thomas Schwinge wrote:
> > On Wed, 9 Mar 2016 10:17:28 +0100, Tom de Vries  
> > wrote:
> >> [Should have cited
> >> 
> >> instead of the C/C++ tests]
> >
> >> Retested on current trunk.
> >>
> >> Committed, minus the kernels-parallel-loop-data-enter-exit.f95 test.
> >
> > [tree scanning tests (as
> > done for C/C++, and also present for Fortran on gomp-4_0-branch)]

> > (Note
> > that I had to XFAIL gfortran.dg/goacc/kernels-loop-n.f95.)
> 
> Right. I remember looking into this before, and classified it as the 
> openacc version of PR68787 - fipa-pta to interpret restrict.
> 
> Now that we'll have an xfail for it, I've filed it as PR70545 - 
> '[openacc] gfortran.dg/goacc/kernels-loop-n.f95 not parallelized'.

Makes sense to specify that PR70545 "Depends on: PR68787", and add
"Keywords: openacc"?

> >  OK to commit?
> 
> Yes please.

With the XFAIL noted in the respective test case, committed in r234809:

commit 1b61585a37935375c252a27648089c37018f459e
Author: tschwinge 
Date:   Thu Apr 7 15:21:37 2016 +

Scan for parallelization of the oacc kernels test-cases in gfortran.dg/goacc

gcc/testsuite/
* gfortran.dg/goacc/kernels-loop-2.f95: Scan for parallelization.
* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
* gfortran.dg/goacc/kernels-loop.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-n.f95: Likewise, XFAILed.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@234809 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog  | 12 
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95   |  2 ++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95  |  1 +
 .../gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 |  2 ++
 .../gfortran.dg/goacc/kernels-loop-data-enter-exit.f95   |  2 ++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 |  2 ++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95|  2 ++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95   |  3 +++
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 |  2 ++
 9 files changed, 28 insertions(+)

diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index d3c74ed..7688a83 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2016-04-07  Thomas Schwinge  
+   Tom de Vries  
+
+   * gfortran.dg/goacc/kernels-loop-2.f95: Scan for parallelization.
+   * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop.f95: Likewise.
+   * gfortran.dg/goacc/kernels-loop-n.f95: Likewise, XFAILed.
+
 2016-04-06  Patrick Palka  
 
PR c/70436
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 5cc2e8b..865f7a6 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -40,3 +40,5 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 
"optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 
"optimized" } }
+
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index d1bfc70..c9f3a62 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -47,3 +47,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 
"optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 
"optimized" } }
 
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index feac7b2..3361607 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -46,3 +46,5 @@ end program 

Re: [PATCH, testsuite/ARM] Skip pr70496.c for cortex-m devices

2016-04-07 Thread Thomas Preudhomme
On Wednesday 06 April 2016 12:09:25 Kyrill Tkachov wrote:
> Hi Thomas,
> 
> On 06/04/16 12:03, Thomas Preudhomme wrote:
> > Hi,
> > 
> > Testcase in gcc.target/arm/pr70496.c uses an .arm directive so assumes the
> > target has an ARM execution state. This patch adds a dg-skip-if directive
> > to skip that test on Cortex-M targets since they don't have such an
> > execution state.
> > 
> > ChangeLog entry is as follows:
> > 
> > 
> > *** gcc/testsuite/ChangeLog ***
> > 
> > 2016-04-06  Thomas Preud'homme  
> > 
> >  PR testsuite/70553
> >  * gcc.target/arm/pr70496.c: Skip for ARM Cortex-M targets.
> > 
> > diff --git a/gcc/testsuite/gcc.target/arm/pr70496.c
> > b/gcc/testsuite/gcc.target/arm/pr70496.c
> > index
> > 89957e2c7a75cb89153b3e3fc34d8051b6a997d1..548a8243059ddaec63ed897dc67f4751
> > d806a065 100644
> > --- a/gcc/testsuite/gcc.target/arm/pr70496.c
> > +++ b/gcc/testsuite/gcc.target/arm/pr70496.c
> > @@ -1,6 +1,7 @@
> > 
> >   /* { dg-do assemble } */
> >   /* { dg-options "-mthumb -O2" } */
> >   /* { dg-require-effective-target arm_thumb2_ok } */
> > 
> > +/* { dg-skip-if "does not have ARM state" { arm_cortex_m } } */
> 
> Would it be better to just require the arm_arm_ok effective target?
> That should try to compile a test with -marm added to the command,
> which should fail for Cortex-M targets.

Fair point. What about the following patch then?


*** gcc/testsuite/ChangeLog ***

2016-04-06  Thomas Preud'homme  

PR testsuite/70553
* gcc.target/arm/pr70496.c: Also require arm_arm_ok effective target.


diff --git a/gcc/testsuite/gcc.target/arm/pr70496.c 
b/gcc/testsuite/gcc.target/arm/pr70496.c
index 
89957e2c7a75cb89153b3e3fc34d8051b6a997d1..d3ee0b505a842268dec0fa4da09da2355a9c3715
 
100644
--- a/gcc/testsuite/gcc.target/arm/pr70496.c
+++ b/gcc/testsuite/gcc.target/arm/pr70496.c
@@ -1,5 +1,6 @@
 /* { dg-do assemble } */
 /* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_arm_ok } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 
 int i;


Is this ok for trunk?

Best regards,

Thomas


Re: [PATCH, testsuite/ARM] Skip pr70496.c for cortex-m devices

2016-04-07 Thread Kyrill Tkachov


On 07/04/16 17:13, Thomas Preudhomme wrote:

On Wednesday 06 April 2016 12:09:25 Kyrill Tkachov wrote:

Hi Thomas,

On 06/04/16 12:03, Thomas Preudhomme wrote:

Hi,

Testcase in gcc.target/arm/pr70496.c uses an .arm directive so assumes the
target has an ARM execution state. This patch adds a dg-skip-if directive
to skip that test on Cortex-M targets since they don't have such an
execution state.

ChangeLog entry is as follows:


*** gcc/testsuite/ChangeLog ***

2016-04-06  Thomas Preud'homme  

  PR testsuite/70553
  * gcc.target/arm/pr70496.c: Skip for ARM Cortex-M targets.

diff --git a/gcc/testsuite/gcc.target/arm/pr70496.c
b/gcc/testsuite/gcc.target/arm/pr70496.c
index
89957e2c7a75cb89153b3e3fc34d8051b6a997d1..548a8243059ddaec63ed897dc67f4751
d806a065 100644
--- a/gcc/testsuite/gcc.target/arm/pr70496.c
+++ b/gcc/testsuite/gcc.target/arm/pr70496.c
@@ -1,6 +1,7 @@

   /* { dg-do assemble } */
   /* { dg-options "-mthumb -O2" } */
   /* { dg-require-effective-target arm_thumb2_ok } */

+/* { dg-skip-if "does not have ARM state" { arm_cortex_m } } */

Would it be better to just require the arm_arm_ok effective target?
That should try to compile a test with -marm added to the command,
which should fail for Cortex-M targets.

Fair point. What about the following patch then?


*** gcc/testsuite/ChangeLog ***

2016-04-06  Thomas Preud'homme  

 PR testsuite/70553
 * gcc.target/arm/pr70496.c: Also require arm_arm_ok effective target.


diff --git a/gcc/testsuite/gcc.target/arm/pr70496.c
b/gcc/testsuite/gcc.target/arm/pr70496.c
index
89957e2c7a75cb89153b3e3fc34d8051b6a997d1..d3ee0b505a842268dec0fa4da09da2355a9c3715
100644
--- a/gcc/testsuite/gcc.target/arm/pr70496.c
+++ b/gcc/testsuite/gcc.target/arm/pr70496.c
@@ -1,5 +1,6 @@
  /* { dg-do assemble } */
  /* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_arm_ok } */
  /* { dg-require-effective-target arm_thumb2_ok } */
  
  int i;



Is this ok for trunk?


That's ok.
Thanks,
Kyrill


Best regards,

Thomas




Re: [gomp4] Also test -O0 for OpenACC C, C++ offloading test cases

2016-04-07 Thread Thomas Schwinge
Hi!

On Fri, 1 Apr 2016 10:55:49 +0200, I wrote:
> On Thu, 24 Mar 2016 22:31:29 +0100, I wrote:
> > On Wed, 23 Mar 2016 19:57:50 +0100, Bernd Schmidt  
> > wrote:
> > > Ok with [...].
> > 
> > Thanks for the review; committed in r234471:
> 
> > Also test -O0 for OpenACC C, C++ offloading test cases
> 
> Merged into gomp-4_0-branch in r234664:

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do run { target openacc_nvidia_accel_selected } } */
>  /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
> +/* { dg-xfail-run-if "TODO" { *-*-* } { "-O0" } { "" } } */
>  
>  #include 
>  #include 

Filed .

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
> @@ -1,3 +1,5 @@
> +/* { dg-xfail-run-if "TODO" { *-*-* } { "-O0" } { "" } } */
> +
>  #include 
>  #include 
>  #include 

Filed .

They both PASS on trunk.

Currently unclear if it's the same underlying problem or not.


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [Patch AArch64 1/3] Enable CRC by default for armv8.1-a

2016-04-07 Thread James Greenhalgh
On Thu, Apr 07, 2016 at 05:23:59PM +0200, Christophe Lyon wrote:
> On 6 April 2016 at 12:10, James Greenhalgh  wrote:
> >
> > Hi,
> >
> > This change reflects binutils support for CRC, where it is always enabled
> > for armv8.1-a.
> >
> 
> Does v8.1 always enable CRC?

Yes. -march=armv8.1-a should always enable CRC. Unfortunately some
binutils versions do not honour this, which is why in the next patch we
must always put +crc out.

> If not, then don't you want to change the binutils default instead?

No, this patch is the correct thing to do - regardless of what binutils does,
GCC should enable access to the CRC intrinsics with -march=armv8.1-a, so we
want this patch.

Thanks,
James

> > ---
> > 2016-04-06  James Greenhalgh  
> >
> > * config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8_1): Also add
> > AARCH64_FL_CRC.
> >


Re: [Patch] Fix PR 60040

2016-04-07 Thread Bernd Schmidt

On 04/07/2016 01:52 PM, Senthil Kumar Selvaraj wrote:

   The below patch fixes PR 60040 by not halting with a hard error on
   a spill failure, if reload knows that it has to run again anyway.


Some additional information as to how this situation creates a spill 
failure would be useful. It's hard to tell whether this patch just 
papers over a problem that can still trigger in other circumstances.



-   spill_failure (chain->insn, rld[r].rclass);
-   failure = 1;
-   return;
+   if (!tentative)
+   {
+   spill_failure (chain->insn, rld[r].rclass);
+   failure = 1;
+   return;
+   }
  }


The indentation looks all wrong.


Bernd


Re: [PATCH, rs6000] Add support for int versions of vec_adde

2016-04-07 Thread Bill Seurer

On 04/05/16 21:27, David Edelsohn wrote:

On Tue, Apr 5, 2016 at 3:36 PM, Bill Seurer  wrote:

This patch adds support for the signed and unsigned int versions of the
vec_adde altivec builtins from the Power Architecture 64-Bit ELF V2 ABI
OpenPOWER ABI for Linux Supplement (16 July 2015 Version 1.1).  There are
many of the builtins that are missing and this is the first of a series
of patches to add them.

There aren't instructions for the int versions of vec_adde so the
output code is built from other built-ins that do have instructions
which in this case is just two vec_adds.

The new test cases are executable tests which verify that the generated
code produces expected values.  C macros were used so that the same
test case could be used for both the signed and unsigned versions.  An
extra executable test case is also included to ensure that the modified
support for the __int128 versions of vec_adde is not broken.  The same
test case could not be used for both int and __int128 because of some
differences in loading and storing the vectors.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

[gcc]

2016-04-06  Bill Seurer  

 * config/rs6000/rs6000-builtin.def (vec_adde): Change vec_adde to a
 special case builtin.
 * config/rs6000/rs6000-c.c (altivec_overloaded_builtins,
 altivec_resolve_overloaded_builtin): Remove ALTIVEC_BUILTIN_VEC_ADDE
 from altivec_overloaded_builtins structure.  Add support for it to
 altivec_resolve_overloaded_builtin function.
 * config/rs6000/rs6000.c (altivec_init_builtins): Add definition
 for __builtin_vec_adde.

[gcc/testsuite]

2016-04-06  Bill Seurer  

 * gcc.target/powerpc/vec-adde.c: New test.
 * gcc.target/powerpc/vec-adde-int128.c: New test.

Index: gcc/config/rs6000/rs6000-builtin.def
===
--- gcc/config/rs6000/rs6000-builtin.def(revision 234745)
+++ gcc/config/rs6000/rs6000-builtin.def(working copy)
@@ -951,7 +951,6 @@ BU_ALTIVEC_X (VEC_EXT_V4SF, "vec_ext_v4sf", CO
 before we get to the point about classifying the builtin type.  */

  /* 3 argument Altivec overloaded builtins.  */
-BU_ALTIVEC_OVERLOAD_3 (ADDE,  "adde")
  BU_ALTIVEC_OVERLOAD_3 (ADDEC, "addec")
  BU_ALTIVEC_OVERLOAD_3 (MADD,   "madd")
  BU_ALTIVEC_OVERLOAD_3 (MADDS,  "madds")
@@ -1137,6 +1136,7 @@ BU_ALTIVEC_OVERLOAD_P (VCMPGT_P,   "vcmpgt_p")
  BU_ALTIVEC_OVERLOAD_P (VCMPGE_P,   "vcmpge_p")

  /* Overloaded Altivec builtins that are handled as special cases.  */
+BU_ALTIVEC_OVERLOAD_X (ADDE,  "adde")
  BU_ALTIVEC_OVERLOAD_X (CTF,   "ctf")
  BU_ALTIVEC_OVERLOAD_X (CTS,   "cts")
  BU_ALTIVEC_OVERLOAD_X (CTU,   "ctu")
Index: gcc/config/rs6000/rs6000-c.c
===
--- gcc/config/rs6000/rs6000-c.c(revision 234745)
+++ gcc/config/rs6000/rs6000-c.c(working copy)
@@ -842,11 +842,6 @@ const struct altivec_builtin_types altivec_overloa
  RS6000_BTI_unsigned_V1TI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADDC, P8V_BUILTIN_VADDCUQ,
  RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
-  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
-RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
-RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
-  { ALTIVEC_BUILTIN_VEC_ADDE, P8V_BUILTIN_VADDEUQM,
-RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
{ ALTIVEC_BUILTIN_VEC_ADDEC, P8V_BUILTIN_VADDECUQ,
  RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
  RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
@@ -4515,6 +4510,59 @@ assignment for unaligned loads and stores");
  warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \
  assignment for unaligned loads and stores");

+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+{
+  /* vec_adde needs to be special cased because there is no instruction
+ for the {un}signed int version */


End comment sentence with period and two spaces


+  if (nargs != 3)
+   {
+ error ("vec_adde only accepts 3 arguments");
+ return error_mark_node;
+   }
+
+  tree arg0 = (*arglist)[0];
+  tree arg0_type = TREE_TYPE (arg0);
+  tree arg1 = (*arglist)[1];
+  tree arg1_type = TREE_TYPE (arg1);
+  tree arg2 = (*arglist)[2];
+  tree arg2_type = TREE_TYPE (arg2);
+
+  /* All 3 arguments must be vectors of (signed or unsigned) (int or
+ __int128) and the types must match */


Same.


+  if ((arg0_type != arg1_type) || (arg1_type != arg2_type))
+   goto bad;
+  if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+   goto bad;
+
+  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+   {
+ /* for {un}signed ints,
+vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), carryv) */


Same.


+ case SImo

Re: [committed] OpenMP declare simd ABI changes on x86_64/i686

2016-04-07 Thread Jakub Jelinek
On Wed, Apr 06, 2016 at 02:54:31PM +0200, Jakub Jelinek wrote:
> Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
> I'll try to coordinate with Intel about 3) as well as the default alignment
> if aligned clause is used on declare simd without any explicit alignment.

I've missed in the ABI spec that unlike SSE2/AVX/AVX2 entrypoints, the
AVX512F in the masked case (i.e. _ZGVeM*) need to pass the mask in
integer registers rather than in vectors of characteristic type, either in
unsigned int or unsigned long long (the latter only for QImode
characteristic type), and sometimes in more than one of these (the rule is
that there should be as many mask parameters as there are vectors of the
characteristic type).

This doesn't generate perfect code right now, e.g. even for simple
#pragma omp declare simd
int bar (int a, int b)
{
  return a + b;
}
we for _ZGVeM16* generate:
leaq8(%rsp), %r10
andq$-64, %rsp
vpbroadcastd%edi, %zmm2
vpaddd  %zmm1, %zmm0, %zmm0
vpxord  %zmm1, %zmm1, %zmm1
pushq   -8(%r10)
pushq   %rbp
movq%rsp, %rbp
pushq   %r10
subq$112, %rsp
vpsrlvd .LC0(%rip), %zmm2, %zmm2
vpandd  .LC1(%rip), %zmm2, %zmm2
vpcmpd  $4, %zmm1, %zmm2, %k1
kmovw   %k1, %eax
testw   %ax, %ax
je  .L65
vmovdqa32   %zmm0, -112(%rbp){%k1}
vmovdqa64   -112(%rbp), %zmm0
addq$112, %rsp
popq%r10
popq%rbp
leaq-8(%r10), %rsp
where it really should do:
kmovw   %edi, %k1
vpaddd  %zmm1, %zmm0, %zmm0{z}{%k1}
or so, but perhaps we should in the vectorizer recognize
  vect_cst__50 = {mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), 
mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), 
mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), mask.321_7(D), 
mask.321_7(D), mask.321_7(D)};
  vect__8.627_51 = vect_cst__50 >> { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15 };
  vect__9.628_53 = vect__8.627_51 & { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1 };
  mask__36.631_57 = vect__9.628_53 != { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0 };
for VECTOR_BOOLEAN_P mask__36.631 as
  mask__36.631_57 = VIEW_CONVERT_EXPR ;
and eventually handle even more complex cases.  That said, we still don't use 
the
masked clones in the vectorizer (I thought it went in, but apparently it
didn't, will need to look for the discussions), so it is not top priority
right now, just what's important is to get the ABI right.

Thus, I've committed following fix after bootstrapping/regtesting on
x86_64-linux and i686-linux:

2016-04-07  Jakub Jelinek  

* cgraph.h (struct cgraph_simd_clone): Add mask_mode field.
* omp-low.c (simd_clone_init_simd_arrays, simd_clone_adjust): Handle
node->simdclone->mask_mode != VOIDmode masks.
(simd_clone_adjust_argument_types): Likewise.  Move sc var definition
earlier, use it instead of node->simdclone.
* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
Set clonei->mask_mode.

* c-c++-common/attr-simd.c: Add scan-assembler* directives for AVX512F
clones.
* c-c++-common/attr-simd-2.c: Likewise.
* c-c++-common/attr-simd-4.c: Likewise.
* gcc.dg/gomp/simd-clones-2.c: Likewise.
* gcc.dg/gomp/simd-clones-3.c: Likewise.

--- gcc/cgraph.h.jj 2016-04-04 12:28:41.0 +0200
+++ gcc/cgraph.h2016-04-07 10:56:36.534410726 +0200
@@ -766,6 +766,11 @@ struct GTY(()) cgraph_simd_clone {
   /* Max hardware vector size in bits for floating point vectors.  */
   unsigned int vecsize_float;
 
+  /* Machine mode of the mask argument(s), if they are to be passed
+ as bitmasks in integer argument(s).  VOIDmode if masks are passed
+ as vectors of characteristic type.  */
+  machine_mode mask_mode;
+
   /* The mangling character for a given vector size.  This is used
  to determine the ISA mangling bit as specified in the Intel
  Vector ABI.  */
--- gcc/omp-low.c.jj2016-04-06 14:40:57.0 +0200
+++ gcc/omp-low.c   2016-04-07 21:32:47.633630411 +0200
@@ -18916,7 +18916,9 @@ simd_clone_adjust_argument_types (struct
   adjustments.create (args.length ());
   unsigned i, j, veclen;
   struct ipa_parm_adjustment adj;
-  for (i = 0; i < node->simdclone->nargs; ++i)
+  struct cgraph_simd_clone *sc = node->simdclone;
+
+  for (i = 0; i < sc->nargs; ++i)
 {
   memset (&adj, 0, sizeof (adj));
   tree parm = args[i];
@@ -18924,10 +18926,10 @@ simd_clone_adjust_argument_types (struct
   adj.base_index = i;
   adj.base = parm;
 
-  node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
-  node->simdclone->args[i].orig_type = parm_type;
+  sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
+  sc->args[i].orig_type = parm_type;
 
-   

[committed] Fix lto/simd_function_0.c testcase (PR testsuite/70581)

2016-04-07 Thread Jakub Jelinek
Hi!

This fixes this testcase when using assembler that doesn't support AVX512F,
which is now required for simd clones (previously it has been just avx2
and thus avx2 effective target has been sufficient).

Regtested on x86_64-linux, committed to trunk.

2016-04-07  Jakub Jelinek  

PR testsuite/70581
* gcc.dg/lto/simd-function_0.c: New test.

--- gcc/testsuite/gcc.dg/lto/simd-function_0.c.jj   2015-10-29 
09:14:35.0 +0100
+++ gcc/testsuite/gcc.dg/lto/simd-function_0.c  2016-04-07 18:50:46.975624860 
+0200
@@ -1,4 +1,5 @@
 /* { dg-lto-do link } */
+/* { dg-require-effective-target vect_simd_clones } */
 /* { dg-require-effective-target avx2 } */
 /* { dg-lto-options { { -fopenmp-simd -O3 -ffast-math -mavx2 -flto 
-flto-partition=max } } } */
 

Jakub


Re: [C PATCH] PR43651: add warning for duplicate qualifier

2016-04-07 Thread Joseph Myers
New options need documenting in invoke.texi.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Don't add REG_EQUAL notes in fwprop for paradoxical subregs (PR rtl-optimization/70574)

2016-04-07 Thread Jakub Jelinek
Hi!

The following testcase is miscompiled, because we have:
(set (reg:SI ...) (subreg:SI (reg:QI (...)) 0))
instruction and the fwprop attempts to propagate (const_int -1)
into the reg:QI use in there, but gives up because costs don't say it is
beneficial and adds instead REG_EQUAL (const_int -1) note on the insn.
That is wrong though, it is fine to optimize this insn to
(set (reg:SI ...) (const_int -1))
because the higher bits in the paradoxical subreg are undefined,
but as they are undefined, if the subreg is kept, those bits can be
anything.  If we say the subreg is equal to (const_int -1), it means
e.g. CSE2 can replace other places that need SImode -1 with the SET_DEST of
this insn, but then it really depends on what bits actually end up in the
register.  If we are unlucky, and it is e.g. spilled during LRA and reloaded
using QImode, the upper bits can be anything.

Not sure if this patch catches everything though, perhaps there could be
e.g.
(set (reg:SI ...) (plus:SI ((subreg:SI (reg:QI ...) 0) (const_int ...)))
and we'd still assign REG_EQUAL note.  So maybe instead we should walk the
*loc expression and look for paradoxical subregs, and for each of them, if
we find the DF_REF_REG (use) mentioned in their operand, clear
set_reg_equal.  Though of course, if DF_REF_REG (use) itself is a
paradoxical subreg, we could clear set_reg_equal without any walking.

2016-04-07  Jakub Jelinek  

PR rtl-optimization/70574
* fwprop.c (forward_propagate_and_simplify): Don't add
REG_EQUAL note if DF_REF_REG (use) is a paradoxical subreg.

* gcc.target/i386/avx2-pr70574.c: New test.

--- gcc/fwprop.c.jj 2016-01-04 14:55:53.0 +0100
+++ gcc/fwprop.c2016-04-07 18:01:42.953844357 +0200
@@ -1213,7 +1213,7 @@ forward_propagate_and_simplify (df_ref u
   rtx_insn *use_insn = DF_REF_INSN (use);
   rtx use_set = single_set (use_insn);
   rtx src, reg, new_rtx, *loc;
-  bool set_reg_equal;
+  bool set_reg_equal = true;
   machine_mode mode;
   int asm_use = -1;
 
@@ -1240,7 +1240,15 @@ forward_propagate_and_simplify (df_ref u
   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
  previous case, the optimization is possible and often useful indeed.  */
   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
-reg = SUBREG_REG (reg);
+{
+  /* If the use is a paradoxical subreg, make sure we don't add a
+REG_EQUAL note for it, because it is not equivalent, it is one
+possible value for it, but we can't rely on it holding that value.
+See PR70574.  */
+  if (paradoxical_subreg_p (reg))
+   set_reg_equal = false;
+  reg = SUBREG_REG (reg);
+}
 
   /* Make sure that we can treat REG as having the same mode as the
  source of DEF_SET.  */
@@ -1301,13 +1309,13 @@ forward_propagate_and_simplify (df_ref u
 otherwise.  We also don't want to install a note if we are merely
 propagating a pseudo since verifying that this pseudo isn't dead
 is a pain; moreover such a note won't help anything.  */
-  set_reg_equal = (note == NULL_RTX
-  && REG_P (SET_DEST (use_set))
-  && !REG_P (src)
-  && !(GET_CODE (src) == SUBREG
-   && REG_P (SUBREG_REG (src)))
-  && !reg_mentioned_p (SET_DEST (use_set),
-   SET_SRC (use_set)));
+  set_reg_equal &= (note == NULL_RTX
+   && REG_P (SET_DEST (use_set))
+   && !REG_P (src)
+   && !(GET_CODE (src) == SUBREG
+&& REG_P (SUBREG_REG (src)))
+   && !reg_mentioned_p (SET_DEST (use_set),
+SET_SRC (use_set)));
 }
 
   if (GET_MODE (*loc) == VOIDmode)
--- gcc/testsuite/gcc.target/i386/avx2-pr70574.c.jj 2016-04-07 
18:09:25.788519218 +0200
+++ gcc/testsuite/gcc.target/i386/avx2-pr70574.c2016-04-07 
18:09:21.825573327 +0200
@@ -0,0 +1,26 @@
+/* PR rtl-optimization/70574 */
+/* { dg-do run { target lp64 } } */
+/* { dg-require-effective-target avx2 } */
+/* { dg-options "-O -frerun-cse-after-loop -fno-tree-ccp -mcmodel=medium 
-mavx2" } */
+/* { dg-additional-options "-fPIC" { target fpic } } */
+
+#include "avx2-check.h"
+
+typedef char A __attribute__((vector_size (32)));
+typedef short B __attribute__((vector_size (32)));
+
+int
+foo (int x, __int128 y, __int128 z, A w)
+{
+  y <<= 64;
+  w *= (A) { 0, -1, z, 0, ~y };
+  return w[0] + ((B) { x, 0, y, 0, -1 } | 1)[4];
+}
+
+static void
+avx2_test ()
+{
+  int x = foo (0, 0, 0, (A) {});
+  if (x != -1)
+__builtin_abort ();
+}

Jakub


[PATCH], Re-fix PR 70381 (disable -mfloat128 by default) and add workaround for PR 70589

2016-04-07 Thread Michael Meissner
After applying the fix for PR 70381 to not enable -mfloat128 by default, I
discovered the IEEE 128-bit floating point emulation routines in libgcc are no
longer being built.

The reason for this is the configuration test involved compiling this program:

#pragma GCC target ("vsx,float128")
__float128 add (__float128 *a) { return *a + *(a+1); }

to see if the __float128 support was enabled.  Unfortunately, I discovered that
you can't currently set/disable float128 via the target option attribute or
target pragmas. This is due to the fact that if -mfloat128 is disabled, the
__float128 and __ibm128 keywords are not created.

I raised this as a separate bug (70589).

This patch does several things:

   1)   It disables using float128 in target attributes or target pragmas.

   2)   It fixes the configure test for software emulation to just see if the
ISA 2.06 (vsx) instruction set is available. The makefile options in
the PowerPC libgcc build ensures that -mfloat128 is used. I used
similar logic to detect ISA 3.0 to see if we have support for the IEEE
128-bit floating point hardware.

   3)   I updated the documentation for -mfloat128.

   4)   I added two executable tests to verify that the float emulation is
correct.  In working on adding the tests, I discovered I had the return
value from main inverted, and the test would fail.

I have run a bootstrap build and a make check to verify that the IEEE 128-bit
floating point emulator in libgcc is indeed built. Are these patches ok to
install in the GCC trunk?

[gcc]
2016-04-07  Michael Meissner  

PR target/70589
* config/rs6000/rs6000.c (rs6000_opt_masks): Disable using the
target attribute and pragma from changing the -mfloat128
and -mfloat128-hardware options.

* doc/extend.texi (Additional Floating Types): Document PowerPC
__float128 restrictions.

[libgcc]
2016-04-07  Michael Meissner  

PR target/70381
* configure.ac (powerpc*-*-linux*): Rework tests to build
__float128 emulation routines to not depend on using #pragma GCC
target to enable -mfloat128.
* configure: Regenerate.

[gcc/testsuite]
2016-04-07  Michael Meissner  

PR target/70381
* gcc.target/powerpc/float128-1.c: New tests to make sure the
__float128 emulator is built and runs.
* gcc.target/powerpc/float128-2.c: Likewise.

* lib/target-supports.exp (check_ppc_float128_sw_available):
Rework tests for __float128 software and hardware
availability. Fix exit condition to return 0 on success.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 234797)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -34381,8 +34381,8 @@ static struct rs6000_opt_mask const rs60
   { "dlmzb",   OPTION_MASK_DLMZB,  false, true  },
   { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
false, true  },
-  { "float128",OPTION_MASK_FLOAT128,   false, 
true  },
-  { "float128-hardware",   OPTION_MASK_FLOAT128_HW,false, true  },
+  { "float128",OPTION_MASK_FLOAT128,   false, 
false },
+  { "float128-hardware",   OPTION_MASK_FLOAT128_HW,false, false },
   { "fprnd",   OPTION_MASK_FPRND,  false, true  },
   { "hard-dfp",OPTION_MASK_DFP,false, 
true  },
   { "htm", OPTION_MASK_HTM,false, true  },
Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi (revision 234797)
+++ gcc/doc/extend.texi (working copy)
@@ -954,9 +954,13 @@ typedef _Complex float __attribute__((mo
 typedef _Complex float __attribute__((mode(XC))) _Complex80;
 @end smallexample
 
-On PowerPC 64-bit Linux systems there are currently problems in using
-the complex @code{__float128} type.  When these problems are fixed,
-you would use:
+In order to use @code{__float128} and @code{__ibm128} on PowerPC Linux
+systems, you must use the @option{-mfloat128}. It is expected in
+future versions of GCC that @code{__float128} will be enabled
+automatically.  In addition, there are currently problems in using the
+complex @code{__float128} type.  When these problems are fixed, you
+would use the following syntax to declare @code{_Complex128} to be a
+complex @code{__float128} type:
 
 @smallexample
 typedef _Complex float __attribute__((mode(KC))) _Complex128;
Index: libgcc/configure.ac
===
--- libgcc/configure.ac

Re: [Patch] Avoid deadlock in guality tests.

2016-04-07 Thread Pedro Alves
On 04/07/2016 02:41 PM, Yvan Roux wrote:

> Here is the new patch which makes GDB pass all signals except SIGTRAP.
> validated on native armv8l target without regressions. ok for trunk ?

LGTM, FWIW.
Thanks,
Pedro Alves



Re: [PATCH] 69517 - [5/6 regression] SEGV on a VLA with excess initializer elements

2016-04-07 Thread Martin Sebor

I've spent a ton of time trying to implement the suggested
changes (far too much in large part because of my own mistakes)
but I don't think they will work.  I'll try to clean up what
I have and post it for review.  I wanted to respond to this
now in case you have some suggestions or concerns with the
direction I'm taking in the meantime.


But if even a few MB seems too strict, I would find having even
an exceedingly liberal limit (say 1GB) much preferable to none
at all as it makes it possible to exercise boundary conditions
such as the size overflow problem you noted below.


That sounds reasonable, as long as users with unusual needs can adjust
it with a flag, but even so I'm nervous about doing this in stage 4.  It
certainly isn't a regression.


I'm not comfortable adding a new option at this stage.  I'm also
not sure that an option to impose a static limit is the best
solution.  It seems that if we go to the trouble of making the limit
customizable it should be possible to change it without recompiling
everything (e.g., on ELF, we could check for a weak function and
call it to get the most up-to-date limit).

Let me restore the 4.9.3 behavior by setting the VLA size limit to
SIZE_MAX / 2 (that fixes the other regression that I just raised
in c++/70588 for the record).


I don't think modifying build_vec_init() alone would be sufficient.
For example, the function isn't called for a VLA with a constant
bound like this one:

  int A [2][N] = { 1, 2, 3, 4 };


That seems like a bug, due to array_of_runtime_bound_p returning false
for that array.


It seems that a complete fix would involve (among other things)
replacing calls to array_of_runtime_bound_p with
variably_modified_type_p or similar since the N3639 arrays are
just a subset of those accepted by G++.  Unfortunately, that has
other repercussions (e.g., c++/70555).

I replaced the call to array_of_runtime_bound_p in build_vec_init
with one to variably_modified_type_p to get around the above.
That works, but it's only good for checking for excess
initializers in build_vec_init.  It's too late to check for
overflow in the VLA bounds because by that time the code to
allocate the stack has already been emitted.


Also, I think we should check for invalid bounds in
compute_array_index_type, next to the UBsan code.  Checking bounds only
from cp_finish_decl means that we don't check uses of VLA types other
than variable declarations.


I don't see how to make this work.  compute_array_index_type
doesn't have access to the CONSTRUCTOR for the initializer of
the VLA (the initializer hasn't been parsed yet).  Without it
it's not possible to detect VLA size overflow in cases such
as:

T a [][N] = { { ... }, { ... } };

where the number of top-level elements determines whether or
not the size of the whole VLA would overflow or exceed the
maximum.

Given this, I believe the check does need to be implemented
somewhere in cp_finish_decl or one of the functions it calls
(such as check_initializer) and emitted before build_vec_init
is called or the initializer code it creates is emitted.



You mean VLA typedefs?  That's true, though I have consciously
avoided dealing with those.  They're outlawed in N3639 and so
I've been focusing just on variables.  But since GCC accepts
VLA typedefs too I was thinking I would bring them up at some
point in the future to decide what to do about them.


And cast to pointer to VLAs.  But for non-variable cases we don't care
about available stack, so we wouldn't want your allocation limit to apply.


I don't want to implement it now, but I think the same limit
should apply in all cases, otherwise code remains susceptible
to unsigned integer wrapping.  For example:

  extern size_t N;
  typedef int A [N];
  int *a = (int*)malloc (sizeof (A));   // possible wraparound
  a [N - 1] = 0;// out-of-bounds write

It seems that the typedef will need to be accepted (in case it's
unused) but the runtime sizeof would need to do the checking and
potentially throw.  I haven't thought through the ramifications
yet.




As for where to add the bounds checking code, I also at first
thought of checking the bounds parallel to the UBSan code in
compute_array_index_type() and even tried that approach. The
problem with it is that it only considers one array dimension
at a time, without the knowledge of the others.  As a result,
as noted in sanitizer/70051, it doesn't correctly detect
overflows in the bounds of multidimensional VLAs.


It doesn't, but I don't see why it couldn't.  It should be fine to check
each dimension for overflow separately; if an inner dimension doesn't
overflow, we can go on and consider the outer dimension.


As I explained above, I don't see how to make this work.



Incidentally, I was wondering if it would make sense to use the
overflowing calculation for both TYPE_SIZE and the sanity check when
we're doing both.


I'm not sure what you mean here.  Can you elaborate?




+  /* Avoid inst

Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread Alan Modra
On Thu, Apr 07, 2016 at 10:43:31AM -0400, David Edelsohn wrote:
> Yes, I have been following this entertaining thread.

How to waste lots of time over one bit.  Floating point is like that.
:-)

I see the bug was opened against 5.3, so OK to commit there after a
few days and maybe 4.9 too, Richard?

-- 
Alan Modra
Australia Development Lab, IBM


Re: openacc reference reductions

2016-04-07 Thread Cesar Philippidis
= OMP_CLAUSE_MAP
+			   && OMP_CLAUSE_MAP_IN_REDUCTION(c)))
 	  {
 	x = build_receiver_ref (var, true, ctx);
 	tree new_var = lookup_decl (var, ctx);
diff --git a/gcc/tree.h b/gcc/tree.h
index fa70596..87e7563 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1536,6 +1536,9 @@ extern void protected_set_expr_location (tree, location_t);
treatment if OMP_CLAUSE_SIZE is zero.  */
 #define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \
   TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
+/* Nonzero if this map clause is for an ACC parallel reduction variable.  */
+#define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
+  TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
 
 #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)


pr70533-20160407-full.diff.gz
Description: application/gzip


Re: [PATCH] PR70117, ppc long double isinf

2016-04-07 Thread Richard Biener
On April 8, 2016 5:03:04 AM GMT+02:00, Alan Modra  wrote:
>On Thu, Apr 07, 2016 at 10:43:31AM -0400, David Edelsohn wrote:
>> Yes, I have been following this entertaining thread.
>
>How to waste lots of time over one bit.  Floating point is like that.
>:-)
>
>I see the bug was opened against 5.3, so OK to commit there after a
>few days and maybe 4.9 too, Richard?

Yes please.

Richard.



Re: [gomp4] Avoiding predication for certain blocks

2016-04-07 Thread Thomas Schwinge
Hi!

I cleaned up this remnant from an earlier OpenACC execution model
implementation:

On Fri, 29 May 2015 18:23:21 +0200, Bernd Schmidt  
wrote:
> When predicating the code for OpenACC, we should avoid the entry block 
> in an offloaded region, which contains setup code that should be run in 
> every thread. The following patch adds a new marker statement that is 
> used to identify this block. Currently, predication doesn't happen 
> anyway due to an oversight in the algorithm, but I'll be fixing that in 
> a followup patch.
> 
> Committed on gomp-4_0-branch.

> --- gcc/gimple.def(revision 223867)
> +++ gcc/gimple.def(working copy)

> +/* GIMPLE_OMP_ENTRY_END marks the end of the unpredicated entry block
> +   into an offloaded region.  */
> +DEFGSCODE(GIMPLE_OMP_ENTRY_END, "gimple_omp_entry_end", GSS_BASE)

Committed to gomp-4_0-branch in r234822:

commit af76e7c5279c0eb87b6b91b42d2568679ad6bbb9
Author: tschwinge 
Date:   Fri Apr 8 06:30:08 2016 +0000

Remove GIMPLE_OMP_ENTRY_END

gcc/
* gimple.def: Remove GIMPLE_OMP_ENTRY_END.  Remove all uses.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@234822 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp|  4 
 gcc/gimple-low.c  |  1 -
 gcc/gimple-pretty-print.c |  4 
 gcc/gimple.c  |  9 -
 gcc/gimple.def|  4 
 gcc/gimple.h  |  2 --
 gcc/omp-low.c | 15 +--
 7 files changed, 5 insertions(+), 34 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 3c9eed6..bffc261 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,7 @@
+2016-04-08  Thomas Schwinge  
+
+   * gimple.def: Remove GIMPLE_OMP_ENTRY_END.  Remove all uses.
+
 2016-03-11  Cesar Philippidis  
 
* config/nvptx/nvptx.c (nvptx_gen_shuffle): Add support for QImode
diff --git gcc/gimple-low.c gcc/gimple-low.c
index 13c0165..eb90d48 100644
--- gcc/gimple-low.c
+++ gcc/gimple-low.c
@@ -313,7 +313,6 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data 
*data)
 case GIMPLE_OMP_ATOMIC_LOAD:
 case GIMPLE_OMP_ATOMIC_STORE:
 case GIMPLE_OMP_CONTINUE:
-case GIMPLE_OMP_ENTRY_END:
   break;
 
 case GIMPLE_CALL:
diff --git gcc/gimple-pretty-print.c gcc/gimple-pretty-print.c
index 50d61fa..e27214f 100644
--- gcc/gimple-pretty-print.c
+++ gcc/gimple-pretty-print.c
@@ -2304,10 +2304,6 @@ pp_gimple_stmt_1 (pretty_printer *buffer, gimple *gs, 
int spc, int flags)
   pp_string (buffer, "GIMPLE_SECTIONS_SWITCH");
   break;
 
-case GIMPLE_OMP_ENTRY_END:
-  pp_string (buffer, "GIMPLE_OMP_ENTRY_END");
-  break;
-
 case GIMPLE_OMP_MASTER:
 case GIMPLE_OMP_TASKGROUP:
 case GIMPLE_OMP_SECTION:
diff --git gcc/gimple.c gcc/gimple.c
index ee2..b0e19d5 100644
--- gcc/gimple.c
+++ gcc/gimple.c
@@ -828,15 +828,6 @@ gimple_build_debug_source_bind_stat (tree var, tree value,
 }
 
 
-/* Build a GIMPLE_OMP_ENTRY_END statement.  */
-
-gimple *
-gimple_build_omp_entry_end (void)
-{
-  return gimple_alloc (GIMPLE_OMP_ENTRY_END, 0);
-}
-
-
 /* Build a GIMPLE_OMP_CRITICAL statement.
 
BODY is the sequence of statements for which only one thread can execute.
diff --git gcc/gimple.def gcc/gimple.def
index faf0166..2ff22b8 100644
--- gcc/gimple.def
+++ gcc/gimple.def
@@ -227,10 +227,6 @@ DEFGSCODE(GIMPLE_OMP_ATOMIC_STORE, 
"gimple_omp_atomic_store",
iteration in partially lowered OpenMP code.  */
 DEFGSCODE(GIMPLE_OMP_CONTINUE, "gimple_omp_continue", GSS_OMP_CONTINUE)
 
-/* GIMPLE_OMP_ENTRY_END marks the end of the unpredicated entry block
-   into an offloaded region.  */
-DEFGSCODE(GIMPLE_OMP_ENTRY_END, "gimple_omp_entry_end", GSS_BASE)
-
 /* GIMPLE_OMP_CRITICAL  represents
 
#pragma omp critical [name]
diff --git gcc/gimple.h gcc/gimple.h
index 22d2a07..6d15dab 100644
--- gcc/gimple.h
+++ gcc/gimple.h
@@ -1451,7 +1451,6 @@ gdebug *gimple_build_debug_bind_stat (tree, tree, gimple 
* MEM_STAT_DECL);
 gdebug *gimple_build_debug_source_bind_stat (tree, tree, gimple * 
MEM_STAT_DECL);
 #define gimple_build_debug_source_bind(var,val,stmt)   \
   gimple_build_debug_source_bind_stat ((var), (val), (stmt) MEM_STAT_INFO)
-gimple *gimple_build_omp_entry_end (void);
 gomp_critical *gimple_build_omp_critical (gimple_seq, tree, tree);
 gomp_for *gimple_build_omp_for (gimple_seq, int, tree, size_t, gimple_seq);
 gomp_parallel *gimple_build_omp_parallel (gimple_seq, tree, tree, tree);
@@ -6061,7 +6060,6 @@ gimple_return_set_retbnd (gimple *gs, tree retval)
 case GIMPLE_OMP_ORDERED:   \
 case GIMPLE_OMP_CRITICAL:  \
 case GIMPLE_OMP_RETURN:\
-case GIMPLE_OMP_ENTRY_END: \
 case GIMPLE_OMP_ATOMIC_LOAD:   \
 case GIMPLE_OMP_ATOMIC_STORE:  \
 case GIMPLE_OMP_CONTINUE:  \
diff --git gcc/omp-low.c gcc/omp-low.c
index 299447e..9cb6425 10064