Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Sebastian Huber



On 14/07/15 09:04, Sebastian Huber wrote:

#include assert.h

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr-ts.team == NULL)
{
  struct gomp_thread_pool *pool = thr-thread_pool;
  if (pool != NULL)
{
  struct gomp_team *last_team = pool-last_team;
  if (last_team != NULL  last_team-nthreads == nthreads)
{
  pool-last_team = NULL;
  assert (last_team-barrier.total == nthreads);
  assert (last_team-barrier.awaited == nthreads);
  assert (last_team-barrier.awaited_final == nthreads);
  assert (last_team-barrier.generation % BAR_INCR != 0);


Sorry, this should be:

assert (last_team-barrier.generation % BAR_INCR == 0);

I pasted the wrong version, since I wanted to check that I get a failure 
in case generation % BAR_INCR != 0.



  return last_team;
}
}
}
  return NULL;
} 


--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



Re: [PATCH][4/n] Remove GENERIC stmt combining from SCCVN

2015-07-14 Thread Richard Biener
On Mon, 13 Jul 2015, Jeff Law wrote:

 On 07/13/2015 03:32 AM, Richard Biener wrote:
  On Mon, 13 Jul 2015, Richard Biener wrote:
  
   On Sun, 12 Jul 2015, Jeff Law wrote:
   
On 06/29/2015 01:58 AM, Richard Biener wrote:
 
 In principle the following works for the testcase (even w/o fixing
 the VRP part).
 
 Index: gcc/tree-ssa-dom.c
 ===
 --- gcc/tree-ssa-dom.c  (revision 225007)
 +++ gcc/tree-ssa-dom.c  (working copy)
 @@ -1409,6 +1409,14 @@ simplify_stmt_for_jump_threading (gimple
  return lookup_avail_expr (stmt, false);
}
 
 +static tree
 +dom_valueize (tree t)
 +{
 +  if (TREE_CODE (t) == SSA_NAME)
 +return SSA_NAME_VALUE (t);
 +  return t;
 +}
 +
/* Record into the equivalence tables any equivalences implied by
   traversing edge E (which are cached in E-aux).
 
 @@ -1429,7 +1437,33 @@ record_temporary_equivalences (edge e)
 
  /* If we have a simple NAME = VALUE equivalence, record it.
 */
  if (lhs  TREE_CODE (lhs) == SSA_NAME)
 -   const_and_copies-record_const_or_copy (lhs, rhs);
 +   {
 + gimple use_stmt;
 + imm_use_iterator iter;
 + const_and_copies-record_const_or_copy (lhs, rhs);
 + FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
 +   {
 + /* Only bother to record more equivalences for lhs that
 +can be directly used by e-dest.
 +???  If the code gets re-organized to a worklist to
 +catch more indirect opportunities and it is made to
 +handle PHIs then this should only consider use_stmts
 +in basic-blocks we have already visited.  */
 + if (!dominated_by_p (CDI_DOMINATORS,
 +  e-dest, gimple_bb (use_stmt)))
 +   continue;
 + tree lhs = gimple_get_lhs (use_stmt);
 + if (lhs  TREE_CODE (lhs) == SSA_NAME)
 +   {
 + tree res = gimple_fold_stmt_to_constant_1 (use_stmt,
 +
 dom_valueize,
 +
 no_follow_ssa_edges);
 + if (TREE_CODE (res) == SSA_NAME
 + || is_gimple_min_invariant (res))
 +   const_and_copies-record_const_or_copy (lhs, res);
 +   }
 +   }
 +   }
 
  /* If we have 0 = COND or 1 = COND equivalences, record them
into our expression hash tables.  */
 
 
 it's not using DOMs own stmt visiting machinery as that always
 modifies
 stmts in-place.  As stated in the comment it doesn't catch secondary
 opportunities.  That would be possible by using a work-list seeded
 by LHS we recorded new const/copies for and re-visiting their uses.
 You can get extra fancy here by properly handling PHIs and
 conditionals.  But it's a question of cost here, of course.
Right, the code you're modifying is only used by jump threading to
record
temporary equivalences, particularly equivalences that are specific to a
path.


 
 Note that I think this isn't really backward propagation but
 just context sensitive value-numbering.
I think that's because we're looking at the problem differently.  It's
certainly not backward propagation in the traditional dataflow sense, so
I'm
probably being too loose with terminology here.

When we discover something about X by means other than the definition of
X, we
can look at how X was set and possibly discover a value for source
operands of
that statement.  Similarly we can look at uses of X and possibly
discover a
value for the destination of those statement(s).  In both cases we're
going
backwards from an order-of-execution point of view and recording
additional
equivalences.

The existing code did the former (look at X's defining statement and try
to
discover an equivalence for a source operand in that statement). What we
need
to optimize this case is the latter.

I *think* these are closely enough related that some code can be
factored out
a bit and reused in both r_e_f_i_e and r_t_e to discover both types of
equivalences for DOM and for jump threading.
   
   Indeed - the odd thing here is that one function uses
   const_and_copies-record_const_or_copy directly while the other one
   record_equality (this function is _solely_ used by
   record_equivalences_from_incoming_edge).  I didn't want to introduce
   a callback to commonize the code (though in principle we could use
   a template function with a function template parameter...)
   
   That said, I don't see that record_equality does sth not suitable
   if called 

Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Sebastian Huber

On 13/07/15 16:33, Sebastian Huber wrote:



On 13/07/15 16:17, Jakub Jelinek wrote:

On Mon, Jul 13, 2015 at 01:15:44PM +0200, Sebastian Huber wrote:

diff --git a/libgomp/team.c b/libgomp/team.c
index b98b233..0bcbaf8 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -134,6 +134,25 @@ gomp_thread_start (void *xdata)
return NULL;
  }
  +static struct gomp_team *
+get_recycable_team (unsigned nthreads)

That would be recyclable.
But I think get_last_team would be better.


Ok.


Also, please make it static inline.


Out of curiosity, does this make a difference for a static function in 
a module if it has the inline or not?





+  team = gomp_malloc (sizeof (*team) + nthreads * extra);
+
+#ifndef HAVE_SYNC_BUILTINS
+  gomp_mutex_init (team-work_share_list_free_lock);
+#endif

Avoiding gomp_mutex_destroy/gomp_mutex_init is fine,
but I must say I'm far less sure about gomp_sem_init (can you
add there a temporary assert that it has the expected value)
and even less about gomp_barrier_init (I think e.g. on Linux
generation will be very unlikely 0 that it should be, and not
sure about awaited_final value either).

Jakub


I didn't observe any testsuite failures on x86_64-unknown-linux-gnu 
with this patch. I will add asserts and re-run the testsuite tomorrow.




The team-master_release semaphore is not always zero after use. I got a 
test failure here:


pr29947-2.exe: libgomp/team.c:152: get_last_team: Assertion 
`last_team-master_release == 0' failed.


The state of barrier seems to be all right. I added these assertions:

#include assert.h

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr-ts.team == NULL)
{
  struct gomp_thread_pool *pool = thr-thread_pool;
  if (pool != NULL)
{
  struct gomp_team *last_team = pool-last_team;
  if (last_team != NULL  last_team-nthreads == nthreads)
{
  pool-last_team = NULL;
  assert (last_team-barrier.total == nthreads);
  assert (last_team-barrier.awaited == nthreads);
  assert (last_team-barrier.awaited_final == nthreads);
  assert (last_team-barrier.generation % BAR_INCR != 0);
  return last_team;
}
}
}
  return NULL;
}

None of them fails if I run the test suite.

Running libgomp/testsuite/libgomp.c/c.exp ...
Running libgomp/testsuite/libgomp.c++/c++.exp ...
Running libgomp/testsuite/libgomp.fortran/fortran.exp ...
Running libgomp/testsuite/libgomp.graphite/graphite.exp ...
Running libgomp/testsuite/libgomp.oacc-c/c.exp ...
Running libgomp/testsuite/libgomp.oacc-c++/c++.exp ...
Running libgomp/testsuite/libgomp.oacc-fortran/fortran.exp ...

=== libgomp Summary ===

# of expected passes5987
# of expected failures  8
# of unsupported tests  278

Should I still leave the barrier init as is?

--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



Re: [PATCH, gomp4] Propagate independent clause for OpenACC kernels pass

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 01:46:04PM +0800, Chung-Lin Tang wrote:
 this patch provides a 'bool independent' field in struct loop, which
 will be switched on by an independent clause in a #pragma acc loop 
 directive.
 I assume you'll be wiring it to the kernels parloops pass in a followup patch.
 
 Note: there are already a few other similar fields in struct loop, namely
 'safelen' and 'can_be_parallel', used by OMP simd safelen and GRAPHITE 
 respectively.
 The intention and/or setting of these fields are all a bit different, so I've
 decided to add a new bool for OpenACC.

How is it different though?  Can you cite exact definition of the
independent clause vs. safelen (set to INT_MAX)?
The OpenMP definition is:
A SIMD loop has logical iterations numbered 0,1,...,N-1 where N is the
number of loop iterations, and the logical numbering denotes the sequence in 
which the iterations would
be executed if the associated loop(s) were executed with no SIMD instructions. 
If the safelen
clause is used then no two iterations executed concurrently with SIMD 
instructions can have a
greater distance in the logical iteration space than its value.
...
Lexical forward dependencies in the iterations of the
original loop must be preserved within each SIMD chunk.

So e.g. safelen = 32 means for PTX you can safely implement it by
running up to 32 consecutive iterations by all threads in the warp
(assuming code that for some reason must be run by a single thread
(e.g. calls to functions that are marked so that they expect to be run
by the first thread in a warp initially) is run sequentially by increasing
iterator), but it doesn't mean the iterations have no dependencies in between
them whatsoever (see the above note about lexical forward dependencies),
so you can't parallelize it by assigning different iterations to different
threads outside of warp (or pthread_create created threads).
So if OpenACC independent means there are no dependencies in between
iterations, the OpenMP counterpart here is #pragma omp for simd schedule (auto)
or #pragma omp distribute parallel for simd schedule (auto).

Jakub


[gomp4, committed] Use marked_independent in oacc kernels region

2015-07-14 Thread Tom de Vries

Hi,

this patch uses the marked_independent field to skip the dependence 
analysis in parloops for loops in oacc kernels regions.


Bootstrapped and reg-tested on x86_64.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Use marked_independent in oacc kernels region

2015-07-14  Tom de Vries  t...@codesourcery.com

	* tree-parloops.c (parallelize_loops): Use marked_independent flag in
	oacc kernels region.

	* c-c++-common/goacc/kernels-independent.c: New test.

	* testsuite/libgomp.oacc-c-c++-common/kernels-independent.c: New test.
---
 .../c-c++-common/goacc/kernels-independent.c   | 40 +++
 gcc/tree-parloops.c| 21 --
 .../kernels-independent.c  | 45 ++
 3 files changed, 103 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-independent.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
new file mode 100644
index 000..2f086b6
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -ftree-parallelize-loops=32 } */
+/* { dg-additional-options -fdump-tree-parloops_oacc_kernels-all } */
+/* { dg-additional-options -fdump-tree-optimized } */
+
+#include stdlib.h
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+void
+foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
+{
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+#pragma acc loop independent
+for (COUNTERTYPE ii = 0; ii  N; ii++)
+  c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+if (c[i] != a[i] + b[i])
+  abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times SUCCESS: may be parallelized, marked independent 1 parloops_oacc_kernels } } */
+/* { dg-final { scan-tree-dump-not FAILED: parloops_oacc_kernels } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times (?n);; Function .*foo.*\\._omp_fn\\.0 1 optimized } } */
+
+/* { dg-final { scan-tree-dump-times (?n)pragma omp target oacc_parallel.*num_gangs\\(32\\) 1 parloops_oacc_kernels } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f27dfa9..149c336 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2797,9 +2797,24 @@ parallelize_loops (bool oacc_kernels_p)
   if (!try_create_reduction_list (loop, reduction_list, oacc_kernels_p))
 	continue;
 
-  if (!flag_loop_parallelize_all
-	   !loop_parallel_p (loop, parloop_obstack))
-	continue;
+  if (!flag_loop_parallelize_all)
+	{
+	  bool independent = (oacc_kernels_p
+			   loop-marked_independent);
+
+	  if (independent)
+	{
+	  if (dump_file
+		   (dump_flags  TDF_DETAILS))
+		fprintf (dump_file,
+			   SUCCESS: may be parallelized, marked independent\n);
+	}
+	  else
+	independent = loop_parallel_p (loop, parloop_obstack);
+
+	  if (!independent)
+	continue;
+	}
 
   changed = true;
   if (dump_file  (dump_flags  TDF_DETAILS))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
new file mode 100644
index 000..d169a5f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-additional-options -ftree-parallelize-loops=32 } */
+
+#include stdlib.h
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+void __attribute__((noinline,noclone))
+foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
+{
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+a[i] = i * 2;
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+#pragma acc loop independent
+for (COUNTERTYPE ii = 0; ii  N; ii++)
+  c[ii] = a[ii] + b[ii];
+  }
+
+  for (COUNTERTYPE i = 0; i  N; i++)
+if (c[i] != a[i] + b[i])
+  abort ();
+}
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+  foo (a, b, c);
+
+  return 0;
+}
-- 
1.9.1



Re: [patch, driver] Ignore -ftree-parallelize-loops={0,1}

2015-07-14 Thread Tom de Vries

On 14/07/15 06:54, Jeff Law wrote:

On 07/13/2015 04:58 AM, Tom de Vries wrote:

On 07/07/15 09:53, Tom de Vries wrote:

Hi,

currently, we have these spec strings in gcc/gcc.c involving
ftree-parallelize-loops:
...
%{fopenacc|fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}



%{fopenacc|fopenmp|ftree-parallelize-loops=*:-pthread}
...

Actually, ftree-parallelize-loops={0,1} means that no parallelization is
done, but these spec strings still get activated for these values.


Attached patch fixes that, by introducing a spec function gt (short for
greather than), and using it in the spec lines.



Attached (untested) patch manages the same, without introducing the spec
function 'gt'. But the solution is a bit convoluted, so I prefer the one
with the gt function.

I prefer the one with the gt function :-)



Committed the patch using the gt function, as attached (formatting 
fixed, ChangeLog entry added).


Thanks,
- Tom

Ignore -ftree-parallelize-loops={0,1} using gt

2015-07-14  Tom de Vries  t...@codesourcery.com

	* gcc.c (greater_than_spec_func): Declare forward.
	(LINK_COMMAND_SPEC, GOMP_SELF_SPECS): Use gt to ignore
	-ftree-parallelize-loops={0,1}.
	(static_spec_functions): Add greater_than_spec_func function with name
	gt.
	(greater_than_spec_func): New function.
---
 gcc/gcc.c | 49 +++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 0f29b78..92d0909 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -274,6 +274,7 @@ static const char *compare_debug_self_opt_spec_function (int, const char **);
 static const char *compare_debug_auxbase_opt_spec_function (int, const char **);
 static const char *pass_through_libs_spec_func (int, const char **);
 static const char *replace_extension_spec_func (int, const char **);
+static const char *greater_than_spec_func (int, const char **);
 static char *convert_white_space (char *);
 
 /* The Specs Language
@@ -881,7 +882,8 @@ proper position among the other output files.  */
 %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!nostartfiles:%S}}  VTABLE_VERIFICATION_SPEC  \
 %{static:} %{L*} %(mfwrap) %(link_libgcc)  SANITIZER_EARLY_SPEC  %o\
  CHKP_SPEC  \
-%{fopenacc|fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\
+%{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*} 1):\
+	%:include(libgomp.spec)%(link_gomp)}\
 %{fcilkplus:%:include(libcilkrts.spec)%(link_cilkrts)}\
 %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\
 %(mflib)  STACK_SPLIT_SPEC \
@@ -1042,7 +1044,8 @@ static const char *const multilib_defaults_raw[] = MULTILIB_DEFAULTS;
 /* Linking to libgomp implies pthreads.  This is particularly important
for targets that use different start files and suchlike.  */
 #ifndef GOMP_SELF_SPECS
-#define GOMP_SELF_SPECS %{fopenacc|fopenmp|ftree-parallelize-loops=*:  \
+#define GOMP_SELF_SPECS \
+  %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*} 1):  \
   -pthread}
 #endif
 
@@ -1482,6 +1485,7 @@ static const struct spec_function static_spec_functions[] =
   { compare-debug-auxbase-opt, compare_debug_auxbase_opt_spec_function },
   { pass-through-libs,	pass_through_libs_spec_func },
   { replace-extension,	replace_extension_spec_func },
+  { gt,			greater_than_spec_func },
 #ifdef EXTRA_SPEC_FUNCTIONS
   EXTRA_SPEC_FUNCTIONS
 #endif
@@ -9428,6 +9432,47 @@ replace_extension_spec_func (int argc, const char **argv)
   return result;
 }
 
+/* Returns  if the n in ARGV[1] == -opt=n is greater than ARGV[2].
+   Otherwise, return NULL.  */
+
+static const char *
+greater_than_spec_func (int argc, const char **argv)
+{
+  char *converted;
+
+  if (argc == 1)
+return NULL;
+
+  gcc_assert (argc == 3);
+  gcc_assert (argv[0][0] == '-');
+  gcc_assert (argv[0][1] == '\0');
+
+  /* Point p to n in in -opt=n.  */
+  const char *p = argv[1];
+  while (true)
+{
+  char c = *p;
+  if (c == '\0')
+	gcc_unreachable ();
+
+  ++p;
+
+  if (c == '=')
+	break;
+}
+
+  long arg = strtol (p, converted, 10);
+  gcc_assert (converted != p);
+
+  long lim = strtol (argv[2], converted, 10);
+  gcc_assert (converted != argv[2]);
+
+  if (arg  lim)
+return ;
+
+  return NULL;
+}
+
 /* Insert backslash before spaces in ORIG (usually a file path), to 
avoid being broken by spec parser.
 
-- 
1.9.1



Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Sebastian Huber



On 14/07/15 09:19, Jakub Jelinek wrote:

On Tue, Jul 14, 2015 at 09:09:01AM +0200, Sebastian Huber wrote:

I pasted the wrong version, since I wanted to check that I get a failure in
case generation % BAR_INCR != 0.

There is also config/posix/bar* implementation, which would need to be
checked too.  I'd suggest first committing a patch with both sem and mutex
being destroyed (note, config/posix/bar* has two semaphores in it), and
perhaps if you feel strongly about it an incremental patch to avoid the
destroying/initialization of the barrier if it works even with
config/posix/bar*.

Jakub


If I destroy the barrier of the last team, and then initialize it later 
in gomp_new_team()


+static inline struct gomp_team *
+get_last_team (unsigned nthreads)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  if (thr-ts.team == NULL)
+{
+  struct gomp_thread_pool *pool = thr-thread_pool;
+  if (pool != NULL)
+   {
+ struct gomp_team *last_team = pool-last_team;
+ if (last_team != NULL  last_team-nthreads == nthreads)
+   {
+ pool-last_team = NULL;
+ gomp_barrier_destroy (last_team-barrier);
+ return last_team;
+   }
+   }
+}
+  return NULL;
+}

then I get test suite failures. Is it safe to destroy the team barrier 
here or do we circumvent the last team mechanism which is supposed to 
delay the destruction?


Example failure:

WARNING: program timed out.
FAIL: libgomp.c/appendix-a/a.15.1.c execution test

--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Andrew Pinski
On Tue, Jul 14, 2015 at 3:06 AM, Andrew Pinski pins...@gmail.com wrote:
 On Tue, Jul 14, 2015 at 1:18 AM, Kyrill Tkachov kyrylo.tkac...@arm.com 
 wrote:
 Hi Segher,

 On 14/07/15 01:38, Segher Boessenkool wrote:

 On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

 For the testcase in the patch we were generating an extra neg
 instruction:
  cmp w0, wzr
  csneg   w0, w0, w0, ge
  neg w0, w0
  ret

 instead of the optimal:
  cmp w0, wzr
  csneg   w0, w0, w0, lt
  ret

 The reason is that combine tries to merge the operation into a negation
 of
 an abs.

 Before combine, you have two insns, a negation and an abs, so that is
 not so very strange :-)


 Well, because the aarch64 integer abs expander expands to a compare
 and a conditional negate, we have a compare followed by an if_then_else
 with a neg in it. That's followed by a neg of that:
 (insn 6 3 7 2 (set (reg:CC 66 cc)
 (compare:CC (reg/v:SI 76 [ x ])
 (const_int 0 [0]))) abs.c:1 374 {*cmpsi}
  (nil))
 (insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
 (if_then_else:SI (lt (reg:CC 66 cc)
 (const_int 0 [0]))
 (neg:SI (reg/v:SI 76 [ x ]))
 (reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
  (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
 (expr_list:REG_DEAD (reg:CC 66 cc)
 (expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
 (nil)
 (insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
 (neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
  (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
 (nil)))


 What does combine do when it props 7 into 8?  I suspect you want to
 optimize that instead of doing it any other way.
 That is if prop the neg into the two sides of the conditional and if
 one simplifies, produce a new rtl.

This should help code even like:
static inline int f(int a, int b, int c)
{
  if (a) return b;
  else return -c;
}

int g(int a, int b, int c)
{
  return -f(a, b, c);
}

Note the above code might be optimized at the tree level already.

Thanks,
Andrew


 Thanks,
 Andrew Pinski



 combine tries to convert it into a neg-of-an-abs in simplify_if_then_else


 Some archs have actual nabs insns btw (for floating point, anyway).

 Archs without abs or conditional assignment, and with cheap branches,
 get a branch around a neg followed by another neg, at expand time.
 This then isn't optimised away either.

 So I'd say expand should be made a bit smarter for this.  Failing
 that, your approach looks fine to me -- assuming you want to have a
 fake abs insn at all.

 On to the patch...


 +;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't a
 good

 x  0 here.


 Thanks, I'll fix that when committing if approved.

 Kyrill


 Segher




[Bug fortran/52846] [F2008] Support submodules - part 2/3

2015-07-14 Thread Paul Richard Thomas
Dear All,

Reinhold Bader has pointed out the naming the submodule files after
the submodule name and using .mod as the extension can potentially
lead to clashes. Therefore, I have written a patch to change gfortran
to follow the naming convention of another leading brand:

submodule filename = module@ancestor@@submodule.smod

The implementation is straightforward and the ChangeLog and the patch
provide an adequate description.

Bootstraps and regtests on x86_64 - OK for trunk?

Paul

2015-07-14  Paul Thomas  pa...@gcc.gnu.org

PR fortran/52846
* gfortran.h : Add 'submodule_name' to gfc_use_list structure.
* module.c (gfc_match_submodule): Define submodule_name and add
static 'submodule_name'.
(gfc_match_submodule): Build up submodule filenames, using '@'
as a delimiter. Store the output filename in 'submodule_name'.
(gfc_dump_module): If current state is COMP_SUBMODULE, write
to file 'submodule_name', using SUBMODULE_EXTENSION.
(gfc_use_module): Similarly, use the 'submodule_name' field in
the gfc_use_list structure and SUBMODULE_EXTENSION to read the
implicitly used submodule files.
Index: gcc/fortran/gfortran.h
===
*** gcc/fortran/gfortran.h  (revision 225410)
--- gcc/fortran/gfortran.h  (working copy)
*** gfc_use_rename;
*** 1556,1561 
--- 1556,1562 
  typedef struct gfc_use_list
  {
const char *module_name;
+   const char *submodule_name;
bool intrinsic;
bool non_intrinsic;
bool only_flag;
Index: gcc/fortran/module.c
===
*** gcc/fortran/module.c(revision 225410)
--- gcc/fortran/module.c(working copy)
*** along with GCC; see the file COPYING3.
*** 82,87 
--- 82,88 
  #include zlib.h

  #define MODULE_EXTENSION .mod
+ #define SUBMODULE_EXTENSION .smod

  /* Don't put any single quote (') in MOD_VERSION, if you want it to be
 recognized.  */
*** static gzFile module_fp;
*** 191,196 
--- 192,199 

  /* The name of the module we're reading (USE'ing) or writing.  */
  static const char *module_name;
+ /* The name of the .smod file that the submodule will write to.  */
+ static const char *submodule_name;
  static gfc_use_list *module_list;

  /* If we're reading an intrinsic module, this is its ID.  */
*** cleanup:
*** 718,723 
--- 721,727 

  /* Match a SUBMODULE statement.  */

+
  match
  gfc_match_submodule (void)
  {
*** gfc_match_submodule (void)
*** 750,758 
--- 754,767 
  while (last-next)
last = last-next;
  last-next = use_list;
+ use_list-submodule_name
+   = gfc_get_string (%s@%s, last-module_name, name);
}
else
+   {
  module_list = use_list;
+ use_list-submodule_name = gfc_get_string (name);
+   }

if (gfc_match_char (')') == MATCH_YES)
break;
*** gfc_match_submodule (void)
*** 765,770 
--- 774,782 
if (m != MATCH_YES)
  goto syntax;

+   submodule_name = gfc_get_string (%s@%s, use_list-submodule_name,
+  gfc_new_block-name);
+
if (!gfc_add_flavor (gfc_new_block-attr, FL_MODULE,
   gfc_new_block-name, NULL))
  return MATCH_ERROR;
*** gfc_dump_module (const char *name, int d
*** 5933,5939 
--- 5945,5960 
char *filename, *filename_tmp;
uLong crc, crc_old;

+   module_name = gfc_get_string (name);
+
+   if (gfc_state_stack-state == COMP_SUBMODULE)
+ {
+   name = submodule_name;
+   n = strlen (name) + strlen (SUBMODULE_EXTENSION) + 1;
+ }
+   else
  n = strlen (name) + strlen (MODULE_EXTENSION) + 1;
+
if (gfc_option.module_dir != NULL)
  {
n += strlen (gfc_option.module_dir);
*** gfc_dump_module (const char *name, int d
*** 5946,5951 
--- 5967,5976 
filename = (char *) alloca (n);
strcpy (filename, name);
  }
+
+   if (gfc_state_stack-state == COMP_SUBMODULE)
+ strcat (filename, SUBMODULE_EXTENSION);
+   else
  strcat (filename, MODULE_EXTENSION);

/* Name of the temporary file used to write the module.  */
*** gfc_dump_module (const char *name, int d
*** 5975,5981 

/* Write the module itself.  */
iomode = IO_OUTPUT;
-   module_name = gfc_get_string (name);

init_pi_tree ();

--- 6000,6005 
*** gfc_use_module (gfc_use_list *module)
*** 6706,6715 
  gfc_warning_now (OPT_Wuse_without_only,
 USE statement at %C has no ONLY qualifier);

!   filename = XALLOCAVEC (char, strlen (module_name) + strlen 
(MODULE_EXTENSION)
!  + 1);
strcpy (filename, module_name);
strcat (filename, MODULE_EXTENSION);

/* First, try to find an non-intrinsic module, unless the USE statement
   

Re: libgo patch committed: Remove unnecessary Entersyscall (RMs: OK for GCC 5 branch?)

2015-07-14 Thread Richard Biener
On Mon, Jul 13, 2015 at 8:55 PM, Ian Lance Taylor i...@golang.org wrote:
 This patch by Lynn Boger removes unnecessary duplicate calls to
 Entersyscall and Exitsyscall from the GNU/Linux Getdents function.
 The calls are duplicates because they are called by Syscall, also
 called by Getdents.  These duplicate calls sometimes cause the
 deadlock detector to fire incorrectly, reported as
 http://golang.org/issue/11406 .  This patch fixes that problem.
 Bootstrapped and ran Go tests on mainline, GCC 4.9 branch, and GCC 5
 branch.  Committed to mainline and 4.9 branch.

 Release managers: OK to commit to GCC 5 branch?

Ok.

Richard.

 Ian


Re: Merge trunk r225562 (2015-07-08) into gomp-4_0-branch

2015-07-14 Thread Tom de Vries

On 13/07/15 10:31, Thomas Schwinge wrote:

Hi Tom!

On Mon, 13 Jul 2015 09:20:16 +0200, Tom de Vries tom_devr...@mentor.com wrote:

On 12/07/15 11:39, Thomas Schwinge wrote:

On Fri, 10 Jul 2015 18:50:20 -0400, Nathan Sidwell nathan_sidw...@mentor.com 
wrote:

it looks like the most recent merge from trunk to gomp4 was early May.  I think
it is time for another one -- can you handle that?


Indeed :-) -- and, as it happens, resolving the merge artifacts is one
of the things I've been working on last week.  I hope I got that all
right, in particular gcc/tree-parloops.c (Tom),


I've looked at the merge commit, gcc/tree-parloops.c was not modified.


(Well, it was, but not substantially.)  You'd ported all your trunk
commits to gomp-4_0-branch already (thanks!), and in the functions where
I got merge conflicts, I just retained the code that was present on
gomp-4_0-branch already, which apparently was the right thing to do.  ;-)



gcc/tree-ssa-loop-ch.c (Tom),


That looks ok. I just wonder whether we could have derived
pass_ch_oacc_kernels from pass_ch instead of from ch_base, avoiding
duplicating the execute function, and have
pass_ch_oacc_kernels::process_loop_p call pass_ch::process_loop_p rather
than inline it.


Your call, depending on what makes the most sense regarding the semantics
of pass_ch_oacc_kernels.



I've build attached patch, and ran goacc.exp, that all went ok. I'll do 
bootstrap and reg-test, and commit.



I was just (pleasantly) surprised to find myself (capable of) doing a
little C++ programming, with classes, inheritance, and so on.  ;-)



Heh. I know the feeling :)

Thanks,
- Tom

Derive pass_ch_oacc_kernels from pass_ch

2015-07-14  Tom de Vries  t...@codesourcery.com

	* tree-ssa-loop-ch.c (pass_ch::pass_ch (pass_data, gcc::context)): New
	constructor.
	(pass_ch_oacc_kernels): Derive from pass_ch.
	(pass_ch_oacc_kernels::pass_ch_oacc_kernels(gcc::context)): Call pass_ch
	constructor.
	(pass_ch_oacc_kernels::execute): Remove.
	(pass_ch_oacc_kernels::process_loop_p): Rewrite using
	pass_ch::process_loop_p.
---
 gcc/tree-ssa-loop-ch.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 4256ef0..c28e1e8 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -165,6 +165,10 @@ public:
 : ch_base (pass_data_ch, ctxt)
   {}
 
+  pass_ch (pass_data data, gcc::context *ctxt)
+: ch_base (data, ctxt)
+  {}
+
   /* opt_pass methods: */
   virtual bool gate (function *) { return flag_tree_ch != 0; }
   
@@ -436,43 +440,30 @@ const pass_data pass_data_ch_oacc_kernels =
   TODO_cleanup_cfg, /* todo_flags_finish */
 };
 
-class pass_ch_oacc_kernels : public ch_base
+class pass_ch_oacc_kernels : public pass_ch
 {
 public:
   pass_ch_oacc_kernels (gcc::context *ctxt)
-: ch_base (pass_data_ch_oacc_kernels, ctxt)
+: pass_ch (pass_data_ch_oacc_kernels, ctxt)
   {}
 
   /* opt_pass methods: */
   virtual bool gate (function *) { return true; }
-  virtual unsigned int execute (function *);
 
 protected:
   /* ch_base method: */
   virtual bool process_loop_p (struct loop *loop);
 }; // class pass_ch_oacc_kernels
 
-unsigned int
-pass_ch_oacc_kernels::execute (function *fun)
-{
-  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
-		   | LOOPS_HAVE_SIMPLE_LATCHES);
-
-  unsigned int res = copy_headers (fun);
-
-  loop_optimizer_finalize ();
-  return res;
-}
-
 } // anon namespace
 
 bool
 pass_ch_oacc_kernels::process_loop_p (struct loop *loop)
 {
-  if (do_while_loop_p (loop))
+  if (!loop-in_oacc_kernels_region)
 return false;
 
-  return loop-in_oacc_kernels_region;
+  return pass_ch::process_loop_p (loop);
 }
 
 gimple_opt_pass *
-- 
1.9.1



Re: [PATCH 2/5] Add struct tree_decl_map_hasher

2015-07-14 Thread Richard Biener
On Sun, Jul 12, 2015 at 5:49 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch introduces infrastructure for patches 3 and 4.

 OK for trunk?

Ok.

Richard.

 Thanks,
 - Tom



Re: [PATCH 1/5] Downgrade debug_args_for_decl to non-cache

2015-07-14 Thread Richard Biener
On Sun, Jul 12, 2015 at 5:47 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch downgrades debug_args_for_decl to non-cache.

 OK for trunk?

Ok.

Thanks,
Richard.

 Thanks,
 - Tom



Re: conditional lim

2015-07-14 Thread Richard Biener
On Mon, Jun 29, 2015 at 4:21 PM, Evgeniya Maenkova
evgeniya.maenk...@gmail.com wrote:
 On Mon, Jun 29, 2015 at 5:10 PM, Richard Biener
 richard.guent...@gmail.com wrote:
 On Tue, Jun 9, 2015 at 10:11 PM, Evgeniya Maenkova
 evgeniya.maenk...@gmail.com wrote:
 On Tue, Jun 9, 2015 at 3:46 PM, Richard Biener
 richard.guent...@gmail.com wrote:
 On Fri, May 29, 2015 at 3:14 PM, Evgeniya Maenkova
 evgeniya.maenk...@gmail.com wrote:
 Hi Richard,

 Here is some explanation. I hope you let me know if I need to clarify 
 something.

 Also, you asked me about concrete example, to make sure you don’t miss
 my answer here is the link:
 https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02417.html.

 Also, I doubt whether it’s convenient for you to create a build with
 my patch or not. May be to clarify things you could send me some
 examples/concrete cases, then I’ll compile them with
 –fdump-tree-loopinit-details and –fdump-tree-lim-details and send you
 these dumps. May be these dumps will be useful. (I’ll only disable
 cleanup_cfg TODO after lim to let you know the exact picture after
 lim).

 What do you think?

 1.   invariantness _dom_walker –

 1.1   for each GIMPLE_COND in given bb calls handle_cond_stmt to call
 for true and false edges handle_branch_edge, which calls SET_TARGET_OF
 for all bb ‘predicated’ by given GIMPLE_COND.

 SET_TARGET_OF sets in basic_blocks aux 2 facts:

 a)  this is true or false edge;

 b)  link to cond stmt;

 Handle_branch_edge works this way:

 If (cond1)

   {

  bb1;

  if (cond2}

{

bb2;

 }

Being called for cond1, it sets cond1 as condition for both bb1 and
 bb2 (the whole branch for cond1, ie also for bb containing cond2),
 then this method will be called (as there is dominance order) for
 cond2 to correct things (ie to set cond2 as condition for bb2).

 Hmm, why not track the current condition as state during the DOM walk
 and thus avoid processing more than one basic-block in handle_branch_edge?
 Thus get rid of handle_branch_edge and instead do everything in 
 handle_cond_stmt
 plus the dom-walkers BB visitor?

 I need to look more carefully how to implement it, but I think I
 understand what you mean and this optimization of course looks
 reasonable to me. Will do.

 I see you don't handle BBs with multiple predecessors - that's ok, but
 are you sure you don't run into correctness issues when not marking such
 BBs as predicated?  This misses handling of, say

  if (a || b)
bb;

 which is a pity (but can be fixed later if desired).

 I had some test (in gcc testsuite or bootstrap build) which worked
 incorrectly because of multiple predecessors. As far as I remember the
 situation was (further, will make some notes about such tests to
 clarify this better), I mean with previous version of my code which
 handled bb with 2 predecessors:
 if (a)
   tmpvar=something;
 while()
   if (a || b)
   basic_block {do something with tmpvar;} // I mean basic block
 predicated by bb with a and bb with b

 So, if a is false, I mean we didn't do tmpvar=something (outside
 loop), BUT we are in basick_block  (we went by bb with b), we got
 Segmentation falt in basic_block {do something with tmpvar;}.

 I think we can reproduce all the details of this test if I remove not
 handling bb with 2 predecessors.

 So I wouldn't move bb with 2 predecessors (this is not always executed
 bb in any loop, not conditionally, they will not be moved at all).

 This is my more detail explanation on this point. Perhaps, I didn't
 understand your question about correctness. Could you repeat it in
 other words (based on this new clarification).

 So I think according to current code it will not be moved. What
 incorrectness do you mean?

 If the block isn't marked as predicated the question is whether it is
 handled correctly or assumed to be unconditionally executed.

 I note that collecting predicates has similarities to what if-conversion
 does in tree-ifcvt.c (even if its implementation is completely different,
 of course).


 Ok, I'll look at this. But could you please clarify your point?
 (Should I just take this into account with low priority and look at
 this later or you want some refactoring?)

 I just noted similar code exists elsewhere - it may be possible to
 factor it out but I didn't investigate.  And no, doing that isn't a 
 prerequesite
 for this patch.

 1.2   As 1.1 goes we identify whether some bb is predicated by some
 condition or not.

 bb-aux-type will be [TRUE/FALSE]_TARGET_OF and
 bb-aux-cond_stmt=cond stmt (the nearest condition).

 If bb is always executed bb-aux-type = ALWAYS_EXECUTED_IN,
 bb-loop-loop (this info was available in the clean build).

 1.3   As this walker is called in dominance order, information about
 condition is available when invariantness_dom_walker is called for
 given bb.  So we can make some computations based on bb-aux
 structure. This is done in check_conditionally_executed. The main goal
 of this 

[PATCH] Fix more genmatch case label indenting issues

2015-07-14 Thread Richard Biener

Committed.

Richard.

2015-07-14  Richard Biener  rguent...@suse.de

* genmatch.c (dt_node::gen_kids_1): Fix case label indenting.
(decision_tree::gen_gimple): Likewise.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 225761)
+++ gcc/genmatch.c  (working copy)
@@ -2383,8 +2383,8 @@ dt_node::gen_kids_1 (FILE *f, int indent
  fprintf_indent (f, indent,   }\n);
}
  fprintf_indent (f, indent, default:;\n);
- indent -= 4;
  fprintf_indent (f, indent, }\n);
+ indent -= 4;
}
 
   if (fns_len)
@@ -2403,8 +2403,8 @@ dt_node::gen_kids_1 (FILE *f, int indent
  switch (DECL_FUNCTION_CODE (fndecl))\n);
  fprintf_indent (f, indent,
{\n);
- indent += 8;
 
+ indent += 6;
  for (unsigned i = 0; i  fns_len; ++i)
{
  expr *e = as_a expr *(fns[i]-op);
@@ -2416,8 +2416,8 @@ dt_node::gen_kids_1 (FILE *f, int indent
}
 
  fprintf_indent (f, indent, default:;\n);
- indent -= 8;
- fprintf_indent (f, indent,   }\n);
+ fprintf_indent (f, indent, }\n);
+ indent -= 6;
  fprintf_indent (f, indent,   }\n);
}
 
@@ -2481,7 +2481,7 @@ dt_node::gen_kids_1 (FILE *f, int indent
 {
   indent -= 4;
   fprintf_indent (f, indent, default:;\n);
-  fprintf_indent (f, indent,   }\n);
+  fprintf_indent (f, indent, }\n);
 }
 
   for (unsigned i = 0; i  preds.length (); ++i)
@@ -2915,7 +2915,7 @@ decision_tree::gen_gimple (FILE *f)
  fprintf (f, }\n);
}
   fprintf (f,   default:;\n
- }\n);
+   }\n);
 
   fprintf (f,   return false;\n);
   fprintf (f, }\n);


Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Kyrill Tkachov


On 14/07/15 11:06, Andrew Pinski wrote:

On Tue, Jul 14, 2015 at 1:18 AM, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:

Hi Segher,

On 14/07/15 01:38, Segher Boessenkool wrote:

On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

For the testcase in the patch we were generating an extra neg
instruction:
  cmp w0, wzr
  csneg   w0, w0, w0, ge
  neg w0, w0
  ret

instead of the optimal:
  cmp w0, wzr
  csneg   w0, w0, w0, lt
  ret

The reason is that combine tries to merge the operation into a negation
of
an abs.

Before combine, you have two insns, a negation and an abs, so that is
not so very strange :-)


Well, because the aarch64 integer abs expander expands to a compare
and a conditional negate, we have a compare followed by an if_then_else
with a neg in it. That's followed by a neg of that:
(insn 6 3 7 2 (set (reg:CC 66 cc)
 (compare:CC (reg/v:SI 76 [ x ])
 (const_int 0 [0]))) abs.c:1 374 {*cmpsi}
  (nil))
(insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
 (if_then_else:SI (lt (reg:CC 66 cc)
 (const_int 0 [0]))
 (neg:SI (reg/v:SI 76 [ x ]))
 (reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
  (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
 (expr_list:REG_DEAD (reg:CC 66 cc)
 (expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
 (nil)
(insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
 (neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
  (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
 (nil)))


What does combine do when it props 7 into 8?  I suspect you want to
optimize that instead of doing it any other way.
That is if prop the neg into the two sides of the conditional and if
one simplifies, produce a new rtl.


Yeah, that's what combine tries in simplify_if_then_else, instead of
propagating the neg it tries a (neg (abs x)). I did consider telling it not
to do that, but how would it be gated?
Should we look if the one arm of the if_then_else is a neg of the other and
invert the comparison code instead of trying (neg (abs x)) when 
HAVE_conditional_move?

Kyrill


Thanks,
Andrew Pinski



combine tries to convert it into a neg-of-an-abs in simplify_if_then_else


Some archs have actual nabs insns btw (for floating point, anyway).

Archs without abs or conditional assignment, and with cheap branches,
get a branch around a neg followed by another neg, at expand time.
This then isn't optimised away either.

So I'd say expand should be made a bit smarter for this.  Failing
that, your approach looks fine to me -- assuming you want to have a
fake abs insn at all.

On to the patch...



+;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't a
good

x  0 here.


Thanks, I'll fix that when committing if approved.

Kyrill


Segher





Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 11:28:18AM +0200, Sebastian Huber wrote:
 If I destroy the barrier of the last team, and then initialize it later in
 gomp_new_team()
 
 +static inline struct gomp_team *
 +get_last_team (unsigned nthreads)
 +{
 +  struct gomp_thread *thr = gomp_thread ();
 +  if (thr-ts.team == NULL)
 +{
 +  struct gomp_thread_pool *pool = thr-thread_pool;
 +  if (pool != NULL)
 +   {
 + struct gomp_team *last_team = pool-last_team;
 + if (last_team != NULL  last_team-nthreads == nthreads)
 +   {
 + pool-last_team = NULL;
 + gomp_barrier_destroy (last_team-barrier);
 + return last_team;
 +   }
 +   }
 +}
 +  return NULL;
 +}
 
 then I get test suite failures. Is it safe to destroy the team barrier here
 or do we circumvent the last team mechanism which is supposed to delay the
 destruction?

Then you indeed supposedly hit the reason why last_team exists.
Thus, if not reinitializing the team barrier works even with
--disable-linux-futex configured libgomp, I guess it is ok not to
destroy it and reinit it again.

Jakub


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-14 Thread Richard Biener
On Sun, Jul 12, 2015 at 5:53 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch downgrade value_expr_for_decl to non-cache.

 OK for trunk?

Similar to the debug_decl_map the idea of this hash is that
it records on-the-side info for an entity.  If that entity is no
longer needed then we should discard the reference so we
can collect the entity.  Whether the on-the-side info is used
in other means isn't relevant here.

Note that they are also not really caches in that removing
an entry for a key that is still live will cause information loss
that cannot be restored.

What's the bad side-effect of the late marking other than
affecting this (or other) caches?

What's your idea of restoring the idea of these maps under
the constraint you try to add?

Richard.

 Thanks,
 - Tom



Re: [PATCH] Yet another simple fix to enhance outer-loop vectorization.

2015-07-14 Thread Richard Biener
On Wed, Jun 17, 2015 at 6:35 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Richard,

 I attached updated patch.
 You asked me to explain why I did changes for renaming.
 If we do not change PHI arguments for inner loop header we can get the
 following IR:
 source outer loop:
   bb 5: outer-loop header
   # i_45 = PHI 0(4), i_18(9)
   # .MEM_17 = PHI .MEM_4(D)(4), .MEM_44(9)

   bb 6:inner-loop header
   # .MEM_46 = PHI .MEM_44(7), .MEM_17(5)

 after duplication we have (without argument correction):
   bb 12:
   # i_74 = PHI i_64(13), 0(17)
   # .MEM_73 = PHI .MEM_97(13), .MEM_4(D)(17)

   bb 15:
   # .MEM_63 = PHI .MEM_17(12), .MEM_97(16)

 and later we get verifier error:
 test1.c:20:6: error: definition in block 6 does not dominate use in block 10
  void foo (int n)
   ^
 for SSA_NAME: .MEM_17 in statement:
 .MEM_63 = PHI .MEM_17(10), .MEM_97(14)

 and you can see that we need to rename MEM_17 argument for out-coming
 edge to MEM_73 since
 MEM_17 was converted to MEM_73 in outer-loop header.

 This explains my simple fix in rename_variables_in_bb.
 Note also that loop distribution is not performed for outer loops.

Yes, I never committed the patch implementing this...  but I also now
see that loop-distribution always re-writes the virtual SSA web.

Richard.

 I also did a change in slpeel_can_duplicate_loop_p to simplify check.

 Any comments will be appreciated.

 Yuri.

 2015-06-17 15:24 GMT+03:00 Richard Biener richard.guent...@gmail.com:
 On Tue, Jun 16, 2015 at 4:12 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Thanks a lot Richard for your review.

 I presented updated patch which is not gated by force_vectorize.
 I added test on outer-loop in vect_enhance_data_refs_alignment
 and it returns false for it because we can not improve dr alighment
 through outer-loop peeling in general. So I assume that only
 versioning for alignment can be applied for targets do not support
 unaligned memory access.

 @@ -998,7 +998,12 @@
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

 +  /* Peeling of outer loops can't improve alignment.  */
 +  if (loop_vinfo  LOOP_VINFO_LOOP (loop_vinfo)-inner)
 +return false;
 +

 but this looks wrong.  It depends on the context (DRs in the outer loop
 can improve alignment by peeling the outer loop and we can still
 peel the inner loop for alignment).

 So IMHO the correct place to amend is vect_enhance_data_refs_alignment
 (which it seems doesn't consider peeling the inner loop).

 I'd say you should simply add

  || loop-inner)

 to the

   /* Check if we can possibly peel the loop.  */
   if (!vect_can_advance_ivs_p (loop_vinfo)
   || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
 do_peeling = false;

 check.

 I did not change tests for outer loops in slpeel_can_duplicate_loop_p
 as you proposed since it is not called outside vectorization.

 There is still no reason for this complex condition, so please remove it.

 _Please_ also generate diffs with -p, it is very tedious to see patch hunks
 without a function name context.

 You didn't explain why you needed the renaming changes as I don't
 remember needing it when using the code from loop distribution.

 I also noticed one not-resolved issue with outer-loop peeling - we don't
 consider remainder for possible vectorization of inner-loop as we can see
 on the following example:

   for (i = 0; i  n; i++) {
 diff = 0;
 for (j = 0; j  M; j++) {
   diff += in[j+i]*coeff[j];
 }
 out[i] = diff;
   }

 Is it worth to fix it?

 You mean vectorizing the inner loop in the niter peel epilogue loop
 of the outer loop?  Possibly yes.

 Thanks,
 Richard.

 2015-06-16  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
 to allow renaming of PHI arguments on edges incoming from outer
 loop header, add corresponding check before start PHI iterator.
 (slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
 variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
 with true force_vectorize.  Set-up dominator for outer loop too.
 Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
 (slpeel_can_duplicate_loop_p): Allow duplicate of outer loop if it
 was marked with force_vectorize and has restricted cfg.
 * tree-vect-data-refs.c (vector_alignment_reachable_p): Alignment can
 not be reachable for outer loops.
 (vect_enhance_data_refs_alignment): Add test on true value of
 do_peeling.

 gcc/testsuite/ChangeLog:
 * gcc.dg/vect/vect-outer-simd-2.c: New test.

 2015-06-09 16:26 GMT+03:00 Richard Biener richard.guent...@gmail.com:
 On Mon, Jun 8, 2015 at 12:27 PM, Yuri Rumyantsev ysrum...@gmail.com 
 wrote:
 Hi All,

 Here is a simple fix which allows duplication of outer loops to
 perform peeling for number of iterations if outer loop is marked with
 pragma omp simd.

Re: [PATCH, gomp4] Propagate independent clause for OpenACC kernels pass

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 05:35:28PM +0800, Chung-Lin Tang wrote:
 The wording of OpenACC independent is more simple:
 ... the independent clause tells the implementation that the iterations of 
 this loop
 are data-independent with respect to each other. -- OpenACC spec 2.7.9
 
 I would say this implies even more relaxed conditions than OpenMP simd 
 safelen,
 essentially saying that the compiler doesn't even need dependence analysis; 
 just
 assume independence of iterations.

safelen is also saying that the compiler doesn't even need dependence
analysis.  It is just that only some transformations of the loop are ok
without dependence analysis, others need to be with dependence analysis.
Classical vectorization optimizations (instead of doing one iteration
at a time you can do up to safelen consecutive iterations together) for the
first statement in the loop, then second statement, etc. are ok without
dependence analysis, but e.g. reversing the loop and running first the last
iteration and so on up to first, or running the iterations in random orders
is not ok.

  So if OpenACC independent means there are no dependencies in between
  iterations, the OpenMP counterpart here is #pragma omp for simd schedule 
  (auto)
  or #pragma omp distribute parallel for simd schedule (auto).
 
 schedule(auto) appears to correspond to the OpenACC 'auto' clause, or
 what is implied in a kernels compute construct, but I'm not sure it implies
 no dependencies between iterations?

By the schedule(auto) I meant that the user tells the compiler it can
parallelize the loop with whatever schedule it wants.  Other schedules are
quite well defined, if the team has that many threads, which of the thread
gets which iteration, so user could rely on a particular parallelization and
the loop iterations still could not be 100% independent.  With
schedule(auto) you say it is up to the compiler to schedule them, thus they
really have to be all independent.

 Putting aside the semantic issues, as of currently safelen0 turns on a 
 certain amount of
 vectorization code that we are not currently using (and not likely at all for 
 nvptx).
 Right now, we're just trying to pass the new flag to a kernels tree-parloops 
 based pass.

In any case, when setting your flag you should also set safelen = INT_MAX,
as the OpenACC independent implies that you can vectorize the loop with any
vectorization factor without performing dependency analysis on the loop.
OpenACC is (hopefully) not just about PTX and most other targets will want
to vectorize such loops.

Jakub


Re: [PATCH 5/5] Don't mark live recursively in gt_cleare_cache

2015-07-14 Thread Richard Biener
On Sun, Jul 12, 2015 at 5:58 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch:
 - disables the recursive marking of cache entries during the cache-clear
   phase
 - Adds ENABLE_CHECKING code to check that we don't end up with partially
   dead cache entries

 OK for trunk?

It seems your docs do not match the implementation with regarding to
keep_cache_entry returning -1.  It looks like -1 is a I do not know value
which IMHO is bad to have.

I miss documentation of the ggc_marked_nonkey_p function and what it
is supposed to return.  As the function seems to be manually implemented
it also looks error-prone - I suppose gengtype could generate those.

+#ifdef ENABLE_CHECKING
+   gcc_assert (!ggc_marked_p (*iter));
+#endif

gcc_checking_assert ()

+#ifdef ENABLE_CHECKING
+   gcc_assert (H::ggc_marked_nonkey_p (*iter));
+#endif

likewise.

Rather than conditionalizing build of the ggc_marked_nonkey_p
functions can you mark them UNUSED (I suppose you only
avoid defined-but-not-used Werror errors)?  We try to get away
from conditional compilation.

Thanks,
Richard.

 Thanks,
 - Tom



Re: [PATCH 3/5] Downgrade debug_expr_for_decl to non-cache

2015-07-14 Thread Richard Biener
On Sun, Jul 12, 2015 at 5:51 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch downgrades debug_expr_for_decl to non-cache.

 OK for trunk?

Ok.

I'm somewhat more nervous about this one with regarding to
memory use.  What's the difference with -fmem-report for
the DECL_DEBUG_EXPR hash for a set of .ii files from GCC
itself (compiling with -g, of course)?  (just trying to see the
typical order of the size of that hash)

Thanks,
Richard.

 Thanks,
 - Tom



Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread pinskia




 On Jul 13, 2015, at 5:48 PM, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:
 
 Hi all,
 
 For the testcase in the patch we were generating an extra neg instruction:
cmp w0, wzr
csneg   w0, w0, w0, ge
neg w0, w0
ret
 
 instead of the optimal:
cmp w0, wzr
csneg   w0, w0, w0, lt
ret
 
 The reason is that combine tries to merge the operation into a negation of an 
 abs.
 I considered teaching combine not to do that but it would require telling it 
 that it shouldn't
 do it if there is a conditional negate instruction. There's no optab for that 
 though :(
 Also, we already advertise that we have an abs optab, even though we expand 
 to a compare and
 a csneg anyway. This patch was the cleanest way I could do this. We just 
 match the neg of an abs
 and generate the same csneg sequence as for normal abs, just with the 
 comparison condition inverted.
 
 Bootstrapped and tested on aarch64.
 
 Ok for trunk?
 Thanks,
 Kyrill
 
 2015-07-13  Kyrylo Tkachov  kyrylo.tkac...@arm.com
 
* config/aarch64/aarch64.md (*absneg2mode_insn): New
define_and_split.

This pattern is incorrect as you need to say you are clobbering the flags 
register. Otherwise an optimization between combine and the splitter can move 
an instruction between it. Also it might be better to just have a define_split 
rather than a define_insn_and_split. Combine knows how to use define_split 
without being an insn. 

Thanks,
Andrew

 
 2015-07-13  Kyrylo Tkachov  kyrylo.tkac...@arm.com
 
* gcc.target/aarch64/neg-abs_1.c: New test.
 abs-neg.patch


Re: [gomp4.1 WIP] omp_target_* libgomp APIs

2015-07-14 Thread Jakub Jelinek
On Mon, Jul 13, 2015 at 05:26:43PM +0200, Jakub Jelinek wrote:
   Perhaps
   #define REFCOUNT_INFINITY (~(uintptr_t) 0)
   ?
  
  Probably, I don't know.
 
 Ok, I'll change this later.

Ok, here is what I've committed:

2015-07-14  Jakub Jelinek  ja...@redhat.com

* libgomp.h (REFCOUNT_INFINITY): Define.
* target.c (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_vars,
gomp_offload_image_to_device, omp_target_associate_ptr,
omp_target_disassociate_ptr): Use REFCOUNT_INFINITY instead of
INT_MAX.

--- libgomp/libgomp.h.jj2015-07-09 09:42:47.0 +0200
+++ libgomp/libgomp.h   2015-07-14 10:18:42.992294453 +0200
@@ -679,6 +679,9 @@ struct target_mem_desc {
   struct target_var_desc list[];
 };
 
+/* Special value for refcount - infinity.  */
+#define REFCOUNT_INFINITY (~(uintptr_t) 0)
+
 struct splay_tree_key_s {
   /* Address of the host object.  */
   uintptr_t host_start;
--- libgomp/target.c.jj 2015-07-13 14:48:11.0 +0200
+++ libgomp/target.c2015-07-14 10:19:11.408614050 +0200
@@ -172,7 +172,7 @@ gomp_map_vars_existing (struct gomp_devi
(void *) (oldn-tgt-tgt_start + oldn-tgt_offset),
(void *) newn-host_start,
newn-host_end - newn-host_start);
-  if (oldn-refcount != INT_MAX)
+  if (oldn-refcount != REFCOUNT_INFINITY)
 oldn-refcount++;
 }
 
@@ -438,7 +438,7 @@ gomp_map_vars (struct gomp_device_descr
  tgt-list[j].key = k;
  tgt-list[j].copy_from = false;
  tgt-list[j].always_copy_from = false;
- if (k-refcount != INT_MAX)
+ if (k-refcount != REFCOUNT_INFINITY)
k-refcount++;
  gomp_map_pointer (tgt,
(uintptr_t) *(void **) hostaddrs[j],
@@ -580,7 +580,7 @@ gomp_unmap_vars (struct target_mem_desc
   bool do_unmap = false;
   if (k-refcount  1)
{
- if (k-refcount != INT_MAX)
+ if (k-refcount != REFCOUNT_INFINITY)
k-refcount--;
}
   else if (k-async_refcount  0)
@@ -727,7 +727,7 @@ gomp_offload_image_to_device (struct gom
   k-host_end = k-host_start + 1;
   k-tgt = tgt;
   k-tgt_offset = target_table[i].start;
-  k-refcount = INT_MAX;
+  k-refcount = REFCOUNT_INFINITY;
   k-async_refcount = 0;
   array-left = NULL;
   array-right = NULL;
@@ -752,7 +752,7 @@ gomp_offload_image_to_device (struct gom
   k-host_end = k-host_start + (uintptr_t) host_var_table[i * 2 + 1];
   k-tgt = tgt;
   k-tgt_offset = target_var-start;
-  k-refcount = INT_MAX;
+  k-refcount = REFCOUNT_INFINITY;
   k-async_refcount = 0;
   array-left = NULL;
   array-right = NULL;
@@ -1507,7 +1507,7 @@ omp_target_associate_ptr (void *host_ptr
   k-host_end = cur_node.host_end;
   k-tgt = tgt;
   k-tgt_offset = (uintptr_t) device_ptr + device_offset;
-  k-refcount = INT_MAX;
+  k-refcount = REFCOUNT_INFINITY;
   k-async_refcount = 0;
   array-left = NULL;
   array-right = NULL;
@@ -1550,7 +1550,7 @@ omp_target_disassociate_ptr (void *ptr,
 }
   if (n
n-host_start == cur_node.host_start
-   n-refcount == INT_MAX
+   n-refcount == REFCOUNT_INFINITY
n-tgt-tgt_start == 0
n-tgt-to_free == NULL
n-tgt-refcount == 1

Jakub


Re: [PATCH, gomp4] Propagate independent clause for OpenACC kernels pass

2015-07-14 Thread Thomas Schwinge
Hi Chung-Lin!

On Tue, 14 Jul 2015 13:46:04 +0800, Chung-Lin Tang clt...@codesourcery.com 
wrote:
 this patch provides a 'bool independent' field in struct loop, which
 will be switched on by an independent clause in a #pragma acc loop 
 directive.

Thanks!


This patch has been developed in context of OpenACC kernels constructs,
but, is there anything still to be done regarding OpenACC parallel
constructs?  That is, are we currently *using* the independent yes/no
information appropriately for these?


 * omp-low.c (struct omp_region): Add 'int kind' and
 'bool independent' fields.

 --- omp-low.c (revision 225758)
 +++ omp-low.c (working copy)
 @@ -136,8 +136,16 @@ struct omp_region
/* True if this is nested inside an OpenACC kernels construct.  */
bool inside_kernels_p;
  
 +  /* Records a generic kind field.  */
 +  int kind;
 +
/* For an OpenACC loop, the level of parallelism requested.  */
int gwv_this;
 +
 +  /* For an OpenACC loop directive, true if has the 'independent' clause.  */
 +  bool independent;
 +
 +  tree broadcast_array;
  };

I'm assuming just a patch conflict resolution hiccup; committed in
r225767:

commit 4a10c97b741bbc3d7278779337d5c0bfea8297c2
Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4
Date:   Tue Jul 14 09:30:02 2015 +

Code cleanup

gcc/
* omp-low.c (struct omp_region): Remove broadcast_array member.

... that mistakenly reappeared in r225759, after having been removed in
r225647.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@225767 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp | 4 
 gcc/omp-low.c  | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index dfe0c95..d7459d0 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,7 @@
+2015-07-14  Thomas Schwinge  tho...@codesourcery.com
+
+   * omp-low.c (struct omp_region): Remove broadcast_array member.
+
 2015-07-14  Tom de Vries  t...@codesourcery.com
 
* tree-parloops.c (parallelize_loops): Use marked_independent flag in
diff --git gcc/omp-low.c gcc/omp-low.c
index fc6c7a9..0419dcd 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -144,8 +144,6 @@ struct omp_region
 
   /* For an OpenACC loop directive, true if has the 'independent' clause.  */
   bool independent;
-
-  tree broadcast_array;
 };
 
 /* Context structure.  Used to store information about each parallel


Grüße,
 Thomas


pgpzur1TWFsHn.pgp
Description: PGP signature


Re: [PATCH, gomp4] Propagate independent clause for OpenACC kernels pass

2015-07-14 Thread Chung-Lin Tang
On 15/7/14 3:00 PM, Jakub Jelinek wrote:
 On Tue, Jul 14, 2015 at 01:46:04PM +0800, Chung-Lin Tang wrote:
 this patch provides a 'bool independent' field in struct loop, which
 will be switched on by an independent clause in a #pragma acc loop 
 directive.
 I assume you'll be wiring it to the kernels parloops pass in a followup 
 patch.

 Note: there are already a few other similar fields in struct loop, namely
 'safelen' and 'can_be_parallel', used by OMP simd safelen and GRAPHITE 
 respectively.
 The intention and/or setting of these fields are all a bit different, so I've
 decided to add a new bool for OpenACC.
 
 How is it different though?  Can you cite exact definition of the
 independent clause vs. safelen (set to INT_MAX)?
 The OpenMP definition is:
 A SIMD loop has logical iterations numbered 0,1,...,N-1 where N is the
 number of loop iterations, and the logical numbering denotes the sequence in 
 which the iterations would
 be executed if the associated loop(s) were executed with no SIMD 
 instructions. If the safelen
 clause is used then no two iterations executed concurrently with SIMD 
 instructions can have a
 greater distance in the logical iteration space than its value.
 ...
 Lexical forward dependencies in the iterations of the
 original loop must be preserved within each SIMD chunk.

The wording of OpenACC independent is more simple:
... the independent clause tells the implementation that the iterations of 
this loop
are data-independent with respect to each other. -- OpenACC spec 2.7.9

I would say this implies even more relaxed conditions than OpenMP simd safelen,
essentially saying that the compiler doesn't even need dependence analysis; just
assume independence of iterations.

 So e.g. safelen = 32 means for PTX you can safely implement it by
 running up to 32 consecutive iterations by all threads in the warp
 (assuming code that for some reason must be run by a single thread
 (e.g. calls to functions that are marked so that they expect to be run
 by the first thread in a warp initially) is run sequentially by increasing
 iterator), but it doesn't mean the iterations have no dependencies in between
 them whatsoever (see the above note about lexical forward dependencies),
 so you can't parallelize it by assigning different iterations to different
 threads outside of warp (or pthread_create created threads).

 So if OpenACC independent means there are no dependencies in between
 iterations, the OpenMP counterpart here is #pragma omp for simd schedule 
 (auto)
 or #pragma omp distribute parallel for simd schedule (auto).

schedule(auto) appears to correspond to the OpenACC 'auto' clause, or
what is implied in a kernels compute construct, but I'm not sure it implies
no dependencies between iterations?

Putting aside the semantic issues, as of currently safelen0 turns on a certain 
amount of
vectorization code that we are not currently using (and not likely at all for 
nvptx).
Right now, we're just trying to pass the new flag to a kernels tree-parloops 
based pass.

Maybe this can all be reconciled later in a more precise way, e.g. have flags 
that correspond
specifically to phases of internal compiler passes (and selected by needs of 
the accel target),
instead of ones that are sort of associated with high-level language features.

Chung-Lin



Re: [PATCH] PR/66760, ipa-inline-analysis.c compile-time hog

2015-07-14 Thread Martin Jambor
Hi,

On Mon, Jul 13, 2015 at 03:49:05PM +0200, Richard Biener wrote:
 On Mon, Jul 13, 2015 at 3:46 PM, Paolo Bonzini pbonz...@redhat.com wrote:
 
 
  On 13/07/2015 15:45, Richard Biener wrote:
  It would be nice to have a patch that can be backported to the GCC 5 branch
  as well.  We can improve this on trunk as followup,no?
 
  The patch I've already posted can be backported. O:-)
 
 So unless Martin objects consider the patch approved for trunk and for
 backporting
 after 5.2 is released and trunk shows no issues.
 
 Martin - can you take care of committing if you are fine with it?

I will.  I will also do the renaming from func_body_info to
ipa_func_body_info as a followup.

Martin

 
 Thanks,
 Richard.
 
  Paolo


[patch] install missing rs6000 header, needed to build plugins

2015-07-14 Thread Matthias Klose
This installs the rs6000-cpus.def file for powerpc targets (included by
default64.h), which is needed to build plugins on powerpc64* targets.

Ok for the trunk, and the 5 branch? Tested with a build and installation.

Matthias

PR target/66840
* config/rs6000/t-rs6000 (TM_H): Add rs6000-cpus.def.

--- gcc/config/rs6000/t-rs6000
+++ gcc/config/rs6000/t-rs6000
@@ -19,6 +19,7 @@
 # http://www.gnu.org/licenses/.
 
 TM_H += $(srcdir)/config/rs6000/rs6000-builtin.def
+TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 
 rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
$(COMPILE) $


Re: [PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-14 Thread Richard Biener
On Tue, Jul 14, 2015 at 12:45 PM, Richard Biener
richard.guent...@gmail.com wrote:
 On Sun, Jul 12, 2015 at 5:53 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 12/07/15 17:45, Tom de Vries wrote:

 Hi,

 this patch series implements the forbidding of multi-step garbage
 collection liveness dependencies between caches.

 The first four patches downgrade 3 caches to non-cache, since they
 introduce multi-step dependencies. This allows us to decouple:
 - establishing a policy for multi-step dependencies in caches, and
 - fixing issues that allow us to use these 3 as caches again.

 1. Downgrade debug_args_for_decl to non-cache
 2. Add struct tree_decl_map_hasher
 3. Downgrade debug_expr_for_decl to non-cache
 4. Downgrade value_expr_for_decl to non-cache
 5. Don't mark live recursively in gt_cleare_cache

 Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

 I'll post the patches in response to this email.


 This patch downgrade value_expr_for_decl to non-cache.

 OK for trunk?

 Similar to the debug_decl_map the idea of this hash is that
 it records on-the-side info for an entity.  If that entity is no
 longer needed then we should discard the reference so we
 can collect the entity.  Whether the on-the-side info is used
 in other means isn't relevant here.

 Note that they are also not really caches in that removing
 an entry for a key that is still live will cause information loss
 that cannot be restored.

 What's the bad side-effect of the late marking other than
 affecting this (or other) caches?

 What's your idea of restoring the idea of these maps under
 the constraint you try to add?

For example have those special caches have two marking phases.
The first phase marks all non-key edges originating from each entry.
The second phase is the same as what we have now - unmarked
entries get removed.

The first phase would go with regular marking, the second when
processing caches.

You'd delay collecting the memory the non-key edges point to
to the next GC run, but I think that's a fair trade-off.

Richard.

 Richard.

 Thanks,
 - Tom



Re: [PATCH] Unswitching outer loops.

2015-07-14 Thread Richard Biener
On Fri, Jul 10, 2015 at 12:02 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Hi All,

 Here is presented simple transformation which tries to hoist out of
 outer-loop a check on zero trip count for inner-loop. This is very
 restricted transformation since it accepts outer-loops with very
 simple cfg, as for example:
 acc = 0;
for (i = 1; i = m; i++) {
   for (j = 0; j  n; j++)
  if (l[j] == i) { v[j] = acc; acc++; };
   acc = 1;
}

 Note that degenerative outer loop (without inner loop) will be
 completely deleted as dead code.
 The main goal of this transformation was to convert outer-loop to form
 accepted by outer-loop vectorization (such test-case is also included
 to patch).

 Bootstrap and regression testing did not show any new failures.

 Is it OK for trunk?

I think this is

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=23855

as well.  It has a patch adding a invariant loop guard hoisting
phase to loop-header copying.  Yeah, it needs updating to
trunk again I suppose.  It's always non-stage1 when I come
back to that patch.

Your patch seems to be very specific and only handles outer
loops of innermost loops.

Richard.

 ChangeLog:
 2015-07-10  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-ssa-loop-unswitch.c: Include tree-cfgcleanup.h and
 gimple-iterator.h, add prototype for tree_unswitch_outer_loop.
 (tree_ssa_unswitch_loops): Add invoke of tree_unswitch_outer_loop.
 (tree_unswitch_outer_loop): New function.

 gcc/testsuite/ChangeLog:
 * gcc.dg/tree-ssa/unswitch-outer-loop-1.c: New test.
 * gcc.dg/vect/vect-outer-simd-3.c: New test.


Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Kyrill Tkachov

Hi Segher,

On 14/07/15 01:38, Segher Boessenkool wrote:

On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

For the testcase in the patch we were generating an extra neg instruction:
 cmp w0, wzr
 csneg   w0, w0, w0, ge
 neg w0, w0
 ret

instead of the optimal:
 cmp w0, wzr
 csneg   w0, w0, w0, lt
 ret

The reason is that combine tries to merge the operation into a negation of
an abs.

Before combine, you have two insns, a negation and an abs, so that is
not so very strange :-)


Well, because the aarch64 integer abs expander expands to a compare
and a conditional negate, we have a compare followed by an if_then_else
with a neg in it. That's followed by a neg of that:
(insn 6 3 7 2 (set (reg:CC 66 cc)
(compare:CC (reg/v:SI 76 [ x ])
(const_int 0 [0]))) abs.c:1 374 {*cmpsi}
 (nil))
(insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
(if_then_else:SI (lt (reg:CC 66 cc)
(const_int 0 [0]))
(neg:SI (reg/v:SI 76 [ x ]))
(reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
 (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
(expr_list:REG_DEAD (reg:CC 66 cc)
(expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
(nil)
(insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
(neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
 (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
(nil)))


combine tries to convert it into a neg-of-an-abs in simplify_if_then_else



Some archs have actual nabs insns btw (for floating point, anyway).

Archs without abs or conditional assignment, and with cheap branches,
get a branch around a neg followed by another neg, at expand time.
This then isn't optimised away either.

So I'd say expand should be made a bit smarter for this.  Failing
that, your approach looks fine to me -- assuming you want to have a
fake abs insn at all.

On to the patch...



+;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't a good

x  0 here.


Thanks, I'll fix that when committing if approved.

Kyrill



Segher





Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Andrew Pinski
On Tue, Jul 14, 2015 at 1:18 AM, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:
 Hi Segher,

 On 14/07/15 01:38, Segher Boessenkool wrote:

 On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

 For the testcase in the patch we were generating an extra neg
 instruction:
  cmp w0, wzr
  csneg   w0, w0, w0, ge
  neg w0, w0
  ret

 instead of the optimal:
  cmp w0, wzr
  csneg   w0, w0, w0, lt
  ret

 The reason is that combine tries to merge the operation into a negation
 of
 an abs.

 Before combine, you have two insns, a negation and an abs, so that is
 not so very strange :-)


 Well, because the aarch64 integer abs expander expands to a compare
 and a conditional negate, we have a compare followed by an if_then_else
 with a neg in it. That's followed by a neg of that:
 (insn 6 3 7 2 (set (reg:CC 66 cc)
 (compare:CC (reg/v:SI 76 [ x ])
 (const_int 0 [0]))) abs.c:1 374 {*cmpsi}
  (nil))
 (insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
 (if_then_else:SI (lt (reg:CC 66 cc)
 (const_int 0 [0]))
 (neg:SI (reg/v:SI 76 [ x ]))
 (reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
  (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
 (expr_list:REG_DEAD (reg:CC 66 cc)
 (expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
 (nil)
 (insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
 (neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
  (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
 (nil)))


What does combine do when it props 7 into 8?  I suspect you want to
optimize that instead of doing it any other way.
That is if prop the neg into the two sides of the conditional and if
one simplifies, produce a new rtl.

Thanks,
Andrew Pinski



 combine tries to convert it into a neg-of-an-abs in simplify_if_then_else


 Some archs have actual nabs insns btw (for floating point, anyway).

 Archs without abs or conditional assignment, and with cheap branches,
 get a branch around a neg followed by another neg, at expand time.
 This then isn't optimised away either.

 So I'd say expand should be made a bit smarter for this.  Failing
 that, your approach looks fine to me -- assuming you want to have a
 fake abs insn at all.

 On to the patch...


 +;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't a
 good

 x  0 here.


 Thanks, I'll fix that when committing if approved.

 Kyrill


 Segher




Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Andrew Pinski
On Tue, Jul 14, 2015 at 3:13 AM, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:

 On 14/07/15 11:06, Andrew Pinski wrote:

 On Tue, Jul 14, 2015 at 1:18 AM, Kyrill Tkachov kyrylo.tkac...@arm.com
 wrote:

 Hi Segher,

 On 14/07/15 01:38, Segher Boessenkool wrote:

 On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

 For the testcase in the patch we were generating an extra neg
 instruction:
   cmp w0, wzr
   csneg   w0, w0, w0, ge
   neg w0, w0
   ret

 instead of the optimal:
   cmp w0, wzr
   csneg   w0, w0, w0, lt
   ret

 The reason is that combine tries to merge the operation into a negation
 of
 an abs.

 Before combine, you have two insns, a negation and an abs, so that is
 not so very strange :-)


 Well, because the aarch64 integer abs expander expands to a compare
 and a conditional negate, we have a compare followed by an if_then_else
 with a neg in it. That's followed by a neg of that:
 (insn 6 3 7 2 (set (reg:CC 66 cc)
  (compare:CC (reg/v:SI 76 [ x ])
  (const_int 0 [0]))) abs.c:1 374 {*cmpsi}
   (nil))
 (insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
  (if_then_else:SI (lt (reg:CC 66 cc)
  (const_int 0 [0]))
  (neg:SI (reg/v:SI 76 [ x ]))
  (reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
   (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
  (expr_list:REG_DEAD (reg:CC 66 cc)
  (expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
  (nil)
 (insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
  (neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
   (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
  (nil)))


 What does combine do when it props 7 into 8?  I suspect you want to
 optimize that instead of doing it any other way.
 That is if prop the neg into the two sides of the conditional and if
 one simplifies, produce a new rtl.


 Yeah, that's what combine tries in simplify_if_then_else, instead of
 propagating the neg it tries a (neg (abs x)). I did consider telling it not
 to do that, but how would it be gated?
 Should we look if the one arm of the if_then_else is a neg of the other and
 invert the comparison code instead of trying (neg (abs x)) when
 HAVE_conditional_move?

Does it do that even with insn 6 is not involved?
I doubt it.

Please look into that again.  I bet it is not creating the instruction
you want it to create and having the not in the last operand of the
if_then_else instead of the first one.

Thanks,
Andrew



 Kyrill


 Thanks,
 Andrew Pinski


 combine tries to convert it into a neg-of-an-abs in simplify_if_then_else

 Some archs have actual nabs insns btw (for floating point, anyway).

 Archs without abs or conditional assignment, and with cheap branches,
 get a branch around a neg followed by another neg, at expand time.
 This then isn't optimised away either.

 So I'd say expand should be made a bit smarter for this.  Failing
 that, your approach looks fine to me -- assuming you want to have a
 fake abs insn at all.

 On to the patch...


 +;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't
 a
 good

 x  0 here.


 Thanks, I'll fix that when committing if approved.

 Kyrill

 Segher




Re: [PATCH] Yet another simple fix to enhance outer-loop vectorization.

2015-07-14 Thread Richard Biener
On Mon, Jun 29, 2015 at 6:15 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Hi All,

 Here is updated patch containing missed change in
 slpeel_tree_peel_loop_to_edge which prevents renaming of exit PHI uses
 in inner loop.

Ok.

Thanks,
Richard.

 ChangeLog:
 2015-06-29  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
 to allow renaming of PHI arguments on edges incoming from outer
 loop header, add corresponding check before start PHI iterator.
 (slpeel_tree_duplicate_loop_to_edge_cfg): Introduce new bool
 variable DUPLICATE_OUTER_LOOP and set it to true for outer loops
 with true force_vectorize.  Set-up dominator for outer loop too.
 Pass DUPLICATE_OUTER_LOOP as argument to rename_variables_in_bb.
 (slpeel_can_duplicate_loop_p): Allow duplicate of outer loop if it
 was marked with force_vectorize and has restricted cfg.
 (slpeel_tree_peel_loop_to_edge): Do not rename exit PHI uses in
 inner loop.
 * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
 do peeling for outer loops.

 gcc/testsuite/ChangeLog:
 * gcc.dg/vect/vect-outer-simd-2.c: New test.

 2015-06-17 19:35 GMT+03:00 Yuri Rumyantsev ysrum...@gmail.com:
 Richard,

 I attached updated patch.
 You asked me to explain why I did changes for renaming.
 If we do not change PHI arguments for inner loop header we can get the
 following IR:
 source outer loop:
   bb 5: outer-loop header
   # i_45 = PHI 0(4), i_18(9)
   # .MEM_17 = PHI .MEM_4(D)(4), .MEM_44(9)

   bb 6:inner-loop header
   # .MEM_46 = PHI .MEM_44(7), .MEM_17(5)

 after duplication we have (without argument correction):
   bb 12:
   # i_74 = PHI i_64(13), 0(17)
   # .MEM_73 = PHI .MEM_97(13), .MEM_4(D)(17)

   bb 15:
   # .MEM_63 = PHI .MEM_17(12), .MEM_97(16)

 and later we get verifier error:
 test1.c:20:6: error: definition in block 6 does not dominate use in block 10
  void foo (int n)
   ^
 for SSA_NAME: .MEM_17 in statement:
 .MEM_63 = PHI .MEM_17(10), .MEM_97(14)

 and you can see that we need to rename MEM_17 argument for out-coming
 edge to MEM_73 since
 MEM_17 was converted to MEM_73 in outer-loop header.

 This explains my simple fix in rename_variables_in_bb.
 Note also that loop distribution is not performed for outer loops.

 I also did a change in slpeel_can_duplicate_loop_p to simplify check.

 Any comments will be appreciated.

 Yuri.

 2015-06-17 15:24 GMT+03:00 Richard Biener richard.guent...@gmail.com:
 On Tue, Jun 16, 2015 at 4:12 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Thanks a lot Richard for your review.

 I presented updated patch which is not gated by force_vectorize.
 I added test on outer-loop in vect_enhance_data_refs_alignment
 and it returns false for it because we can not improve dr alighment
 through outer-loop peeling in general. So I assume that only
 versioning for alignment can be applied for targets do not support
 unaligned memory access.

 @@ -998,7 +998,12 @@
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

 +  /* Peeling of outer loops can't improve alignment.  */
 +  if (loop_vinfo  LOOP_VINFO_LOOP (loop_vinfo)-inner)
 +return false;
 +

 but this looks wrong.  It depends on the context (DRs in the outer loop
 can improve alignment by peeling the outer loop and we can still
 peel the inner loop for alignment).

 So IMHO the correct place to amend is vect_enhance_data_refs_alignment
 (which it seems doesn't consider peeling the inner loop).

 I'd say you should simply add

  || loop-inner)

 to the

   /* Check if we can possibly peel the loop.  */
   if (!vect_can_advance_ivs_p (loop_vinfo)
   || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
 do_peeling = false;

 check.

 I did not change tests for outer loops in slpeel_can_duplicate_loop_p
 as you proposed since it is not called outside vectorization.

 There is still no reason for this complex condition, so please remove it.

 _Please_ also generate diffs with -p, it is very tedious to see patch hunks
 without a function name context.

 You didn't explain why you needed the renaming changes as I don't
 remember needing it when using the code from loop distribution.

 I also noticed one not-resolved issue with outer-loop peeling - we don't
 consider remainder for possible vectorization of inner-loop as we can see
 on the following example:

   for (i = 0; i  n; i++) {
 diff = 0;
 for (j = 0; j  M; j++) {
   diff += in[j+i]*coeff[j];
 }
 out[i] = diff;
   }

 Is it worth to fix it?

 You mean vectorizing the inner loop in the niter peel epilogue loop
 of the outer loop?  Possibly yes.

 Thanks,
 Richard.

 2015-06-16  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-vect-loop-manip.c (rename_variables_in_bb): Add argument
 to allow renaming of PHI arguments on edges incoming from outer
 loop header, add corresponding 

Re: [PATCH, gomp4] Propagate independent clause for OpenACC kernels pass

2015-07-14 Thread Thomas Schwinge
Hi!

On Tue, 14 Jul 2015 11:36:24 +0200, I wrote:
 On Tue, 14 Jul 2015 13:46:04 +0800, Chung-Lin Tang clt...@codesourcery.com 
 wrote:
  this patch provides a 'bool independent' field in struct loop, which
  will be switched on by an independent clause in a #pragma acc loop 
  directive.
 
 Thanks!
 
 
 This patch has been developed in context of OpenACC kernels constructs,
 but, is there anything still to be done regarding OpenACC parallel
 constructs?  That is, are we currently *using* the independent yes/no
 information appropriately for these?

Tom mentioned:

| openacc spec:
| ...
| 2.7.9 independent clause
| In a kernels construct, the independent clause tells the implementation 
| that the iterations of this loop are data-independent with respect to 
| each other. This allows the implementation to generate code to execute 
| the iterations in parallel with no synchronization.
| In a parallel construct, the independent clause is implied on all loop 
| directives without a seq clause.
| ...
|
| I think you're sort of asking if the seq clause has been implemented.
|
| openacc spec:
| ...
| 2.7.5 seq clause
| The seq clause specifies that the associated loop or loops are to be 
| executed sequentially by the accelerator. This clause will override any 
| automatic parallelization or vectorization.
| ...

Thanks, and right, I also realized that.  ;-)

Yet, my request is still to properly document this.  That is, if the
idea is that gcc/omp-low.c:scan_omp_for makes sure to emit a diagnostic
for the invalid combination of a gang/worker/vector clause together with
a seq clause, then in combination with the code newly added to
gcc/omp-low.c:find_omp_for_region_data to set region-independent = true
inside OpenACC parallel constructs, can't we then
assert(region-independent == true) in expand_omp_for_static_chunk and
expand_omp_for_static_nochunk?

So far it looks to me as if for OpenACC parallel constructs, we only have
a producer of region-independent = true, but no consumer.  Even if the
latter one is implicit, we should document (using an assertion) this
in some way, for clarity.

While looking at that code, are we convinced that the diagnostic
machinery and subsequent handling will do the right thing in such cases:

[...]
#pragma acc loop [gang/worker/vector]
  for [...]
#pragma acc loop seq
for[...]

To me it looks (but I have not verified) that in such cases, the inner
region's ctx-gwv_this will have been initialized from the outer_ctx, so
to some combination of gang/worker/vector, and will that then be used to
parallelize the inner loop, which shouldn't be done, as I'm understanding
this?

It seems to be as if this gang/worker/vector/seq/independent clause
handling code that is currently distributed over
gcc/omp-low.c:scan_omp_for and gcc/omp-low.c:find_omp_for_region_data
(and, worse, also the front ends; see below) should be merged into one
place.

gcc/fortran/openmp.c:resolve_oacc_loop_blocks emits a diagnostic for seq
with independent; gcc/omp-low.c:scan_omp_for doesn't.  Should it?  Then,
why do we need this Fortran-specific checking code?  Should the
additional checking being done there get moved to OMP lowering?

In the C and C++ front ends, there's related checking code for those
clause attached to OpenACC routine constructs; not sure if that checking
should also be handled during OMP lowering, in one place?  I couldn't
find similar code in the Fortran front end (but didn't look very hard).

This is reminiscent of the discussion started in
http://news.gmane.org/find-root.php?message_id=%3C87mw0zirnq.fsf%40schwinge.name%3E
and following messages, about using
gcc/omp-low.c:check_omp_nesting_restrictions to do such checking (which
Cesar has not yet followed up on, as far as I know).

A few more points:

Does OMP_CLAUSE_INDEPENDENT need to be handled in
gcc/c-family/c-omp.c:c_oacc_split_loop_clauses?

While looking at that, ;-) again a few more things...  Are others clauses
missing to be handled there: tile, device_type (probably not; has all
been processed before?)?  Why is the firstprivate clause being duplicated
for loop_clauses/non_loop_clauses?  In OpenACC, there is no firstprivate
clause for loop constructs.  Why is the private clause being duplicated
for loop_clauses/non_loop_clauses?  My understanding is that the private
clause in a combined OpenACC parallel loop construct is to be handled as
if it were attached to the loop construct (... which is in contrast to
firstprivate -- yes OpenACC 2.0a is a bit assymetric in that regard).

What about the corresponding Fortran code,
gcc/fortran/trans-openmp.c:gfc_trans_oacc_combined_directive?

Do we have test cases to cover all this?


Grüße,
 Thomas


pgpBi5kHnGJoP.pgp
Description: PGP signature


Re: [gomp] Move openacc vector worker single handling to RTL

2015-07-14 Thread Thomas Schwinge
Hi!

It's me, again.  ;-)

On Thu, 09 Jul 2015 20:25:22 -0400, Nathan Sidwell nat...@acm.org wrote:
 This is the patch I committed.  [...]

 --- config/nvptx/nvptx.c  (revision 225323)
 +++ config/nvptx/nvptx.c  (working copy)

 +/* Direction of the spill/fill and looping setup/teardown indicator.  */
 +
 +enum propagate_mask
 +  {
 +PM_read = 1  0,
 +PM_write = 1  1,
 +PM_loop_begin = 1  2,
 +PM_loop_end = 1  3,
 +
 +PM_read_write = PM_read | PM_write
 +  };
 +
 +/* Generate instruction(s) to spill or fill register REG to/from the
 +   worker broadcast array.  PM indicates what is to be done, REP
 +   how many loop iterations will be executed (0 for not a loop).  */
 +   
 +static rtx
 +nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned rep, wcast_data_t 
 *data)
 +{
 +  rtx  res;
 +  machine_mode mode = GET_MODE (reg);
 +
 +  switch (mode)
 +{
 +case BImode:
 +  {
 + rtx tmp = gen_reg_rtx (SImode);
 + 
 + start_sequence ();
 + if (pm  PM_read)
 +   emit_insn (gen_sel_truesi (tmp, reg, GEN_INT (1), const0_rtx));
 + emit_insn (nvptx_gen_wcast (tmp, pm, rep, data));
 + if (pm  PM_write)
 +   emit_insn (gen_rtx_SET (BImode, reg,
 +   gen_rtx_NE (BImode, tmp, const0_rtx)));
 + res = get_insns ();
 + end_sequence ();
 +  }
 +  break;
 +
 +default:
 +  {
 + rtx addr = data-ptr;
 +
 + if (!addr)
 +   {
 + unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
 +
 + if (align  worker_bcast_align)
 +   worker_bcast_align = align;
 + data-offset = (data-offset + align - 1)  ~(align - 1);
 + addr = data-base;
 + if (data-offset)
 +   addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (data-offset));
 +   }
 + 
 + addr = gen_rtx_MEM (mode, addr);
 + addr = gen_rtx_UNSPEC (mode, gen_rtvec (1, addr), UNSPEC_SHARED_DATA);
 + if (pm  PM_read)
 +   res = gen_rtx_SET (mode, addr, reg);
 + if (pm  PM_write)
 +   res = gen_rtx_SET (mode, reg, addr);
 +
 + if (data-ptr)
 +   {
 + /* We're using a ptr, increment it.  */
 + start_sequence ();
 + 
 + emit_insn (res);
 + emit_insn (gen_adddi3 (data-ptr, data-ptr,
 +GEN_INT (GET_MODE_SIZE (GET_MODE (res);
 + res = get_insns ();
 + end_sequence ();
 +   }
 + else
 +   rep = 1;
 + data-offset += rep * GET_MODE_SIZE (GET_MODE (reg));
 +  }
 +  break;
 +}
 +  return res;
 +}

OK to commit the following, or should other PM_* combinations be handled
here, such as (PM_read | PM_write)?  (But I don't think so.)

commit a1909fecb28267aa76df538ad9e01e4d228f5f9a
Author: Thomas Schwinge tho...@codesourcery.com
Date:   Tue Jul 14 09:59:48 2015 +0200

nvptx: Avoid -Wuninitialized diagnostic

[...]/source-gcc/gcc/config/nvptx/nvptx.c: In function 'rtx_def* 
nvptx_gen_wcast(rtx, propagate_mask, unsigned int, wcast_data_t*)':
[...]/source-gcc/gcc/config/nvptx/nvptx.c:1258:8: warning: 'res' may be 
used uninitialized in this function [-Wuninitialized]

gcc/
* config/nvptx/nvptx.c (nvptx_gen_wcast): Mark unreachable code
path.
---
 gcc/config/nvptx/nvptx.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c
index 0e1e764..dfe5d34 100644
--- gcc/config/nvptx/nvptx.c
+++ gcc/config/nvptx/nvptx.c
@@ -1253,10 +1253,12 @@ nvptx_gen_wcast (rtx reg, propagate_mask pm, unsigned 
rep, wcast_data_t *data)

addr = gen_rtx_MEM (mode, addr);
addr = gen_rtx_UNSPEC (mode, gen_rtvec (1, addr), UNSPEC_SHARED_DATA);
-   if (pm  PM_read)
+   if (pm == PM_read)
  res = gen_rtx_SET (addr, reg);
-   if (pm  PM_write)
+   else if (pm == PM_write)
  res = gen_rtx_SET (reg, addr);
+   else
+ gcc_unreachable ();
 
if (data-ptr)
  {


Grüße,
 Thomas


signature.asc
Description: PGP signature


Re: [Patch wwwdocs] gcc-6/changes.html : Document AMD monitorx and mwaitx

2015-07-14 Thread Gerald Pfeifer
On Tue, 14 Jul 2015, Kumar, Venkataramanan wrote:
 Please let me know if it is good to commit.

This looks good to me.

Gerald


Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Segher Boessenkool
On Tue, Jul 14, 2015 at 07:04:13PM +0800, pins...@gmail.com wrote:
 Combine knows how to use define_split without being an insn. 

Combine uses define_split in very different circumstances than it uses
define_insn.  In this case, define_split will only do anything if the
nabs is combined from three insns, while a define_insn handles it
being combined from two as well.

A define_split will be split during combine (and combine can then work
on its factors); a define_insn is only split _after_ combine.


Segher


Re: [Bug fortran/52846] [F2008] Support submodules - part 2/3

2015-07-14 Thread Paul Richard Thomas
PS I forgot to add that I have added a submodule file cleanup
procedure to fortran-modules.exp and cleanup lines to each of the
submodule testcases, such as:

! { dg-final { cleanup-submodules color_points_a } }

Paul

On 14 July 2015 at 13:10, Paul Richard Thomas
paul.richard.tho...@gmail.com wrote:
 Dear All,

 Reinhold Bader has pointed out the naming the submodule files after
 the submodule name and using .mod as the extension can potentially
 lead to clashes. Therefore, I have written a patch to change gfortran
 to follow the naming convention of another leading brand:

 submodule filename = module@ancestor@@submodule.smod

 The implementation is straightforward and the ChangeLog and the patch
 provide an adequate description.

 Bootstraps and regtests on x86_64 - OK for trunk?

 Paul

 2015-07-14  Paul Thomas  pa...@gcc.gnu.org

 PR fortran/52846
 * gfortran.h : Add 'submodule_name' to gfc_use_list structure.
 * module.c (gfc_match_submodule): Define submodule_name and add
 static 'submodule_name'.
 (gfc_match_submodule): Build up submodule filenames, using '@'
 as a delimiter. Store the output filename in 'submodule_name'.
 (gfc_dump_module): If current state is COMP_SUBMODULE, write
 to file 'submodule_name', using SUBMODULE_EXTENSION.
 (gfc_use_module): Similarly, use the 'submodule_name' field in
 the gfc_use_list structure and SUBMODULE_EXTENSION to read the
 implicitly used submodule files.



-- 
Outside of a dog, a book is a man's best friend. Inside of a dog it's
too dark to read.

Groucho Marx


Re: [gomp4.1] Handle linear clause modifiers in declare simd

2015-07-14 Thread Ilya Verbin
On Wed, Jul 01, 2015 at 12:55:38 +0200, Jakub Jelinek wrote:
   * cgraph.h (enum cgraph_simd_clone_arg_type): Add
   SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP,
   SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP,
   and SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP.
   (struct cgraph_simd_clone_arg): Adjust comment.
   * omp-low.c (simd_clone_clauses_extract): Honor
   OMP_CLAUSE_LINEAR_KIND.
   (simd_clone_mangle): Mangle the various linear kinds
   per the new ABI.
   (simd_clone_adjust_argument_types): Handle
   SIMD_CLONE_ARG_TYPE_LINEAR_*_CONSTANT_STEP.
   (simd_clone_init_simd_arrays): Don't do anything
   for uval.
   (simd_clone_adjust): Handle
   SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP like
   SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP.
   Handle SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP.
 c/
   * c-tree.h (c_finish_omp_clauses): Add declare_simd argument.
   * c-parser.c (c_parser_omp_clause_linear): Don't handle uval
   modifier in C.
   (c_parser_omp_all_clauses): If mask includes uniform clause,
   pass true to c_finish_omp_clauses' declare_simd.
   * c-typeck.c (c_finish_omp_clauses): Add declare_simd argument,
   don't set need_implicitly_determined if it is true.
 cp/
   * cp-tree.h (finish_omp_clauses): Add declare_simd argument.
   * parser.c (cp_parser_omp_all_clauses): If mask includes uniform
   clause, pass true to finish_omp_clauses' declare_simd.
   * pt.c (apply_late_template_attributes): Pass true to
   finish_omp_clauses' declare_simd.
   * semantics.c (finish_omp_clauses): Add declare_simd argument,
   don't set need_implicitly_determined if it is true.
 testsuite/
   * gcc.dg/gomp/clause-1.c (foo): Add some linear clause tests.
   * g++.dg/gomp/clause-3.C (foo): Likewise.
   * g++.dg/gomp/declare-simd-3.C: New test.

This caused:

gcc/tree-vect-stmts.c: In function ‘bool vectorizable_simd_clone_call(gimple, 
gimple_stmt_iterator*, gimple_statement_base**, slp_tree)’:
gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
‘SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP’ not handled in switch 
[-Werror=switch]
  switch (n-simdclone-args[i].arg_type)
 ^
gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
‘SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP’ not handled in switch 
[-Werror=switch]
gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
‘SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP’ not handled in switch 
[-Werror=switch]
cc1plus: all warnings being treated as errors
make[4]: *** [tree-vect-stmts.o] Error 1

  -- Ilya


Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Sebastian Huber



On 14/07/15 12:44, Jakub Jelinek wrote:

On Tue, Jul 14, 2015 at 11:28:18AM +0200, Sebastian Huber wrote:

If I destroy the barrier of the last team, and then initialize it later in
gomp_new_team()

+static inline struct gomp_team *
+get_last_team (unsigned nthreads)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  if (thr-ts.team == NULL)
+{
+  struct gomp_thread_pool *pool = thr-thread_pool;
+  if (pool != NULL)
+   {
+ struct gomp_team *last_team = pool-last_team;
+ if (last_team != NULL  last_team-nthreads == nthreads)
+   {
+ pool-last_team = NULL;
+ gomp_barrier_destroy (last_team-barrier);
+ return last_team;
+   }
+   }
+}
+  return NULL;
+}

then I get test suite failures. Is it safe to destroy the team barrier here
or do we circumvent the last team mechanism which is supposed to delay the
destruction?

Then you indeed supposedly hit the reason why last_team exists.
Thus, if not reinitializing the team barrier works even with
--disable-linux-futex configured libgomp, I guess it is ok not to
destroy it and reinit it again.


The gomp_barrier_reinit() is only used for the pool-threads_dock 
barrier, but not for the team-barrier. I already test for
last_team-nthreads == nthreads, so I don't think it makes sense to 
reinit, which uses an atomic operation on Linux.


I run the test suite on x86_64-unknown-linux-gnu with the previously 
mentioned asserts and got no unexpected failures.


With --disable-linux-futex (without asserts) I got several failures, but 
none of them is related to my patch, e.g. they are of the following type


/tmp/ccw4RofR.o: In function `main':
data-already-3.f:(.text+0x56): undefined reference to `acc_copyin_array_h_'

=== libgomp tests ===

Schedule of variations:
unix

Running target unix
Using /usr/share/dejagnu/baseboards/unix.exp as board description file 
for target.
Using /usr/share/dejagnu/config/unix.exp as generic interface file for 
target.
Using 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/config/default.exp as 
tool-and-target-specific interface file.
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.c/c.exp ...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.c++/c++.exp 
...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.fortran/fortran.exp 
...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.graphite/graphite.exp 
...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-c/c.exp 
...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-c++/c++.exp 
...
Running 
/home/EB/sebastian_h/archive/gcc-git/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp 
...
FAIL: libgomp.oacc-fortran/acc_on_device-1-1.f90 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/acc_on_device-1-1.f90 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/acc_on_device-1-2.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/acc_on_device-1-2.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/acc_on_device-1-3.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/acc_on_device-1-3.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/data-already-1.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/data-already-1.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/data-already-3.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/data-already-3.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/data-already-4.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/data-already-4.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/data-already-5.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/data-already-5.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to produce executable
FAIL: libgomp.oacc-fortran/data-already-6.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  (test for excess 
errors)
WARNING: libgomp.oacc-fortran/data-already-6.f 
-DACC_DEVICE_TYPE_host_nonshm=1 -DACC_MEM_SHARED=0  -O  compilation 
failed to 

Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Segher Boessenkool
On Tue, Jul 14, 2015 at 09:18:06AM +0100, Kyrill Tkachov wrote:
 Before combine, you have two insns, a negation and an abs, so that is
 not so very strange :-)

Oh, hrm, my aarch64 cross was three months old, and this now changed.
Or I messed up.  Sorry for the noise.

It does look like the if_then_else stuff in combine (and simplify-rtx)
could use some improvement, probably more than just fixing this one
case.  Or look at the first huge case in combine_simplify_rtx, that
looks eerily similar.


Segher


Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Kyrill Tkachov


On 14/07/15 11:40, Andrew Pinski wrote:

On Tue, Jul 14, 2015 at 3:13 AM, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:

On 14/07/15 11:06, Andrew Pinski wrote:

On Tue, Jul 14, 2015 at 1:18 AM, Kyrill Tkachov kyrylo.tkac...@arm.com
wrote:

Hi Segher,

On 14/07/15 01:38, Segher Boessenkool wrote:

On Mon, Jul 13, 2015 at 10:48:19AM +0100, Kyrill Tkachov wrote:

For the testcase in the patch we were generating an extra neg
instruction:
   cmp w0, wzr
   csneg   w0, w0, w0, ge
   neg w0, w0
   ret

instead of the optimal:
   cmp w0, wzr
   csneg   w0, w0, w0, lt
   ret

The reason is that combine tries to merge the operation into a negation
of
an abs.

Before combine, you have two insns, a negation and an abs, so that is
not so very strange :-)


Well, because the aarch64 integer abs expander expands to a compare
and a conditional negate, we have a compare followed by an if_then_else
with a neg in it. That's followed by a neg of that:
(insn 6 3 7 2 (set (reg:CC 66 cc)
  (compare:CC (reg/v:SI 76 [ x ])
  (const_int 0 [0]))) abs.c:1 374 {*cmpsi}
   (nil))
(insn 7 6 8 2 (set (reg:SI 78 [ D.2683 ])
  (if_then_else:SI (lt (reg:CC 66 cc)
  (const_int 0 [0]))
  (neg:SI (reg/v:SI 76 [ x ]))
  (reg/v:SI 76 [ x ]))) abs.c:1 441 {csneg3si_insn}
   (expr_list:REG_DEAD (reg/v:SI 76 [ x ])
  (expr_list:REG_DEAD (reg:CC 66 cc)
  (expr_list:REG_EQUAL (abs:SI (reg/v:SI 76 [ x ]))
  (nil)
(insn 8 7 13 2 (set (reg:SI 77 [ D.2683 ])
  (neg:SI (reg:SI 78 [ D.2683 ]))) abs.c:1 319 {negsi2}
   (expr_list:REG_DEAD (reg:SI 78 [ D.2683 ])
  (nil)))


What does combine do when it props 7 into 8?  I suspect you want to
optimize that instead of doing it any other way.
That is if prop the neg into the two sides of the conditional and if
one simplifies, produce a new rtl.


Yeah, that's what combine tries in simplify_if_then_else, instead of
propagating the neg it tries a (neg (abs x)). I did consider telling it not
to do that, but how would it be gated?
Should we look if the one arm of the if_then_else is a neg of the other and
invert the comparison code instead of trying (neg (abs x)) when
HAVE_conditional_move?

Does it do that even with insn 6 is not involved?
I doubt it.

Please look into that again.  I bet it is not creating the instruction
you want it to create and having the not in the last operand of the
if_then_else instead of the first one.


Ok, I dug a bit further. There's a couple of issues here:
* combine_simplify_rtx tries to simplify the comparison in the if_then_else
then has the lines:
   if (cond_code == NE  COMPARISON_P (cond))
 return x;

which quits early and doesn't try optimising the (neg (if_then_else)) further.

* If I bypass that condition it proceeds further into simplify_unary_operation 
from
simplify-rtx.c which doesn't have the simplifcation rule we want.
If I add a [- (x ? -y : y) - !x ? -y : y] transformation there then it works.

I'll try to whip a patch into shape.

Thanks,
Kyrill



Thanks,
Andrew



Kyrill


Thanks,
Andrew Pinski


combine tries to convert it into a neg-of-an-abs in simplify_if_then_else


Some archs have actual nabs insns btw (for floating point, anyway).

Archs without abs or conditional assignment, and with cheap branches,
get a branch around a neg followed by another neg, at expand time.
This then isn't optimised away either.

So I'd say expand should be made a bit smarter for this.  Failing
that, your approach looks fine to me -- assuming you want to have a
fake abs insn at all.

On to the patch...



+;; Combine will try merging (c  0 ? -x : x) into (-|x|).  This isn't
a
good

x  0 here.


Thanks, I'll fix that when committing if approved.

Kyrill


Segher





Re: [PATCH][4/n] Remove GENERIC stmt combining from SCCVN

2015-07-14 Thread Richard Biener
On Tue, 14 Jul 2015, Richard Biener wrote:

 On Mon, 13 Jul 2015, Jeff Law wrote:
 
   2015-07-13  Richard Biener  rguent...@suse.de
   
 * tree-ssa-dom.c (record_temporary_equivalences): Merge
 wideing type conversion case from
   record_equivalences_from_incoming_edge
 and use record_equality to record equivalences.
 (record_equivalences_from_incoming_edge): Call
 record_temporary_equivalences.
  Yea, if testing is clean, that's OK.  Ought to be easier to then add code to
  handle looking at the uses of X to see if they create an equivalence for the
  destination of those use statements.
 
 Applied.  The following patch adds the equivalences for the destination
 of use stmts if they simplify.

Actually I can't use FOR_EACH_IMM_USE_STMT any longer because
record_equivalence ends up calling has_single_use which doens't
handle the special marker FOR_EACH_IMM_USE_STMT inserts.

Thus the following - bootstrapped and tested on x86_64-unknown-linux-gnu.

Ok?

Thanks,
Richard.

2015-07-14  Richard Biener  rguent...@suse.de

* tree-ssa-dom.c (dom_valueize): New function.
(record_temporary_equivalences): Also record equivalences
for dominating stmts that have uses of equivalences we are
about to record.

Index: gcc/tree-ssa-dom.c
===
--- gcc/tree-ssa-dom.c  (revision 225761)
+++ gcc/tree-ssa-dom.c  (working copy)
@@ -1401,6 +1401,20 @@ simplify_stmt_for_jump_threading (gimple
   return lookup_avail_expr (stmt, false);
 }
 
+/* Valueize hook for gimple_fold_stmt_to_constant_1.  */
+
+static tree
+dom_valueize (tree t)
+{
+  if (TREE_CODE (t) == SSA_NAME)
+{
+  tree tem = SSA_NAME_VALUE (t);
+  if (tem)
+   return tem;
+}
+  return t;
+}
+
 /* Record into the equivalence tables any equivalences implied by
traversing edge E (which are cached in E-aux).
 
@@ -1428,7 +1442,6 @@ record_temporary_equivalences (edge e)
 additional equivalences.  */
   if (lhs
   TREE_CODE (lhs) == SSA_NAME
-  is_gimple_constant (rhs)
   TREE_CODE (rhs) == INTEGER_CST)
{
  gimple defstmt = SSA_NAME_DEF_STMT (lhs);
@@ -1455,6 +1468,41 @@ record_temporary_equivalences (edge e)
}
}
}
+
+  /* If LHS is an SSA_NAME with a new equivalency then try if
+ stmts with uses of that LHS that dominate the edge destination
+simplify and allow further equivalences to be recorded.  */
+  if (lhs  TREE_CODE (lhs) == SSA_NAME)
+   {
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
+   {
+ gimple use_stmt = USE_STMT (use_p);
+
+ /* Only bother to record more equivalences for lhs that
+can be directly used by e-dest.
+???  If the code gets re-organized to a worklist to
+catch more indirect opportunities and it is made to
+handle PHIs then this should only consider use_stmts
+in basic-blocks we have already visited.  */
+ if (e-dest == gimple_bb (use_stmt)
+ || !dominated_by_p (CDI_DOMINATORS,
+ e-dest, gimple_bb (use_stmt)))
+   continue;
+ tree lhs2 = gimple_get_lhs (use_stmt);
+ if (lhs2  TREE_CODE (lhs2) == SSA_NAME)
+   {
+ tree res
+   = gimple_fold_stmt_to_constant_1 (use_stmt, dom_valueize,
+ no_follow_ssa_edges);
+ if (res
+  (TREE_CODE (res) == SSA_NAME
+ || is_gimple_min_invariant (res)))
+   record_equality (lhs2, res);
+   }
+   }
+   }
 
   /* If we have 0 = COND or 1 = COND equivalences, record them
 into our expression hash tables.  */


[PATCH] Add 'switch' statement to match.pd language

2015-07-14 Thread Richard Biener

The following as promised adds a 'switch' statement.  This way

 (if (A)
  B
  (if (B)
   C
   (if (C)
D
E)))

can now be written as

 (switch
  (if (A)
   B)
  (if (B)
   C)
  (if (C)
   D)
  E)

the ifs immediately nested in the switch cannot have else clauses and
I reject switches that can be expressed as single if.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I know Micha detests the extra 'if' as much as the extra braces thus
would have prefered

 (switch
  (A) B
  (B) C
  (C) D
  E)

but that's hard to unambiguously parse and adding a keyword
without braces like

 (switch
  if (A) B
  if (B) C
  if (C) D
  E)

looked too inconsistent with all the rest of the language.

Thus - barring any comments I plan to commit this tomorrow
and then (being done with IL changes) will update the internals
documentation to reflect recent changes.

Note that internally the switch statement is represented as
a if () { } else { if () {} else { if ()  chain.

Thanks,
Richard.

2015-07-14  Richard Biener  rguent...@suse.de

* genmatch.c (parser::peek, parser::peek_ident): Add argument
to tell how many tokens to peek ahead (default 1).
(parser::eat_token, parser::eat_ident): Return token consumed.
(parser::parse_result): Parse new switch statement.
* match.pd: Use case statements where appropriate.

Index: gcc/genmatch.c
===
*** gcc/genmatch.c  (revision 225765)
--- gcc/genmatch.c  (working copy)
*** public:
*** 3014,3026 
  
  private:
const cpp_token *next ();
!   const cpp_token *peek ();
!   const cpp_token *peek_ident (const char * = NULL);
const cpp_token *expect (enum cpp_ttype);
!   void eat_token (enum cpp_ttype);
const char *get_string ();
const char *get_ident ();
!   void eat_ident (const char *);
const char *get_number ();
  
id_base *parse_operation ();
--- 3014,3026 
  
  private:
const cpp_token *next ();
!   const cpp_token *peek (unsigned = 1);
!   const cpp_token *peek_ident (const char * = NULL, unsigned = 1);
const cpp_token *expect (enum cpp_ttype);
!   const cpp_token *eat_token (enum cpp_ttype);
const char *get_string ();
const char *get_ident ();
!   const cpp_token *eat_ident (const char *);
const char *get_number ();
  
id_base *parse_operation ();
*** parser::next ()
*** 3078,3084 
  /* Peek at the next non-whitespace token from R.  */
  
  const cpp_token *
! parser::peek ()
  {
const cpp_token *token;
unsigned i = 0;
--- 3078,3084 
  /* Peek at the next non-whitespace token from R.  */
  
  const cpp_token *
! parser::peek (unsigned num)
  {
const cpp_token *token;
unsigned i = 0;
*** parser::peek ()
*** 3086,3093 
  {
token = cpp_peek_token (r, i++);
  }
!   while (token-type == CPP_PADDING
! token-type != CPP_EOF);
/* If we peek at EOF this is a fatal error as it leaves the
   cpp_reader in unusable state.  Assume we really wanted a
   token and thus this EOF is unexpected.  */
--- 3086,3094 
  {
token = cpp_peek_token (r, i++);
  }
!   while ((token-type == CPP_PADDING
!  token-type != CPP_EOF)
!|| (--num  0));
/* If we peek at EOF this is a fatal error as it leaves the
   cpp_reader in unusable state.  Assume we really wanted a
   token and thus this EOF is unexpected.  */
*** parser::peek ()
*** 3100,3108 
 token is not an identifier or equal to ID if supplied).  */
  
  const cpp_token *
! parser::peek_ident (const char *id)
  {
!   const cpp_token *token = peek ();
if (token-type != CPP_NAME)
  return 0;
  
--- 3101,3109 
 token is not an identifier or equal to ID if supplied).  */
  
  const cpp_token *
! parser::peek_ident (const char *id, unsigned num)
  {
!   const cpp_token *token = peek (num);
if (token-type != CPP_NAME)
  return 0;
  
*** parser::expect (enum cpp_ttype tk)
*** 3131,3140 
  
  /* Consume the next token from R and assert it is of type TK.  */
  
! void
  parser::eat_token (enum cpp_ttype tk)
  {
!   expect (tk);
  }
  
  /* Read the next token from R and assert it is of type CPP_STRING and
--- 3132,3141 
  
  /* Consume the next token from R and assert it is of type TK.  */
  
! const cpp_token *
  parser::eat_token (enum cpp_ttype tk)
  {
!   return expect (tk);
  }
  
  /* Read the next token from R and assert it is of type CPP_STRING and
*** parser::get_ident ()
*** 3159,3171 
  
  /* Eat an identifier token with value S from R.  */
  
! void
  parser::eat_ident (const char *s)
  {
const cpp_token *token = peek ();
const char *t = get_ident ();
if (strcmp (s, t) != 0)
  fatal_at (token, expected '%s' got '%s'\n, s, t);
  }
  
  /* Read the next token from R and assert it is of type CPP_NUMBER and
--- 3160,3173 
  
  /* 

Re: [PATCH, RFC] combine: Don't create insv insns unless HAVE_insv

2015-07-14 Thread Segher Boessenkool
On Mon, Jul 13, 2015 at 10:50:28PM -0600, Jeff Law wrote:
 On 07/12/2015 07:56 AM, Segher Boessenkool wrote:
 Currently combine tries to make assignments to bitfields (of a register)
 whenever it can.  If the target has no insv pattern, the result will not
 ever match (if the MD is sane at all).  Doing insv on registers generates
 worse code than what you get if you express things directly (with and/ior),
 so many targets do not _want_ to have insv patterns.
 
 This patch changes combine to not generate insv patterns if the target
 does not have any.
 
 Bootstrapped and regression checked on powerpc64-linux (with and without
 insv patterns there).  Also built on many other targets, for many months.
 
 I'm vaguely aware there have been changes to extzv etc. so there now are
 extzvmode; I'll investigate if that means anything for insv as well.
 It's also a new #ifdef HAVE_xxx.  But we're not clean there yet so I hope
 to get away with that ;-)
 
 Comments?  Complaints?

 Well, I'd rather avoid the #ifdef.  Just because we aren't clean yet 
 doesn't mean we need to introduce more stuff to clean up later.

This patch is very old, from long before the HAVE_* conversion ;-)
I'll clean it up, the insvmode needs handling anyway.

 It'd also be nice to have a testcase or two.  Guessing they'd be target 
 dependent, but that's OK with me.

I can make some for the powerpc insert things, when that goes in.  What you
need to test is that combine does *not* create insv from thin air, so that
it _can_ create other things.  It is pretty hard to test if things are *not*
done :-)


Segher


Committed: partial sync with src repo: bfd depends on zlib

2015-07-14 Thread Hans-Peter Nilsson
I tried the good old combined-tree approach (which some call
ubertree), and was bitten by bfd not depending on zlib *in the
gcc version of toplevel Makefile.in*.  So, I did a partial sync
to include the particular commit from the binutils-gdb git and
committed as obvious after a auccessful build of
mmix-knuth-mmixware.

(Test-results however are abysmal, likely due to a single ld bug
causing segfault with -flto.)

Committed:
Sync with src:
2015-03-30  H.J. Lu  hongjiu...@intel.com
* Makefile.def (dependencies): Add all-zlib to all-bfd.
* Makefile.in: Regenerated.

Index: Makefile.def
===
--- Makefile.def(revision 225768)
+++ Makefile.def(working copy)
@@ -402,6 +402,7 @@
 dependencies = { module=configure-bfd; on=configure-intl; };
 dependencies = { module=all-bfd; on=all-libiberty; };
 dependencies = { module=all-bfd; on=all-intl; };
+dependencies = { module=all-bfd; on=all-zlib; };
 dependencies = { module=configure-opcodes; on=configure-libiberty; hard=true; 
};
 dependencies = { module=all-opcodes; on=all-libiberty; };

Index: Makefile.in
===
--- Makefile.in (revision 225768)
+++ Makefile.in (working copy)
@@ -49977,6 +49977,14 @@
 all-stage4-bfd: maybe-all-stage4-intl
 all-stageprofile-bfd: maybe-all-stageprofile-intl
 all-stagefeedback-bfd: maybe-all-stagefeedback-intl
+all-bfd: maybe-all-zlib
+
+all-stage1-bfd: maybe-all-stage1-zlib
+all-stage2-bfd: maybe-all-stage2-zlib
+all-stage3-bfd: maybe-all-stage3-zlib
+all-stage4-bfd: maybe-all-stage4-zlib
+all-stageprofile-bfd: maybe-all-stageprofile-zlib
+all-stagefeedback-bfd: maybe-all-stagefeedback-zlib
 configure-opcodes: configure-libiberty

 configure-stage1-opcodes: configure-stage1-libiberty


Re: [PATCH][AArch64] Handle -|x| case using a single csneg

2015-07-14 Thread Kyrill Tkachov


On 14/07/15 13:55, Segher Boessenkool wrote:

On Tue, Jul 14, 2015 at 09:18:06AM +0100, Kyrill Tkachov wrote:

Before combine, you have two insns, a negation and an abs, so that is
not so very strange :-)

Oh, hrm, my aarch64 cross was three months old, and this now changed.
Or I messed up.  Sorry for the noise.

It does look like the if_then_else stuff in combine (and simplify-rtx)
could use some improvement, probably more than just fixing this one
case.  Or look at the first huge case in combine_simplify_rtx, that
looks eerily similar.


Yeah, removing an early return in combine_simplify_rtx and
adding the simplification to simplify_unary_operation did
the trick.

I'll see if it survives testing.

Thanks,
Kyrill




Segher





RE: [PATCH] MIPS: Correctly update the isa and arch_test_option_p variables after the arch dependency handling code in mips.exp

2015-07-14 Thread Andrew Bennett
 Yeah, I agree that this doesn't really fit the model that well,
 but like you say, we're stretching the logic a bit :-).  When I wrote it,
 the architectures formed a nice tree in which moving to leaf nodes only
 added features.  So in the pre-r6 days:
 
 # Handle dependencies between the pre-arch options and the arch option.
 # This should mirror the arch and post-arch code below.
 if { !$arch_test_option_p } {
 
 increased the architecture from the --target_board default to match
 the features required by the test, whereas:
 
 # Handle dependencies between the arch option and the post-arch options.
 # This should mirror the arch and pre-arch code above.
 if { $arch_test_option_p } {
 
 turned off features from the --target_board default to match a lower
 architecture required by the test.  So in the pre-r6 days, all the code
 in the second block was turning something off when going to a lower
 architecture.  The blocks were mutually-exclusive and writing it this
 way reduced the number of redundant options.  (Admittedly you could argue
 that it's daft to worry about that given the kind of command lines you
 tend to get from the rest of mips.exp. :-))
 
 r6 is the first time we've had to turn something off when moving up.
 -mnan and -mabs are also the first options where old architectures
 support only A, higher revisions support A and B, and the newest
 revision supports only B.  I think I'd prefer to acknowledge that
 and have:
 
 # Handle dependencies between the arch option and the post-arch options.
 # This should mirror the arch and pre-arch code above.  For pre-r6
 # architectures this only needs to be done when we've moved down
 # to a lower architecture and might need to turn features off,
 # but moving up from pre-r6 to r6 can remove features too.
 if { $arch_test_option_p || ($orig_isa_rev  6  $isa_rev = 6) } {
 
 I think the existing r6-r5 case really is different: there we're
 forcing a -mnan option not because the architecture needs it but
 because the environment might.

Hi,

An updated patch and ChangeLog which reflects the comments is below.
I have tested it on the mips-img-elf and mips-mti-elf toolchains
with all the valid multilib configurations and there are no new regressions.

Ok to commit?


Regards,


Andrew



testsuite/
* gcc.target/mips/mips.exp (mips-dg-options): Allow the post-arch
code to be run when the pre-arch code increases the isa_rev to 
mips32r6 or greater. 


diff --git a/gcc/testsuite/gcc.target/mips/mips.exp 
b/gcc/testsuite/gcc.target/mips/mips.exp
index 1dd4173..b3617e4 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -1045,6 +1045,7 @@ proc mips-dg-options { args } {
 set arch [mips_option options arch]
 set isa [mips_arch_info $arch isa]
 set isa_rev [mips_arch_info $arch isa_rev]
+set orig_isa_rev $isa_rev
 
 # If the test forces a 32-bit architecture, force -mgp32.
 # Force the current -mgp setting otherwise; if we don't,
@@ -1279,8 +1280,11 @@ proc mips-dg-options { args } {
 }
 
 # Handle dependencies between the arch option and the post-arch options.
-# This should mirror the arch and pre-arch code above.
-if { $arch_test_option_p } {
+# This should mirror the arch and pre-arch code above.  For pre-r6
+# architectures this only needs to be done when we've moved down
+# to a lower architecture and might need to turn features off,
+# but moving up from pre-r6 to r6 can remove features too.
+if { $arch_test_option_p || ($orig_isa_rev  6  $isa_rev = 6) } {
if { $isa  2 } {
mips_make_test_option options -mno-branch-likely
mips_make_test_option options -mno-fix-r1



[PATCH] Fix PR66863

2015-07-14 Thread Richard Biener

The following fixes PR66863.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2015-07-14  Richard Biener  rguent...@suse.de

PR tree-optimization/66863
* tree-vrp.c (register_edge_assert_for_2): Properly restrict
what we record for conversion use stmt lhs inequalities.

* gcc.dg/torture/pr66863.c: New testcase.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c  (revision 225761)
+++ gcc/tree-vrp.c  (working copy)
@@ -5381,7 +5381,17 @@ register_edge_assert_for_2 (tree name, e
  cst = int_const_binop (code, val, cst);
}
  else if (CONVERT_EXPR_CODE_P (code))
-   cst = fold_convert (TREE_TYPE (name2), val);
+   {
+ /* For truncating conversions require that the constant
+fits in the truncated type if we are going to record
+an inequality.  */
+ if (comp_code == NE_EXPR
+  (TYPE_PRECISION (TREE_TYPE (name2))
+  TYPE_PRECISION (TREE_TYPE (name)))
+  ! int_fits_type_p (val, TREE_TYPE (name2)))
+   continue;
+ cst = fold_convert (TREE_TYPE (name2), val);
+   }
  else
continue;
 
Index: gcc/testsuite/gcc.dg/torture/pr66863.c
===
--- gcc/testsuite/gcc.dg/torture/pr66863.c  (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr66863.c  (working copy)
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+
+int a, b;
+
+int
+fn1 (int p1)
+{
+  if (p1  -2147483647) 
+return 0;
+  else 
+return 1;
+}
+
+int
+fn2 (int p1, short p2)
+{
+  return p2 ? p1 % p2 : 0; 
+}
+
+int
+main ()
+{
+  b = fn2 (fn1 (a), a);
+  return 0;
+}


[PATCH][RFC] Consolidate -O3 torture options

2015-07-14 Thread Richard Biener

The following patch tries to consolidate the -O3 torture testing
options in the attempt to reduce testing time while not losing
coverage.

It drops testing of -funroll-all-loops (which nobody should use)
and retains only one non-default -O3 set of options - namely
-O3 plus those flags that would be enabled by -fprofile-use.

One should hope for ~20% less time in the C and dg tortures this way.

Didn't look into other tortures to apply the same yet (objc-torture?)

Currently testing on x86_64-unknown-linux-gnu.

For weird flag combinations we do have contributors that test
them and regularly report bugzillas.

Ok?

Comments?

Thanks,
Richard.

2015-07-14  Richard Biener  rguent...@suse.de

* lib/c-torture.exp (C_TORTURE_OPTIONS): Remove
{ -O3 -fomit-frame-pointer },
{ -O3 -fomit-frame-pointer -funroll-loops } and
{ -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions }
in favor of
{ -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
  -finline-functions }
* lib/gcc-dg.exp (DG_TORTURE_OPTIONS): Likewise.

Index: gcc/testsuite/lib/c-torture.exp
===
--- gcc/testsuite/lib/c-torture.exp (revision 225768)
+++ gcc/testsuite/lib/c-torture.exp (working copy)
@@ -53,9 +53,7 @@ if [info exists TORTURE_OPTIONS] {
{ -O0 } \
{ -O1 } \
{ -O2 } \
-   { -O3 -fomit-frame-pointer } \
-   { -O3 -fomit-frame-pointer -funroll-loops } \
-   { -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions } \
+   { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer 
-finline-functions } \
{ -O3 -g } \
{ -Os } \
{ -Og -g } ]
Index: gcc/testsuite/lib/gcc-dg.exp
===
--- gcc/testsuite/lib/gcc-dg.exp(revision 225768)
+++ gcc/testsuite/lib/gcc-dg.exp(working copy)
@@ -74,9 +74,7 @@ if [info exists TORTURE_OPTIONS] {
{ -O0 } \
{ -O1 } \
{ -O2 } \
-   { -O3 -fomit-frame-pointer } \
-   { -O3 -fomit-frame-pointer -funroll-loops } \
-   { -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions } \
+   { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer 
-finline-functions } \
{ -O3 -g } \
{ -Os } ]
 


Re: Committed: partial sync with src repo: bfd depends on zlib

2015-07-14 Thread H.J. Lu
On Tue, Jul 14, 2015 at 5:22 AM, Hans-Peter Nilsson h...@bitrange.com wrote:
 I tried the good old combined-tree approach (which some call
 ubertree), and was bitten by bfd not depending on zlib *in the
 gcc version of toplevel Makefile.in*.  So, I did a partial sync
 to include the particular commit from the binutils-gdb git and
 committed as obvious after a auccessful build of
 mmix-knuth-mmixware.

 (Test-results however are abysmal, likely due to a single ld bug
 causing segfault with -flto.)

 Committed:
 Sync with src:
 2015-03-30  H.J. Lu  hongjiu...@intel.com
 * Makefile.def (dependencies): Add all-zlib to all-bfd.
 * Makefile.in: Regenerated.

The shared files between gcc and binutils are way out of sync.
I have synced binutils with gcc.  I started submitting GCC
patches to sync with binutils:

https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00707.html

I will commit changes to GCC to sync GCC with binutils.

In the meantime, you can use the shared files from binutils
with GCC.

-- 
H.J.


Re: [gomp4.1] Handle linear clause modifiers in declare simd

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 02:41:04PM +0300, Ilya Verbin wrote:
 This caused:
 
 gcc/tree-vect-stmts.c: In function ‘bool vectorizable_simd_clone_call(gimple, 
 gimple_stmt_iterator*, gimple_statement_base**, slp_tree)’:
 gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
 ‘SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP’ not handled in switch 
 [-Werror=switch]
   switch (n-simdclone-args[i].arg_type)
  ^
 gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
 ‘SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP’ not handled in switch 
 [-Werror=switch]
 gcc/tree-vect-stmts.c:2810:13: error: enumeration value 
 ‘SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP’ not handled in switch 
 [-Werror=switch]
 cc1plus: all warnings being treated as errors
 make[4]: *** [tree-vect-stmts.o] Error 1

Oops, missed that warning (and haven't bootstrapped the branch
for a while now).

Fixed thusly, to handle VAL/UVAL better we'll need to find the last
store into the memory and determine if the stored value is linear.
Something deferred for later.

2015-07-14  Jakub Jelinek  ja...@redhat.com

* tree-vect-stmts.c (vectorizable_simd_clone_call): Handle
SIMD_CLONE_ARG_TYPE_LINEAR_{REF,VAL,UVAL}_CONSTANT_STEP.

--- gcc/tree-vect-stmts.c.jj2015-07-14 14:30:06.0 +0200
+++ gcc/tree-vect-stmts.c   2015-07-14 14:45:08.032376586 +0200
@@ -2825,6 +2825,7 @@ vectorizable_simd_clone_call (gimple stm
  i = -1;
break;
  case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
if (arginfo[i].dt == vect_constant_def
|| arginfo[i].dt == vect_external_def
|| (arginfo[i].linear_step
@@ -2832,6 +2833,8 @@ vectorizable_simd_clone_call (gimple stm
  i = -1;
break;
  case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
/* FORNOW */
i = -1;
break;


Jakub


Re: [patch] install missing rs6000 header, needed to build plugins

2015-07-14 Thread David Edelsohn
On Tue, Jul 14, 2015 at 5:57 AM, Matthias Klose d...@ubuntu.com wrote:
 This installs the rs6000-cpus.def file for powerpc targets (included by
 default64.h), which is needed to build plugins on powerpc64* targets.

 Ok for the trunk, and the 5 branch? Tested with a build and installation.

Okay.

Thanks, David


Re: [gomp4.1] depend(sink) and depend(source) parsing for C

2015-07-14 Thread Aldy Hernandez

On 07/13/2015 06:56 AM, Jakub Jelinek wrote:

On Sat, Jul 11, 2015 at 11:35:36AM -0700, Aldy Hernandez wrote:


Everything addressed except this, which I'll address as a follow-up:


If you want to spend time on something still in the FE, it would be nice to
resolve the C++ iteration var issue (i.e. increase OMP_FOR number of
arguments, so that it could have yet another (optional) vector, say
OMP_FOR_ORIG_DECLS.  If that vector would be NULL, the gimplifier would
assume that all the decls in OMP_FOR_INIT are the ones present in the
source, if it would be present, you'd use them for the variable checking
instead of the ones from OMP_FOR_INIT (but, replace them with the
decls from OMP_FOR_INIT after the checking).

There is another issue - if some iterator var has pointer type, supposedly
we want somewhere in the FEs already multiply it by the size of what they
point to (and convert to sizetype).  For C FE, it can be done already during
parsing, we should know the type of the iterator var already at that point,
for C++ FE it needs to be done only in finish_omp_clauses if
!processing_template_decl, because in templates we might not know the type.


Tested on x86-64 Linux.

Ok for branch?

commit 6ca9b034b05e3106eac6c7b9c6dc1885e1569320
Author: Aldy Hernandez al...@redhat.com
Date:   Wed Jun 24 15:08:49 2015 -0700

* coretypes.h (struct gomp_ordered): Define.
* gimple-pretty-print.c (dump_gimple_omp_block): Do not handle
GIMPLE_OMP_ORDERED.
(dump_gimple_omp_ordered): New.
(pp_gimple_stmt_1): Handle GIMPLE_OMP_ORDERED.
* gimple-walk.c (walk_gimple_op): Same.
* gimple.c (gimple_build_omp_ordered): Generate gimple_ordered.
* gimple.def (GIMPLE_OMP_ORDERED): Add clauses.
* gimple.h (struct gomp_ordered): New.
(is_a_helpergomp_ordered *::test): New.
(is_a_helpergomp_ordered *): New.
(gimple_omp_ordered_clauses): New.
(gimple_omp_ordered_clauses_ptr): New.
(gimple_omp_ordered_set_clauses): New.
* gimplify.c (struct gimplify_omp_ctx): Add iter_vars field.
(delete_omp_context): Release iter_vars.
(gimplify_scan_omp_clauses): Handle OMP_CLAUSE_DEPEND_{SINK,SOURCE}.
(gimplify_omp_for): Initialize iter_vars.
(gimplify_expr): Handle OMP_ORDERED clauses.
* omp-low.c (check_omp_nesting_restrictions): Type check
OMP_CLAUSE_DEPEND_{KIND,SOURCE}.
(lower_depend_clauses): Add cases for OMP_CLAUSE_DEPEND_{SOURCE,SINK}.
* tree-inline.c (remap_gimple_stmt): Call gimple_build_omp_ordered
with clauses.
* tree-pretty-print.c (dump_omp_clause): Handle
OMP_CLAUSE_DEPEND_SINK.
c/
* c-parser.c (c_parser_omp_clause_depend_sink): New.
(c_parser_omp_clause_depend): Call it.
* c-typeck.c (c_finish_omp_clauses): Handle
OMP_CLAUSE_DEPEND_SINK.
cp/

* parser.c (cp_parser_omp_clause_depend_sink): New.
(cp_parser_omp_clause_depend): Call it.
* pt.c (tsubst_omp_clause_decl): Handle OMP_CLAUSE_DEPEND_KIND.
(tsubst_expr): Handle OMP_ORDERED_CLAUSES.
* semantics.c (finish_omp_clauses): Handle OMP_CLAUSE_DEPEND_KIND.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index cd3bd5a..211857a 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -11701,6 +11701,95 @@ c_parser_omp_clause_simdlen (c_parser *parser, tree 
list)
   return c;
 }
 
+/* OpenMP 4.1:
+   vec:
+ identifier [+/- integer]
+ vec , identifier [+/- integer]
+*/
+
+static tree
+c_parser_omp_clause_depend_sink (c_parser *parser, location_t clause_loc,
+tree list)
+{
+  tree vec = NULL;
+  if (c_parser_next_token_is_not (parser, CPP_NAME)
+  || c_parser_peek_token (parser)-id_kind != C_ID_ID)
+{
+  c_parser_error (parser, expected identifier);
+  return list;
+}
+
+  while (c_parser_next_token_is (parser, CPP_NAME)
+ c_parser_peek_token (parser)-id_kind == C_ID_ID)
+{
+  tree t = lookup_name (c_parser_peek_token (parser)-value);
+  tree addend = NULL;
+
+  if (t == NULL_TREE)
+   {
+ undeclared_variable (c_parser_peek_token (parser)-location,
+  c_parser_peek_token (parser)-value);
+ t = error_mark_node;
+   }
+
+  c_parser_consume_token (parser);
+
+  if (t != error_mark_node)
+   {
+ bool neg;
+
+ if (c_parser_next_token_is (parser, CPP_MINUS))
+   neg = true;
+ else if (c_parser_next_token_is (parser, CPP_PLUS))
+   neg = false;
+ else
+   {
+ addend = integer_zero_node;
+ goto add_to_vector;
+   }
+ c_parser_consume_token (parser);
+
+ if (c_parser_next_token_is_not (parser, CPP_NUMBER))
+   {
+ c_parser_error (parser, expected integer);
+ return list;
+   }
+
+ addend = c_parser_peek_token 

Re: [gomp4.1] depend(sink) and depend(source) parsing for C

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 07:06:52AM -0700, Aldy Hernandez wrote:
 On 07/13/2015 06:56 AM, Jakub Jelinek wrote:
 On Sat, Jul 11, 2015 at 11:35:36AM -0700, Aldy Hernandez wrote:
 
 Everything addressed except this, which I'll address as a follow-up:
 
 If you want to spend time on something still in the FE, it would be nice to
 resolve the C++ iteration var issue (i.e. increase OMP_FOR number of
 arguments, so that it could have yet another (optional) vector, say
 OMP_FOR_ORIG_DECLS.  If that vector would be NULL, the gimplifier would
 assume that all the decls in OMP_FOR_INIT are the ones present in the
 source, if it would be present, you'd use them for the variable checking
 instead of the ones from OMP_FOR_INIT (but, replace them with the
 decls from OMP_FOR_INIT after the checking).
 
 There is another issue - if some iterator var has pointer type, supposedly
 we want somewhere in the FEs already multiply it by the size of what they
 point to (and convert to sizetype).  For C FE, it can be done already during
 parsing, we should know the type of the iterator var already at that point,
 for C++ FE it needs to be done only in finish_omp_clauses if
 !processing_template_decl, because in templates we might not know the type.
 
 Tested on x86-64 Linux.
 
 Ok for branch?

Can you please fix:
Blocks of 8 spaces should be replaced with tabs.
248:+   (parser, identifier,
579:+ stmt-code == GIMPLE_OMP_ORDERED.  */
798:+   (gimple_omp_for_clauses (octx-stmt),
914:+for (k = 0; k  o; k++)
915:+  {
932:+bar (i, j, 0);
955:+bar (i, j, 0);
973:+  (s1,
(in the patch on lines starting with + replace sequences of 8 spaces with
tabs)?

Ok with that, thanks.

Jakub


RE: [PATCH, MIPS] Support new interrupt handler options

2015-07-14 Thread Robert Suchanek
Hi Catherine,

 I'm getting build errors with the current TOT and your patch.
 
 The first errors that I encounter are:
 gcc/config/mips/mips.c:1355:1: warning: 'mips_int_mask
 mips_interrupt_mask(tree)' defined but not used [-Wunused-function]
 gcc/config/mips/mips.c:1392:1: warning: 'mips_shadow_set
 mips_use_shadow_register_set(tree)' defined but not used [-Wunused-function]
 
 Removing these two functions results in further errors that I have not
 investigated.
 Will you try applying and building your patch again?

I have no explanation why this could happen.  My guess is that a part of the 
patch
did not apply correctly.  Those functions are used in mips_compute_frame_info().

I did notice and fixed warnings about unused variables/arguments.

 
 I have a couple of further comments on the existing patch, see below.

Comments added.  Please have a look at the attached revised patch.
Tested against r225768.

Regards,
Robert


gcc/
* config/mips/mips.c (mips_int_mask): New enum.
(mips_shadow_set): Likewise.
(int_mask): New variable.
(use_shadow_register_set_p): Change type to enum mips_shadow_set.
(machine_function): Add int_mask and use_shadow_register_set.
(mips_attribute_table): Add attribute handlers for interrupt and
use_shadow_register_set.
(mips_interrupt_mask): New static function.
(mips_handle_interrupt_attr): Likewise.
(mips_handle_use_shadow_register_set_attr): Likewise.
(mips_use_shadow_register_set): Change return type to enum
mips_shadow_set.  Add argument handling for use_shadow_register_set
attribute.
(mips_interrupt_extra_called_saved_reg_p): Update the conditional to
compare with mips_shadow_set enum.
(mips_compute_frame_info): Add interrupt mask and
use_shadow_register_set to per-function information structure.
Add a stack slot for EPC unconditionally.
(mips_expand_prologue): Compare use_shadow_register_set value
with mips_shadow_set enum.  Save EPC always in K1, clobber only K1 for
masked interrupt register but in EIC mode use K0 and save Cause in K0.
EPC saved and restored unconditionally.  Use PMODE_INSN macro when
copying the stack pointer from the shadow register set.
* config/mips/mips.h (SR_IM0): New define.
* config/mips/mips.md (mips_rdpgpr): Rename to...
(mips_rdpgpr_mode): ...this.  Use the Pmode iterator.
* doc/extend.texi (Declaring Attributes of Functions): Document
optional arguments for interrupt and use_shadow_register_set
attributes.

gcc/testsuite/
* gcc.target/mips/interrupt_handler-4.c: New test.
---
 gcc/config/mips/mips.c | 286 +
 gcc/config/mips/mips.h |   3 +
 gcc/config/mips/mips.md|  10 +-
 gcc/doc/extend.texi|  22 +-
 .../gcc.target/mips/interrupt_handler-4.c  |  31 +++
 5 files changed, 290 insertions(+), 62 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/interrupt_handler-4.c

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 671fed8..70240f7 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -382,6 +382,30 @@ struct GTY(())  mips_frame_info {
   HOST_WIDE_INT hard_frame_pointer_offset;
 };
 
+/* Enumeration for masked vectored (VI) and non-masked (EIC) interrupts.  */
+enum mips_int_mask
+{
+  INT_MASK_EIC = -1,
+  INT_MASK_SW0 = 0,
+  INT_MASK_SW1 = 1,
+  INT_MASK_HW0 = 2,
+  INT_MASK_HW1 = 3,
+  INT_MASK_HW2 = 4,
+  INT_MASK_HW3 = 5,
+  INT_MASK_HW4 = 6,
+  INT_MASK_HW5 = 7
+};
+
+/* Enumeration to mark the existence of the shadow register set.
+   SHADOW_SET_INTSTACK indicates a shadow register set with a valid stack
+   pointer.  */
+enum mips_shadow_set
+{
+  SHADOW_SET_NO,
+  SHADOW_SET_YES,
+  SHADOW_SET_INTSTACK
+};
+
 struct GTY(())  machine_function {
   /* The next floating-point condition-code register to allocate
  for ISA_HAS_8CC targets, relative to ST_REG_FIRST.  */
@@ -434,8 +458,12 @@ struct GTY(())  machine_function {
   /* True if this is an interrupt handler.  */
   bool interrupt_handler_p;
 
-  /* True if this is an interrupt handler that uses shadow registers.  */
-  bool use_shadow_register_set_p;
+  /* Records the way in which interrupts should be masked.  Only used if
+ interrupts are not kept masked.  */
+  enum mips_int_mask int_mask;
+
+  /* Records if this is an interrupt handler that uses shadow registers.  */
+  enum mips_shadow_set use_shadow_register_set;
 
   /* True if this is an interrupt handler that should keep interrupts
  masked.  */
@@ -717,6 +745,10 @@ const enum reg_class 
mips_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   ALL_REGS,ALL_REGS,   ALL_REGS,   ALL_REGS
 };
 
+static tree mips_handle_interrupt_attr (tree *, tree, tree, int, bool *);
+static tree 

Re: [PATCH][RFC] Consolidate -O3 torture options

2015-07-14 Thread Jeff Law

On 07/14/2015 05:58 AM, Richard Biener wrote:


The following patch tries to consolidate the -O3 torture testing
options in the attempt to reduce testing time while not losing
coverage.

It drops testing of -funroll-all-loops (which nobody should use)
and retains only one non-default -O3 set of options - namely
-O3 plus those flags that would be enabled by -fprofile-use.

One should hope for ~20% less time in the C and dg tortures this way.

Didn't look into other tortures to apply the same yet (objc-torture?)

Currently testing on x86_64-unknown-linux-gnu.

For weird flag combinations we do have contributors that test
them and regularly report bugzillas.

Ok?

Comments?

Thanks,
Richard.

2015-07-14  Richard Biener  rguent...@suse.de

* lib/c-torture.exp (C_TORTURE_OPTIONS): Remove
{ -O3 -fomit-frame-pointer },
{ -O3 -fomit-frame-pointer -funroll-loops } and
{ -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions }
in favor of
{ -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
  -finline-functions }
* lib/gcc-dg.exp (DG_TORTURE_OPTIONS): Likewise.
I think this is OK -- I've occasionally wondered about the additional 
coverage we get vs the amount of time spent for the various options.


I can't recall specific cases where one of those 3 options would trigger 
a failure, but the two didn't. I'm sure it's happened, but it's just 
common enough to warrant the amount of time we spend testing it.


This patch has the additional benefit that I think we can eliminate 
scanning the source for loops and eliminating the -funroll[-all]-loops 
options.   Hmm, that code may have already been dead... Hmmm.


jeff




Re: [PATCH] Sync and use zlib.m4 with binutils-gdb

2015-07-14 Thread H.J. Lu
On Tue, Apr 14, 2015 at 12:11 PM, H.J. Lu hongjiu...@intel.com wrote:
 This patch syncs zlib.m4 with binutils-gdb and uses AM_ZLIB from zlib.m4
 in gcc/configure.ac.  OK for trunk?

 Thanks.

 H.J.
 --
 config/

 * zlib.m4: Sync with binutils-gdb.

 gcc/

 * Makefile.in (top_srcdir): New.
 * configure.ac: Use AM_ZLIB.
 * configure: Regeneated.
 ---
  config/zlib.m4   | 27 ++-
  gcc/Makefile.in  |  3 +++
  gcc/aclocal.m4   |  1 +
  gcc/configure| 13 +
  gcc/configure.ac | 10 +-
  5 files changed, 28 insertions(+), 26 deletions(-)


I am checking in this patch.


-- 
H.J.


Re: Tests for libgomp based on OpenMP Examples 4.0.2.

2015-07-14 Thread Jakub Jelinek
Hi!

Note Richard reported that simd-8.c and simd-8.f90 tests fail on i686,
can you please adjust them so that they don't perform direct comparison of
floating point values, but allow some small epsilon (0.00390625
or something similar)?  The last iteration multiplies 998 by 1.8, adds
that to 1498.5, then on the sum performs sum + sum * 1.5, so with i?86
excess precision it might compute slightly different result.

Jakub


Re: Committed: partial sync with src repo: bfd depends on zlib

2015-07-14 Thread Hans-Peter Nilsson
On Tue, 14 Jul 2015, H.J. Lu wrote:
 The shared files between gcc and binutils are way out of sync.

Yeah I noticed, but didn't go for a full sync as that'd require
testing other builds too.  Thankfully your imported src commit
was local.

 I have synced binutils with gcc.  I started submitting GCC
 patches to sync with binutils:

 https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00707.html

 I will commit changes to GCC to sync GCC with binutils.

Good!

 In the meantime, you can use the shared files from binutils
 with GCC.

The commit I did means I don't have to, but this was for a
bare-iron cross-target meaning likely less issues than a
self-hosted target with all bells, whistles and subdirs; people
may see other failures for other targets.

brgds, H-P


Re: [RFC, PR target/65105] Use vector instructions for scalar 64bit computations on 32bit target

2015-07-14 Thread Ilya Enkovich
Hi,

Any comments on this?

Thanks,
Ilya

2015-06-19 16:21 GMT+03:00 Ilya Enkovich enkovich@gmail.com:
 Hi,

 This patch tries to improve 64bit integer computations on 32bit target.  This 
 is achieved by an additional i386 target pass which searches for all 
 conversion candidates and tries to transform them into vector mode when 
 profitable.

 Initial problem discussion had several assumptions that this optimization 
 should be done in RA.  But implementation of this in RA seems really complex. 
  I don't believe it can be done in a reasonalble time.  And taking into 
 account quite narrow performance impact, I believe a separate conversion pass 
 is a better solution.

 Here is shortly a list of changes:

 1. Add insn templates for 64bit and/ior/xor/zext for 32bit target to avoid 
 split on expand
 2. Add new pass to convert scalar computation into vector.  The flow of the 
 pass is following:
 a. Find all instructions we may convert
 b. Split them into chains of dependant instructions
 c. Estimate if chain conversion is profitable
 d. Convert chain if profitable
 3. Add splits for not converted insns

 Current cost model uses processor_costs table to estimate how much gain somes 
 from a single instruction usage vs pair of instruction + estimate cost of 
 scalar-vector and back conversions.  Cost estimation doesn't actually use 
 CFG and have a (lot of) room for improvement.  The problem here is a lack of 
 workloads to be used for tuning.

 Added DI insns and splits for 32bit target delay insns split until 
 reload_completed.  It is a potential degradation for cases when conversion 
 doesn't happen. Pass probably may be moved before spli1 pass to allow early 
 split of not converted insns.  Or new pass itself may split not converted 
 chains.

 I also had to modify register constraint of movdi for sse-mem alternative.  
 I understand we don't like this alternative for 64bit target but for 32bit it 
 is more useful.  E.g. I see mem-mem copies go through xmm instead of GPR 
 pair with this change.  May we have separate xmm register alternatives for 
 32bit and bit targets in movdi?

 Any comments/advices on this approach?

 I ran it for SPEC2000 and EMMBC1.1/2.0 on Avoton server.  The only test where 
 conversion was applied is 186.crafty.  I got +6% on -O2 and +10% on -Ofast + 
 -flto.

 Thanks,
 Ilya
 --
 2015-06-19  Ilya Enkovich  enkovich@gmail.com

 * config/i386/i386.c: Include dbgcnt.h.
 (has_non_address_hard_reg): New.
 (convertible_comparison_p): New.
 (scalar_to_vector_candidate_p): New.
 (remove_non_convertible_regs): New.
 (scalar_chain): New.
 (scalar_chain::scalar_chain): New.
 (scalar_chain::~scalar_chain): New.
 (scalar_chain::add_to_queue): New.
 (scalar_chain::mark_dual_mode_def): New.
 (scalar_chain::analyze_register_chain): New.
 (scalar_chain::add_insn): New.
 (scalar_chain::build): New.
 (scalar_chain::compute_convert_gain): New.
 (scalar_chain::replace_with_subreg): New.
 (scalar_chain::replace_with_subreg_in_insn): New.
 (scalar_chain::make_vector_copies): New.
 (scalar_chain::convert_reg): New.
 (scalar_chain::convert_op): New.
 (scalar_chain::convert_insn): New.
 (scalar_chain::convert): New.
 (convert_scalars_to_vector): New.
 (pass_data_stv): New.
 (pass_stv): New.
 (make_pass_stv): New.
 (ix86_option_override): Created and register stv pass.
 * config/i386/i386.md (SWIM1248x): New.
 (*movdi_internal): Remove '*' modifier for xmm to mem alternative.
 (andmode3): Use SWIM1248x iterator instead of SWIM.
 (*anddi3_doubleword): New.
 (*zextmode_doubleword): New.
 (*zextqi_doubleword): New.
 (codemode3): Use SWIM1248x iterator instead of SWIM.
 (*codedi3_doubleword): New.
 * dbgcnt.def (stv_conversion): New.


 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
 index 153dd85..6995afd 100644
 --- a/gcc/config/i386/i386.c
 +++ b/gcc/config/i386/i386.c
 @@ -110,6 +110,7 @@ along with GCC; see the file COPYING3.  If not see
  #include tree-iterator.h
  #include tree-chkp.h
  #include rtl-chkp.h
 +#include dbgcnt.h

  static rtx legitimize_dllimport_symbol (rtx, bool);
  static rtx legitimize_pe_coff_extern_decl (rtx, bool);
 @@ -2549,6 +2550,868 @@ rest_of_handle_insert_vzeroupper (void)
return 0;
  }

 +/* Return 1 if INSN uses or defines a hard register.
 +   Clobbers and flags definition are ignored.  */
 +
 +static bool
 +has_non_address_hard_reg (rtx_insn *insn)
 +{
 +  df_ref ref;
 +  FOR_EACH_INSN_DEF (ref, insn)
 +if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
 +!DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
 +DF_REF_REGNO (ref) != FLAGS_REG)
 +  return true;
 +
 +  FOR_EACH_INSN_USE (ref, insn)
 +if (!DF_REF_REG_MEM_P(ref)  

[PATCH] gcc: fix building w/isl-0.15

2015-07-14 Thread Mike Frysinger
---
 gcc/config.in   |  6 ++
 gcc/configure   | 31 +++
 gcc/configure.ac| 14 ++
 gcc/graphite-dependences.c  | 14 +++---
 gcc/graphite-optimize-isl.c |  8 ++--
 gcc/graphite-poly.h |  5 +
 6 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index b031a62..23e1757 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -1326,6 +1326,12 @@
 #endif
 
 
+/* Define if isl_options_set_schedule_serialize_sccs exists. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
+#endif
+
+
 /* Define if isl_schedule_constraints_compute_schedule exists. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
diff --git a/gcc/configure b/gcc/configure
index 9561e5c..6e81298 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28456,6 +28456,8 @@ fi
 
 # Check whether isl_schedule_constraints_compute_schedule is available;
 # it's new in ISL-0.13.
+# Check whether isl_options_set_schedule_serialize_sccs is available;
+# it's new in ISL-0.15.
 if test x${ISLLIBS} != x ; then
   saved_CXXFLAGS=$CXXFLAGS
   CXXFLAGS=$CXXFLAGS $ISLINC
@@ -28485,6 +28487,29 @@ rm -f core conftest.err conftest.$ac_objext \
   { $as_echo $as_me:${as_lineno-$LINENO}: result: 
$ac_has_isl_schedule_constraints_compute_schedule 5
 $as_echo $ac_has_isl_schedule_constraints_compute_schedule 6; }
 
+  { $as_echo $as_me:${as_lineno-$LINENO}: checking Checking for 
isl_options_set_schedule_serialize_sccs 5
+$as_echo_n checking Checking for isl_options_set_schedule_serialize_sccs...  
6; }
+  cat confdefs.h - _ACEOF conftest.$ac_ext
+/* end confdefs.h.  */
+#include isl/schedule.h
+int
+main ()
+{
+isl_options_set_schedule_serialize_sccs (NULL, 0);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_link $LINENO; then :
+  ac_has_isl_options_set_schedule_serialize_sccs=yes
+else
+  ac_has_isl_options_set_schedule_serialize_sccs=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+conftest$ac_exeext conftest.$ac_ext
+  { $as_echo $as_me:${as_lineno-$LINENO}: result: 
$ac_has_isl_options_set_schedule_serialize_sccs 5
+$as_echo $ac_has_isl_options_set_schedule_serialize_sccs 6; }
+
   LIBS=$saved_LIBS
   CXXFLAGS=$saved_CXXFLAGS
 
@@ -28493,6 +28518,12 @@ $as_echo 
$ac_has_isl_schedule_constraints_compute_schedule 6; }
 $as_echo #define HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE 1 confdefs.h
 
   fi
+
+  if test x$ac_has_isl_options_set_schedule_serialize_sccs = xyes; then
+
+$as_echo #define HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS 1 confdefs.h
+
+  fi
 fi
 
 # Check for plugin support
diff --git a/gcc/configure.ac b/gcc/configure.ac
index cb14639..7fb964a 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5725,6 +5725,8 @@ fi
 
 # Check whether isl_schedule_constraints_compute_schedule is available;
 # it's new in ISL-0.13.
+# Check whether isl_options_set_schedule_serialize_sccs is available;
+# it's new in ISL-0.15.
 if test x${ISLLIBS} != x ; then
   saved_CXXFLAGS=$CXXFLAGS
   CXXFLAGS=$CXXFLAGS $ISLINC
@@ -5738,6 +5740,13 @@ if test x${ISLLIBS} != x ; then
   [ac_has_isl_schedule_constraints_compute_schedule=no])
   AC_MSG_RESULT($ac_has_isl_schedule_constraints_compute_schedule)
 
+  AC_MSG_CHECKING([Checking for isl_options_set_schedule_serialize_sccs])
+  AC_TRY_LINK([#include isl/schedule.h],
+  [isl_options_set_schedule_serialize_sccs (NULL, 0);],
+  [ac_has_isl_options_set_schedule_serialize_sccs=yes],
+  [ac_has_isl_options_set_schedule_serialize_sccs=no])
+  AC_MSG_RESULT($ac_has_isl_options_set_schedule_serialize_sccs)
+
   LIBS=$saved_LIBS
   CXXFLAGS=$saved_CXXFLAGS
 
@@ -5745,6 +5754,11 @@ if test x${ISLLIBS} != x ; then
  AC_DEFINE(HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE, 1,
[Define if isl_schedule_constraints_compute_schedule exists.])
   fi
+
+  if test x$ac_has_isl_options_set_schedule_serialize_sccs = xyes; then
+ AC_DEFINE(HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS, 1,
+   [Define if isl_options_set_schedule_serialize_sccs exists.])
+  fi
 fi
 
 GCC_ENABLE_PLUGINS
diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c
index 50fe73e..9a0986d 100644
--- a/gcc/graphite-dependences.c
+++ b/gcc/graphite-dependences.c
@@ -205,7 +205,7 @@ scop_get_transformed_schedule (scop_p scop, vecpoly_bb_p 
pbbs)
 /* Helper function used on each MAP of a isl_union_map.  Computes the
maximal output dimension.  */
 
-static int
+static isl_stat
 max_number_of_out_dimensions (__isl_take isl_map *map, void *user)
 {
   int global_max = *((int *) user);
@@ -217,7 +217,7 @@ max_number_of_out_dimensions (__isl_take isl_map *map, void 
*user)
 
   isl_map_free (map);
   isl_space_free (space);
-  return 0;
+  return isl_stat_ok;
 }
 
 /* Extends the output dimension of MAP to MAX dimensions.  */
@@ -241,12 +241,12 @@ struct extend_schedule_str {
 
 /* Helper 

[C/C++ PATCH] Implement -Wtautological-compare (PR c++/66555, c/54979)

2015-07-14 Thread Marek Polacek
Code such as if (i == i) is hardly ever desirable, so we should be able
to warn about this to prevent dumb mistakes.

This ought to be easy in principle: when we see a comparison, just check
for operand_equal_p of operands, and if they're the same, warn.

In reality, it of course ain't that simple.
Doing just this resulted in a spate of warnings when crunching the sse.md
file, where we have a code like mode_nunits[V16SImode] == 
mode_nunits[V32HImode].
Both V16SImode and V32HImode happen to be defined to 76 so we issued a bogus
warning.  To mollify these bogus warnings I had to use walk_tree and bail out if
we find an ARRAY_REF with a constant index in the comparison.
(We're still able to warn for a[i] == a[i].)
I also had to jump through the hoops to not warn on several other cases.

This warning is enabled by -Wall.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-07-14  Marek Polacek  pola...@redhat.com

PR c++/66555
PR c/54979
* c-common.c (find_array_ref_with_const_idx_r): New function.
(warn_tautological_cmp): New function.
* c-common.h (warn_tautological_cmp): Declare.
* c.opt (Wtautological-compare): New option.

* c-typeck.c (parser_build_binary_op): Call warn_tautological_cmp.

* call.c (build_new_op_1): Call warn_tautological_cmp.
* pt.c (tsubst_copy_and_build): Use sentinel to suppress tautological
compare warnings.

* doc/invoke.texi: Document -Wtautological-compare.

* c-c++-common/Wtautological-compare-1.c: New test.

diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
index 84e7242..6ceed36 100644
--- gcc/c-family/c-common.c
+++ gcc/c-family/c-common.c
@@ -1849,6 +1849,70 @@ warn_logical_operator (location_t location, enum 
tree_code code, tree type,
 }
 }
 
+/* Helper function for warn_tautological_cmp.  Look for ARRAY_REFs
+   with constant indices.  */
+
+static tree
+find_array_ref_with_const_idx_r (tree *expr_p, int *walk_subtrees, void *data)
+{
+  tree expr = *expr_p;
+
+  if ((TREE_CODE (expr) == ARRAY_REF
+   || TREE_CODE (expr) == ARRAY_RANGE_REF)
+   TREE_CODE (TREE_OPERAND (expr, 1)) == INTEGER_CST)
+{
+  *(bool *) data = true;
+  *walk_subtrees = 0;
+}
+
+  return NULL_TREE;
+}
+
+/* Warn if a self-comparison always evaluates to true or false.  LOC
+   is the location of the comparison with code CODE, LHS and RHS are
+   operands of the comparison.  */
+
+void
+warn_tautological_cmp (location_t loc, enum tree_code code, tree lhs, tree rhs)
+{
+  if (TREE_CODE_CLASS (code) != tcc_comparison)
+return;
+
+  /* We do not warn for constants because they are typical of macro
+ expansions that test for features, sizeof, and similar.  */
+  if (CONSTANT_CLASS_P (lhs) || CONSTANT_CLASS_P (rhs))
+return;
+
+  /* Don't warn for e.g.
+ HOST_WIDE_INT n;
+ ...
+ if (n == (long) n) ...
+   */
+  if ((CONVERT_EXPR_P (lhs) || TREE_CODE (lhs) == NON_LVALUE_EXPR)
+  ^ (CONVERT_EXPR_P (rhs) || TREE_CODE (rhs) == NON_LVALUE_EXPR))
+return;
+
+  if (operand_equal_p (lhs, rhs, 0))
+{
+  /* Don't warn about array references with constant indices;
+these are likely to come from a macro.  */
+  bool found = false;
+  walk_tree_without_duplicates (lhs, find_array_ref_with_const_idx_r,
+   found);
+  if (found)
+   return;
+  const bool always_true = (code == EQ_EXPR || code == LE_EXPR
+   || code == GE_EXPR || code == UNLE_EXPR
+   || code == UNGE_EXPR || code == UNEQ_EXPR);
+  if (always_true)
+   warning_at (loc, OPT_Wtautological_compare,
+   self-comparison always evaluates to true);
+  else
+   warning_at (loc, OPT_Wtautological_compare,
+   self-comparison always evaluates to false);
+}
+}
+
 /* Warn about logical not used on the left hand side operand of a comparison.
This function assumes that the LHS is inside of TRUTH_NOT_EXPR.
Do not warn if RHS is of a boolean type.  */
diff --git gcc/c-family/c-common.h gcc/c-family/c-common.h
index a2a4621..b891bbd 100644
--- gcc/c-family/c-common.h
+++ gcc/c-family/c-common.h
@@ -812,6 +812,7 @@ extern bool warn_if_unused_value (const_tree, location_t);
 extern void warn_logical_operator (location_t, enum tree_code, tree,
   enum tree_code, tree, enum tree_code, tree);
 extern void warn_logical_not_parentheses (location_t, enum tree_code, tree);
+extern void warn_tautological_cmp (location_t, enum tree_code, tree, tree);
 extern void check_main_parameter_types (tree decl);
 extern bool c_determine_visibility (tree);
 extern bool vector_types_compatible_elements_p (tree, tree);
diff --git gcc/c-family/c.opt gcc/c-family/c.opt
index 285952e..2f6369b 100644
--- gcc/c-family/c.opt
+++ gcc/c-family/c.opt
@@ -840,6 +840,10 @@ Wsystem-headers
 C ObjC C++ ObjC++ Warning
 ; 

RE: [PATCH, MIPS] Fix restoration of hi/lo in MIPS64R2 interrupt handlers

2015-07-14 Thread Robert Suchanek
Hi Catherine,

 Hi Robert,
 The patch is OK, but will you please name the test something other than the
 date?

OK. I'll change it to interrupt_handler-5.c, add a comment and commit after
approval for the new interrupt handler options.

Regards,
Robert

diff --git a/gcc/testsuite/gcc.target/mips/interrupt_handler-5.c 
b/gcc/testsuite/gcc.target/mips/interrupt_handler-5.c
new file mode 100644
index 000..6419479
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/interrupt_handler-5.c
@@ -0,0 +1,8 @@
+/* Test the interrupt handler with an accumulator.  */
+/* { dg-do assemble } */
+/* { dg-options -mips64r2 } */
+_Accum a;
+__attribute__((interrupt))
+void foo () {
+  a = a*a;
+}
-- 
2.2.2



[committed, PATCH] Sync toplevel configure with binutils-gdb

2015-07-14 Thread H.J. Lu
Sync with binutils-gdb:
2015-05-13  John David Anglin  dave.ang...@bell.net

* configure.ac: Disable configuration of GDB for HPUX targets.
* configure: Regenerate.

2015-04-01  H.J. Lu  hongjiu...@intel.com

* configure.ac: Add --with-system-zlib.
* configure: Regenerated.

2015-01-28  James Bowman  james.bow...@ftdichip.com

* configure.ac: Add FT32 support.
* configure: Regenerate.

2015-01-12  Anthony Green  gr...@moxielogic.com

* configure.ac: Don't disable gprof for moxie.
* configure: Rebuild.
---
 ChangeLog| 23 +++
 configure| 27 ++-
 configure.ac | 21 -
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 719fa2d..27edb46 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,29 @@
 2015-07-14  H.J. Lu  hongjiu...@intel.com
 
Sync with binutils-gdb:
+   2015-05-13  John David Anglin  dave.ang...@bell.net
+
+   * configure.ac: Disable configuration of GDB for HPUX targets.
+   * configure: Regenerate.
+
+   2015-04-01  H.J. Lu  hongjiu...@intel.com
+
+   * configure.ac: Add --with-system-zlib.
+   * configure: Regenerated.
+
+   2015-01-28  James Bowman  james.bow...@ftdichip.com
+
+   * configure.ac: Add FT32 support.
+   * configure: Regenerate.
+
+   2015-01-12  Anthony Green  gr...@moxielogic.com
+
+   * configure.ac: Don't disable gprof for moxie.
+   * configure: Rebuild.
+
+2015-07-14  H.J. Lu  hongjiu...@intel.com
+
+   Sync with binutils-gdb:
2015-05-01  H.J. Lu  hongjiu...@intel.com
 
PR ld/18355
diff --git a/configure b/configure
index d7bc62f..f060986 100755
--- a/configure
+++ b/configure
@@ -748,6 +748,7 @@ ospace_frag'
 ac_user_opts='
 enable_option_checking
 with_build_libsubdir
+with_system_zlib
 enable_as_accelerator_for
 enable_offload_targets
 enable_gold
@@ -1519,6 +1520,7 @@ Optional Packages:
   --with-PACKAGE[=ARG]use PACKAGE [ARG=yes]
   --without-PACKAGE   do not use PACKAGE (same as --with-PACKAGE=no)
   --with-build-libsubdir=DIR  Directory where to find libraries for build 
system
+  --with-system-zlib  use installed libz
   --with-mpc=PATH specify prefix directory for installed MPC package.
   Equivalent to --with-mpc-include=PATH/include plus
   --with-mpc-lib=PATH/lib
@@ -2855,6 +2857,12 @@ if test x$with_gnu_as = xno ; then
 fi
 
 use_included_zlib=
+
+# Check whether --with-system-zlib was given.
+if test ${with_system_zlib+set} = set; then :
+  withval=$with_system_zlib;
+fi
+
 # Make sure we don't let ZLIB be added if we didn't want it.
 if test x$with_system_zlib = xyes ; then
   use_included_zlib=no
@@ -3521,6 +3529,9 @@ case ${target} in
   rs6000-*-aix*)
 noconfigdirs=$noconfigdirs ${libgcj}
 ;;
+  ft32-*-*)
+noconfigdirs=$noconfigdirs ${libgcj}
+;;
   *-*-lynxos*)
 noconfigdirs=$noconfigdirs ${libgcj}
 ;;
@@ -3754,8 +3765,11 @@ case ${target} in
   fr30-*-elf*)
 noconfigdirs=$noconfigdirs gdb
 ;;
+  ft32-*-*)
+noconfigdirs=$noconfigdirs target-rda gprof
+;;
   moxie-*-*)
-noconfigdirs=$noconfigdirs gprof
+noconfigdirs=$noconfigdirs
 ;;
   h8300*-*-*)
 noconfigdirs=$noconfigdirs target-libgloss
@@ -3765,6 +3779,12 @@ case ${target} in
 ;;
   hppa1.1-*-osf* | hppa1.1-*-bsd* )
 ;;
+  hppa*64*-*-hpux*)
+noconfigdirs=$noconfigdirs gdb
+;;
+  hppa*-*-hpux11*)
+noconfigdirs=$noconfigdirs gdb ld
+;;
   hppa*64*-*-linux*)
 ;;
   hppa*-*-linux*)
@@ -3774,9 +3794,6 @@ case ${target} in
   hppa*-*-openbsd* | \
   hppa*64*-*-*)
 ;;
-  hppa*-hp-hpux11*)
-noconfigdirs=$noconfigdirs ld
-;;
   hppa*-*-pro*)
 ;;
   hppa*-*-*)
@@ -3791,7 +3808,7 @@ case ${target} in
 ;;
   ia64*-**-hpux*)
 # No ld support yet.
-noconfigdirs=$noconfigdirs libgui itcl ld
+noconfigdirs=$noconfigdirs gdb libgui itcl ld
 ;;
   ia64*-*-*vms*)
 # No ld support yet.
diff --git a/configure.ac b/configure.ac
index dfc7e75..603bdf6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -245,6 +245,8 @@ if test x$with_gnu_as = xno ; then
 fi
 
 use_included_zlib=
+AC_ARG_WITH(system-zlib,
+[AS_HELP_STRING([--with-system-zlib], [use installed libz])])
 # Make sure we don't let ZLIB be added if we didn't want it.
 if test x$with_system_zlib = xyes ; then
   use_included_zlib=no
@@ -867,6 +869,9 @@ case ${target} in
   rs6000-*-aix*)
 noconfigdirs=$noconfigdirs ${libgcj}
 ;;
+  ft32-*-*)
+noconfigdirs=$noconfigdirs ${libgcj}
+;;
   *-*-lynxos*)
 noconfigdirs=$noconfigdirs ${libgcj}
 ;;
@@ -1100,8 +1105,11 @@ case ${target} in
   fr30-*-elf*)
 noconfigdirs=$noconfigdirs gdb
 ;;
+  ft32-*-*)
+noconfigdirs=$noconfigdirs target-rda gprof
+;;
   moxie-*-*)
-noconfigdirs=$noconfigdirs gprof
+

Re: [PATCH, PR testsuite/66734] Support MPX tests in lto.exp

2015-07-14 Thread Jeff Law

On 07/14/2015 09:49 AM, Ilya Enkovich wrote:

Hi,

This patch initializes MPX paths in lto.exp to give check_effective_target_mpx 
a chance.  Is it OK for trunk?

Thanks,
Ilya
--
gcc/testsuite/

2015-07-14  Ilya Enkovich  enkovich@gmail.com

PR testsuite/66734
* gcc.dg/lto/lto.exp: Initialize MPX.

OK.
jeff



Re: [PATCH] Disable all target libraries if not building gcc

2015-07-14 Thread H.J. Lu
On Tue, Mar 17, 2015 at 5:08 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Mon, Mar 16, 2015 at 8:01 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Mon, Mar 16, 2015 at 7:26 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Mon, Mar 16, 2015 at 4:43 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Mon, Mar 16, 2015 at 4:41 PM, Joseph Myers jos...@codesourcery.com 
 wrote:
 On Mon, 16 Mar 2015, H.J. Lu wrote:

 On Mon, Mar 16, 2015 at 3:50 PM, Joseph Myers jos...@codesourcery.com 
 wrote:
  On Fri, 13 Mar 2015, H.J. Lu wrote:
 
  I am working on SHF_COMPRESSED support:
 
  http://www.sco.com/developers/gabi/latest/ch4.sheader.html
 
  I will import zlib from GCC source tree into binutils-gdb tree.  But 
  zlib
  failed to build as a target library in binutils-gdb.  There is no 
  need to
  build target libraries if not building gcc.  OK for master?
 
  This is definitely wrong.  newlib / libgloss is a target library that
  certainly makes sense to build separately from GCC, and the toplevel
  configure / build code should be identical in all three repositories 
  (GCC,
  binutils-gdb, newlib-cygwin).

 We need to work out something to avoid building target libraries
 in binutils.

 I suggest not building zlib as a target library unless libgcj is also
 being built as a target library, given that there should be no need to
 built it as a target library in isolation.


 The logic is there.  But somehow it doesn't work for binutils.
 I will take another look.


 Here is a patch.  It excludes target-zlib if target-libjava isn't built.
 Tested in binutils and GCC with java.


 This version is more flexible to support future target libraries
 which depend on target zlib.


 I reverted my previous commit and checked in this one.


I am checking this into GCC.


-- 
H.J.
From e8c4c38a1cbbaa6105dab86c2c7a3f597c498632 Mon Sep 17 00:00:00 2001
From: H.J. Lu hjl.to...@gmail.com
Date: Tue, 14 Jul 2015 08:29:32 -0700
Subject: [PATCH] Sync toplevel configure with binutils-gdb

	Sync with binutils-gdb:
	2015-03-17  H.J. Lu  hongjiu...@intel.com

	* configure.ac (target_configdirs): Exclude target-zlib if
	target-libjava isn't built.
	* configure: Regenerated.
---
 ChangeLog| 9 +
 configure| 9 +
 configure.ac | 9 +
 3 files changed, 27 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 27edb46..c1582b9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,15 @@
 2015-07-14  H.J. Lu  hongjiu...@intel.com
 
 	Sync with binutils-gdb:
+	2015-03-17  H.J. Lu  hongjiu...@intel.com
+
+	* configure.ac (target_configdirs): Exclude target-zlib if
+	target-libjava isn't built.
+	* configure: Regenerated.
+
+2015-07-14  H.J. Lu  hongjiu...@intel.com
+
+	Sync with binutils-gdb:
 	2015-05-13  John David Anglin  dave.ang...@bell.net
 
 	* configure.ac: Disable configuration of GDB for HPUX targets.
diff --git a/configure b/configure
index f060986..5ba9489 100755
--- a/configure
+++ b/configure
@@ -6575,6 +6575,15 @@ for i in ${target_configdirs_all} ; do
   fi
 done
 
+# Exclude target-zlib if target-libjava isn't built.
+case ${target_configdirs} in
+*target-libjava*)
+  ;;
+*)
+  target_configdirs=`echo ${target_configdirs} | sed -e 's/target-zlib//'`
+  ;;
+esac
+
 # libiberty-linker-plugin is special: it doesn't have its own source directory,
 # so we have to add it after the preceding checks.
 if test x$extra_linker_plugin_flags$extra_linker_plugin_configure_flags != x
diff --git a/configure.ac b/configure.ac
index 603bdf6..2ff9be0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2265,6 +2265,15 @@ for i in ${target_configdirs_all} ; do
   fi
 done
 
+# Exclude target-zlib if target-libjava isn't built.
+case ${target_configdirs} in
+*target-libjava*)
+  ;;
+*)
+  target_configdirs=`echo ${target_configdirs} | sed -e 's/target-zlib//'`
+  ;;
+esac
+
 # libiberty-linker-plugin is special: it doesn't have its own source directory,
 # so we have to add it after the preceding checks.
 if test x$extra_linker_plugin_flags$extra_linker_plugin_configure_flags != x
-- 
2.4.3



[gomp4.1] Parsing of {use,is}_device_ptr clauses

2015-07-14 Thread Jakub Jelinek
Hi!

I've committed this patch to parse/gimplify/omp lower {use,is}_device_ptr
clauses which I understood are meant to act like firstprivate clause with
special side-effects, but they are ignored for now during OpenMP expansion.

I'm committing this now, since it still shouldn't break the tests.

I've skipped parsing of struct members in map/to/from clauses for now,
because it is too unclear (#383) to me, waiting for clarifications.

The next step, adjusting gimplifier to the new rules, will break some of the
tests, and then we need to find out how to implement firstprivate clause on
target construct, how to implement newly the pointer translation for array
sections base (that should now be handled like private with special action),
how to implement is_device_ptr (supposedly just as firstprivate - I think
this clause got added just because of OpenCL) and how to implement
use_device_ptr.

2015-07-14  Jakub Jelinek  ja...@redhat.com

* tree-core.h (enum omp_clause_code): Add
OMP_CLAUSE_USE_DEVICE_PTR and OMP_CLAUSE_IS_DEVICE_PTR.
* gimplify.c (gimplify_scan_omp_clauses): Handle
OMP_CLAUSE_USE_DEVICE_PTR and OMP_CLAUSE_IS_DEVICE_PTR
like OMP_CLAUSE_FIRSTPRIVATE.
(gimplify_adjust_omp_clauses): Handle OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
* tree.c (omp_clause_num_ops): Add entries for
OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
(omp_clause_code_name): Likewise.
(walk_tree_1): Handle OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
* tree-nested.c (convert_nonlocal_omp_clauses,
convert_local_omp_clauses): Likewise.
* omp-low.c (scan_sharing_clauses): Likewise.
* tree-pretty-print.c (dump_omp_clause): Likewise.
c-family/
* c-omp.c (c_omp_split_clauses): Improve function comment.
Handle OMP_CLAUSE_IS_DEVICE_PTR.  Adjust for OMP_CLAUSE_PRIVATE
and OMP_CLAUSE_FIRSTPRIVATE being supported also on
#pragma omp target construct.
* c-pragma.h (enum pragma_omp_clause): Add
PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR and
PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR.
c/
* c-parser.c (c_parser_omp_clause_name): Parse use_device_ptr
and is_device_ptr.
(c_parser_omp_clause_use_device_ptr,
c_parser_omp_clause_is_device_ptr): New functions.
(c_parser_omp_all_clauses): Handle
PRAGMA_OMP_CLAUSE_{IS,USE}_DEVICE_PTR.
(OMP_TARGET_DATA_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR.
(OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR.
* c-typeck.c (c_finish_omp_clauses): Handle
OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
cp/
* parser.c (cp_parser_omp_clause_name): Parse use_device_ptr
and is_device_ptr.
(cp_parser_omp_all_clauses): Handle OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
(OMP_TARGET_DATA_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR.
(OMP_TARGET_CLAUSE_MASK): Add PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR.
* pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_{USE,IS}_DEVICE_PTR.
* semantics.c (finish_omp_clauses): Likewise.

--- gcc/gimplify.c.jj   2015-07-14 14:29:49.274536887 +0200
+++ gcc/gimplify.c  2015-07-14 16:17:52.993914085 +0200
@@ -6394,6 +6394,13 @@ gimplify_scan_omp_clauses (tree *list_p,
}
  goto do_notice;
 
+   case OMP_CLAUSE_USE_DEVICE_PTR:
+ flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT;
+ goto do_add;
+   case OMP_CLAUSE_IS_DEVICE_PTR:
+ flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT;
+ goto do_add;
+
do_add:
  decl = OMP_CLAUSE_DECL (c);
do_add_decl:
@@ -6957,6 +6964,8 @@ gimplify_adjust_omp_clauses (gimple_seq
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_HINT:
case OMP_CLAUSE_DEFAULTMAP:
+   case OMP_CLAUSE_USE_DEVICE_PTR:
+   case OMP_CLAUSE_IS_DEVICE_PTR:
case OMP_CLAUSE__CILK_FOR_COUNT_:
case OMP_CLAUSE_ASYNC:
case OMP_CLAUSE_WAIT:
--- gcc/tree.c.jj   2015-07-14 14:29:49.032547791 +0200
+++ gcc/tree.c  2015-07-14 14:49:57.73189 +0200
@@ -291,6 +291,8 @@ unsigned const char omp_clause_num_ops[]
   2, /* OMP_CLAUSE_FROM  */
   2, /* OMP_CLAUSE_TO  */
   2, /* OMP_CLAUSE_MAP  */
+  1, /* OMP_CLAUSE_USE_DEVICE_PTR  */
+  1, /* OMP_CLAUSE_IS_DEVICE_PTR  */
   2, /* OMP_CLAUSE__CACHE_  */
   1, /* OMP_CLAUSE_DEVICE_RESIDENT  */
   1, /* OMP_CLAUSE_USE_DEVICE  */
@@ -358,6 +360,8 @@ const char * const omp_clause_code_name[
   from,
   to,
   map,
+  use_device_ptr,
+  is_device_ptr,
   _cache_,
   device_resident,
   use_device,
@@ -11388,6 +11392,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func
case OMP_CLAUSE_GRAINSIZE:
case OMP_CLAUSE_NUM_TASKS:
case OMP_CLAUSE_HINT:
+   case OMP_CLAUSE_USE_DEVICE_PTR:
+   case OMP_CLAUSE_IS_DEVICE_PTR:
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__SIMDUID_:
case OMP_CLAUSE__CILK_FOR_COUNT_:
--- gcc/tree-nested.c.jj2015-07-14 14:29:49.329534408 +0200
+++ 

[committed, PATCH] Update copyright year in include

2015-07-14 Thread H.J. Lu
---
 include/ansidecl.h  | 4 +---
 include/demangle.h  | 5 ++---
 include/dwarf2.def  | 4 +---
 include/dwarf2.h| 4 +---
 include/dyn-string.h| 3 +--
 include/fibheap.h   | 3 +--
 include/filenames.h | 2 +-
 include/floatformat.h   | 3 +--
 include/fnmatch.h   | 2 +-
 include/gcc-c-fe.def| 2 +-
 include/gcc-c-interface.h   | 2 +-
 include/gcc-interface.h | 2 +-
 include/gdb/gdb-index.h | 2 +-
 include/getopt.h| 3 +--
 include/hashtab.h   | 3 +--
 include/leb128.h| 2 +-
 include/longlong.h  | 2 +-
 include/lto-symtab.h| 2 +-
 include/md5.h   | 2 +-
 include/objalloc.h  | 2 +-
 include/obstack.h   | 4 +---
 include/plugin-api.h| 2 +-
 include/safe-ctype.h| 2 +-
 include/sha1.h  | 3 +--
 include/simple-object.h | 2 +-
 include/sort.h  | 2 +-
 include/splay-tree.h| 3 +--
 include/symcat.h| 2 +-
 include/timeval-utils.h | 2 +-
 include/vtv-change-permission.h | 3 +--
 include/xregex2.h   | 3 +--
 include/xtensa-config.h | 3 +--
 32 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/include/ansidecl.h b/include/ansidecl.h
index 04d75c3..224627d 100644
--- a/include/ansidecl.h
+++ b/include/ansidecl.h
@@ -1,7 +1,5 @@
 /* ANSI and traditional C compatability macros
-   Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-   2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2013
-   Free Software Foundation, Inc.
+   Copyright (C) 1991-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
 
 This program is free software; you can redistribute it and/or modify
diff --git a/include/demangle.h b/include/demangle.h
index d2a6731..26c59b5 100644
--- a/include/demangle.h
+++ b/include/demangle.h
@@ -1,7 +1,6 @@
 /* Defs for interface to demanglers.
-   Copyright 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002,
-   2003, 2004, 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
-   
+   Copyright (C) 1992-2015 Free Software Foundation, Inc.
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License
as published by the Free Software Foundation; either version 2, or
diff --git a/include/dwarf2.def b/include/dwarf2.def
index ea8127c..e61cfbe 100644
--- a/include/dwarf2.def
+++ b/include/dwarf2.def
@@ -1,9 +1,7 @@
 /* -*- c -*-
Declarations and definitions of codes relating to the DWARF2 and
DWARF3 symbolic debugging information formats.
-   Copyright (C) 1992, 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002,
-   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-   Free Software Foundation, Inc.
+   Copyright (C) 1992-2015 Free Software Foundation, Inc.
 
Written by Gary Funck (g...@intrepid.com) The Ada Joint Program
Office (AJPO), Florida State University and Silicon Graphics Inc.
diff --git a/include/dwarf2.h b/include/dwarf2.h
index e05955c..4ada871 100644
--- a/include/dwarf2.h
+++ b/include/dwarf2.h
@@ -1,8 +1,6 @@
 /* Declarations and definitions of codes relating to the DWARF2 and
DWARF3 symbolic debugging information formats.
-   Copyright (C) 1992, 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002,
-   2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-   Free Software Foundation, Inc.
+   Copyright (C) 1992-2015 Free Software Foundation, Inc.
 
Written by Gary Funck (g...@intrepid.com) The Ada Joint Program
Office (AJPO), Florida State University and Silicon Graphics Inc.
diff --git a/include/dyn-string.h b/include/dyn-string.h
index 2b14727..7c3684b 100644
--- a/include/dyn-string.h
+++ b/include/dyn-string.h
@@ -1,6 +1,5 @@
 /* An abstract string datatype.
-   Copyright (C) 1998, 1999, 2000, 2002, 2004, 2005, 2009
-   Free Software Foundation, Inc.
+   Copyright (C) 1998-2015 Free Software Foundation, Inc.
Contributed by Mark Mitchell (m...@markmitchell.com).
 
 This file is part of GCC.
diff --git a/include/fibheap.h b/include/fibheap.h
index a3d09dd..85b10c5 100644
--- a/include/fibheap.h
+++ b/include/fibheap.h
@@ -1,6 +1,5 @@
 /* A Fibonacci heap datatype.
-   Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009
-   Free Software Foundation, Inc.
+   Copyright (C) 1998-2015 Free Software Foundation, Inc.
Contributed by Daniel Berlin (d...@cgsoftware.com).
 
 This file is part of GCC.
diff --git a/include/filenames.h b/include/filenames.h
index 470c5e0..1161daa 100644
--- a/include/filenames.h
+++ b/include/filenames.h
@@ -5,7 +5,7 @@
use forward- and back-slash in path names interchangeably, and
some of them have case-insensitive file names.
 
-   Copyright 2000, 2001, 2007, 2010 Free Software Foundation, Inc.
+   Copyright 

[PATCH, PR testsuite/66734] Support MPX tests in lto.exp

2015-07-14 Thread Ilya Enkovich
Hi,

This patch initializes MPX paths in lto.exp to give check_effective_target_mpx 
a chance.  Is it OK for trunk?

Thanks,
Ilya
--
gcc/testsuite/

2015-07-14  Ilya Enkovich  enkovich@gmail.com

PR testsuite/66734
* gcc.dg/lto/lto.exp: Initialize MPX.


diff --git a/gcc/testsuite/gcc.dg/lto/lto.exp b/gcc/testsuite/gcc.dg/lto/lto.exp
index c1d7c4c..7b919c2 100644
--- a/gcc/testsuite/gcc.dg/lto/lto.exp
+++ b/gcc/testsuite/gcc.dg/lto/lto.exp
@@ -42,6 +42,7 @@ if { ![check_effective_target_lto] } {
 
 gcc_init
 lto_init no-mathlib
+mpx_init
 
 # Define an identifier for use with this suite to avoid name conflicts
 # with other lto tests running at the same time.


RE: [PATCH, MIPS] Fix restoration of hi/lo in MIPS64R2 interrupt handlers

2015-07-14 Thread Matthew Fortune
Robert Suchanek robert.sucha...@imgtec.com writes:
  Hi Robert,
  The patch is OK, but will you please name the test something other than the
  date?
 
 OK. I'll change it to interrupt_handler-5.c, add a comment and commit after
 approval for the new interrupt handler options.

I believe this change is independent of the new attributes so feel free to 
commit
it before.

Thanks,
Matthew


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-14 Thread Richard Earnshaw
On 13/07/15 16:29, Michael Matz wrote:
 Hi,
 
 On Mon, 13 Jul 2015, Richard Biener wrote:
 
 On Fri, Jul 10, 2015 at 5:46 PM, Jim Wilson jim.wil...@linaro.org wrote:
 On Tue, Jul 7, 2015 at 2:35 PM, Richard Biener
 richard.guent...@gmail.com wrote:
 On July 7, 2015 6:29:21 PM GMT+02:00, Jim Wilson jim.wil...@linaro.org 
 wrote:
 signed sub-word locals.  Thus to detect the need for a conversion, you
 have to have the decls, and we don't have them here.  There is also

 It probably is.  The decks for the parameter based SSA names are 
 available, for the PHI destination there might be no decl.

 I tried looking again, and found the decls.  I'm able to get correct
 code for my testcase with the attached patch to force the conversion.
 It is rather inelegant, but I think I can cache the values I need to
 make this simpler and cleaner.  I still don't have decls from
 insert_part_to_rtx_on_edge and insert_rtx_to_part_on_edge, but it
 looks like those are for breaking cycles, and hence might not need
 conversions.

 Yes, that looks like a defect.  CCing Micha who wrote this code
 
 I think it's a backend bug that parameters and locals are extended 
 differently.  The code in tree-outof-ssa was written with the assumption 
 that the modes of RTL objects might be different (larger) than the tree 
 types suggest, but that they be _consistent_, i.e. that the particular 
 extension depends on only the types, not on the tree-type of the decl.
 

We went through this a couple of weeks back.  The backend documentation
for PROMOTE_MODE says:

 For most machines, the macro definition does not change @var{unsignedp}.
However, some machines, have instructions that preferentially handle
either signed or unsigned quantities of certain modes.  For example, on
the DEC Alpha, 32-bit loads from memory and 32-bit add instructions
sign-extend the result to 64 bits.  On such machines, set
@var{unsignedp} according to which kind of extension is more efficient.

So clearly it anticipates that all permitted extensions should work, and
in particular it makes no mention of this having to match some
abi-mandated promotions.  That makes this a MI bug not a target one.

R.


 I think the above assumption does make sense because it's also a 
 fundamental assumption in the whole gimple pipeline, types matter, not the 
 objects (or better, we slowly but surely work towards this).  Hence such 
 mismatches should either not exist (changing the backend), or should be 
 exposed explicitely during gimplification already.  The latter is a large 
 change, though.
 
 I think dealing with this situation in outof-ssa is a hack and I don't 
 like it.  It would extend the ugliness of different modes for same types 
 even more, and that's something we should (gradually) move away from.
 
 
 Ciao,
 Michael.
 



Re: [C++ Patch] Prefer error + inform to two errors in check_template_shadow

2015-07-14 Thread Paolo Carlini

Hi,

On 07/14/2015 05:07 PM, Jason Merrill wrote:

On 07/13/2015 09:41 AM, Paolo Carlini wrote:

+++ testsuite/g++.dg/template/crash81.C (working copy)
@@ -3,6 +3,6 @@
  struct A
  {
templateT::X struct X; // { dg-error 'T' has not been 
declared T }
-  // { dg-error declaration of 'templateint X struct A::X' 
A::X { target *-*-* } 5 }
-  // { dg-error shadows template parm 'int X' shadow { target 
*-*-* } 5 }
+  // { dg-error declaration of 'templateint X struct A::X' 
shadows A::X { target *-*-* } 5 }

+  // { dg-message template parameter 'X'  { target *-*-* } 5 }


I don't see any reason to check for specific diagnostics here; the 
latter two messages are poor error-recovery, not something to test for.
Indeed, I noticed the error recovery issue, which I didn't know, and 
considered looking into it, when I'm done with some other things. In the 
meanwhile I'm shortening the expected error/inform.


Thanks,
Paolo.


Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Jakub Jelinek
On Tue, Jul 14, 2015 at 09:09:01AM +0200, Sebastian Huber wrote:
 I pasted the wrong version, since I wanted to check that I get a failure in
 case generation % BAR_INCR != 0.

There is also config/posix/bar* implementation, which would need to be
checked too.  I'd suggest first committing a patch with both sem and mutex
being destroyed (note, config/posix/bar* has two semaphores in it), and
perhaps if you feel strongly about it an incremental patch to avoid the
destroying/initialization of the barrier if it works even with
config/posix/bar*.

Jakub


Re: [PATCH] [gomp] Recycle non-nested team if possible

2015-07-14 Thread Jakub Jelinek
Hi!

On Tue, Jul 14, 2015 at 01:47:41PM +0200, Sebastian Huber wrote:
 With --disable-linux-futex (without asserts) I got several failures, but
 none of them is related to my patch, e.g. they are of the following type
 
 /tmp/ccw4RofR.o: In function `main':
 data-already-3.f:(.text+0x56): undefined reference to `acc_copyin_array_h_'

CCing Thomas on that, that is for the OpenACC folks to resolve.

 2015-07-14  Sebastian Huber  sebastian.hu...@embedded-brains.de
 
   * team.c (get_last_team): New.
   (gomp_new_team): Recycle last non-nested team if possible.
   (gomp_team_end): Move team work share list free lock destruction
   to...
   (free_team): ...here.

Ok for trunk (please put a space in between to and ... and ... and here).

 +  if (team == NULL)
 +{
 +  size_t extra = sizeof (team-ordered_release[0])
 + + sizeof (team-implicit_task[0]);

And make sure to use tab instead of 8 spaces on the above line.

Jakub


Re: [PING] Re: [PATCH] c/66516 - missing diagnostic on taking the address of a builtin function

2015-07-14 Thread Jason Merrill
I wonder if it would make sense to handle this when we actually try to 
emit a reference to one of these functions in the back end, rather than 
in many places through the front end.


If it's going to stay in the front end, the C and C++ front-ends should 
share an implementation of this function, in c-common.c.


Most of the calls in the C++ front end can be replaced by a single call 
in mark_rvalue_use.



-#ifdef ENABLE_CHECKING
+#if 0 // def ENABLE_CHECKING


This change is unrelated.


+#define DECL_IS_GCC_BUILTIN(DECL) \


This macro name could be clearer.  DECL_IS_ONLY_BUILTIN? 
DECL_IS_NOFALLBACK_BUILTIN?


Jason




Re: [PATCH] PR ld/18355: --enable-shared doesn't work

2015-07-14 Thread H.J. Lu
On Fri, May 1, 2015 at 8:32 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Wed, Apr 29, 2015 at 9:12 AM, H.J. Lu hongjiu...@intel.com wrote:
 When bfd is configured as a shared library, we need to configure zlib
 with --enable-host-shared since zlib is used by bfd.  Any comments,
 feedbacks, objections?


 H.J.
 --
 PR ld/18355
 * Makefile.def: Add extra_configure_flags to host zlib.
 * configure.ac (extra_host_zlib_configure_flags): New.  Set
 to --enable-host-shared When bfd is to be built as shared
 library.  AC_SUBST.
 * Makefile.in: Regenerated.
 * configure: Likewise.

 I am checking it in.


I am checking it into GCC.

-- 
H.J.


Re: [C++ Patch] Prefer error + inform to two errors in check_template_shadow

2015-07-14 Thread Jason Merrill

On 07/13/2015 09:41 AM, Paolo Carlini wrote:

+++ testsuite/g++.dg/template/crash81.C (working copy)
@@ -3,6 +3,6 @@
  struct A
  {
templateT::X struct X; // { dg-error 'T' has not been declared T }
-  // { dg-error declaration of 'templateint X struct A::X' A::X { target 
*-*-* } 5 }
-  // { dg-error shadows template parm 'int X' shadow { target *-*-* } 5 }
+  // { dg-error declaration of 'templateint X struct A::X' shadows A::X 
{ target *-*-* } 5 }
+  // { dg-message template parameter 'X'  { target *-*-* } 5 }


I don't see any reason to check for specific diagnostics here; the 
latter two messages are poor error-recovery, not something to test for.


OK with that change.

Jason



Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-14 Thread Richard Biener
On July 14, 2015 6:13:13 PM GMT+02:00, Richard Earnshaw 
richard.earns...@foss.arm.com wrote:
On 13/07/15 16:29, Michael Matz wrote:
 Hi,
 
 On Mon, 13 Jul 2015, Richard Biener wrote:
 
 On Fri, Jul 10, 2015 at 5:46 PM, Jim Wilson jim.wil...@linaro.org
wrote:
 On Tue, Jul 7, 2015 at 2:35 PM, Richard Biener
 richard.guent...@gmail.com wrote:
 On July 7, 2015 6:29:21 PM GMT+02:00, Jim Wilson
jim.wil...@linaro.org wrote:
 signed sub-word locals.  Thus to detect the need for a
conversion, you
 have to have the decls, and we don't have them here.  There is
also

 It probably is.  The decks for the parameter based SSA names are
available, for the PHI destination there might be no decl.

 I tried looking again, and found the decls.  I'm able to get
correct
 code for my testcase with the attached patch to force the
conversion.
 It is rather inelegant, but I think I can cache the values I need
to
 make this simpler and cleaner.  I still don't have decls from
 insert_part_to_rtx_on_edge and insert_rtx_to_part_on_edge, but it
 looks like those are for breaking cycles, and hence might not need
 conversions.

 Yes, that looks like a defect.  CCing Micha who wrote this code
 
 I think it's a backend bug that parameters and locals are extended 
 differently.  The code in tree-outof-ssa was written with the
assumption 
 that the modes of RTL objects might be different (larger) than the
tree 
 types suggest, but that they be _consistent_, i.e. that the
particular 
 extension depends on only the types, not on the tree-type of the
decl.
 

We went through this a couple of weeks back.  The backend documentation
for PROMOTE_MODE says:

 For most machines, the macro definition does not change
@var{unsignedp}.
However, some machines, have instructions that preferentially handle
either signed or unsigned quantities of certain modes.  For example, on
the DEC Alpha, 32-bit loads from memory and 32-bit add instructions
sign-extend the result to 64 bits.  On such machines, set
@var{unsignedp} according to which kind of extension is more
efficient.

So clearly it anticipates that all permitted extensions should work,
and
in particular it makes no mention of this having to match some
abi-mandated promotions.  That makes this a MI bug not a target one.

We could also decide that it is a documentation defect.  Are there any other 
targets with this inconsistency?

FWIW I'd prefer to expose the promoted incoming decls after gimplification. 
Independent on any inconsistency.

Richard.

R.


 I think the above assumption does make sense because it's also a 
 fundamental assumption in the whole gimple pipeline, types matter,
not the 
 objects (or better, we slowly but surely work towards this).  Hence
such 
 mismatches should either not exist (changing the backend), or should
be 
 exposed explicitely during gimplification already.  The latter is a
large 
 change, though.
 
 I think dealing with this situation in outof-ssa is a hack and I
don't 
 like it.  It would extend the ugliness of different modes for same
types 
 even more, and that's something we should (gradually) move away from.
 
 
 Ciao,
 Michael.
 




Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-14 Thread Jim Wilson
On Tue, Jul 14, 2015 at 9:13 AM, Richard Earnshaw
richard.earns...@foss.arm.com wrote:
 We went through this a couple of weeks back.  The backend documentation
 for PROMOTE_MODE says:

I disagree that this is a fully resolved issue.  I see clear problems
with how the ARM port uses PROMOTE_MODE.

  For most machines, the macro definition does not change @var{unsignedp}.
 However, some machines, have instructions that preferentially handle
 either signed or unsigned quantities of certain modes.  For example, on
 the DEC Alpha, 32-bit loads from memory and 32-bit add instructions
 sign-extend the result to 64 bits.  On such machines, set
 @var{unsignedp} according to which kind of extension is more efficient.

The Alpha case is different than the ARM case.  The Alpha only changes
sign for 32-bit values in 64-bit registers.  The alpha happens to have
a nice set of instructions that operates on 32-bit values, that
accepts these wrong-signed values and handle them correctly.  Thus on
the alpha, it appears that there are no extra zero/sign extends
required, and everything is the same speed or faster with the wrong
sign.  The MIPS port works the same way.  This is not true on the arm
though.  The arm port is changing sign on 8 and 16 bit value, but does
not have instructions that directly operate on 8 and 16 bit values.
This requires the compiler to emit extra zero/sign extend instructions
that affect the performance of the code.  Other than the thumb1 8-bit
load, and the pre-armv6 use of andi for 8-bit zero-extend, I haven't
seen anything that is faster in the wrong sign, and there are a number
of operations that are actually slower because of the extra zero/sign
extends required by the arm PROMOTE_MODE definition.  I've given some
examples.

I have since noticed that the relatively new pass to optimize away
unnecessary zero/sign extensions is actually fixing some of the damage
caused by the ARM PROMOTE_MODE definition.  But there are still some
cases that won't get fixed, as per my earlier examples.  It is better
to emit the fast code at the beginning than to rely on an optimization
pass to convert the slow code into fast code.  So I think the ARM
PROMOTE_MODE definition still needs to be fixed.

 So clearly it anticipates that all permitted extensions should work, and
 in particular it makes no mention of this having to match some
 abi-mandated promotions.  That makes this a MI bug not a target one.

If you look at older gcc releases, like gcc-2.95.3, you will see that
there is only PROMOTE_MODE and a boolean PROMOTE_FUNCTION_ARGS which
says whether PROMOTE_MODE is applied to function arguments or not.
You can't have different zero/sign extensions for parameters and
locals in gcc-2.95.3.  The fact that you can have it today is more an
accident than a deliberate choice.  When PROMOTE_FUNCTION_ARGS was
hookized, it was made into a separate function, and for the ARM port,
the code for PROMOTE_MODE was not correctly copied into the new hook,
resulting in the accidental difference for parameter and local
zero/sign promotion for ARM.  The PROMOTE_MODE docs were written back
when different sign/zero extensions for parms/locals weren't possible,
and hence did not consider the case.

Now that we do have the problem, we can't fix it without an ARM port
ABI change, which is undesirable, so we may have to fix it with a MI
change.

There were two MI changes suggested, one was fixing the out-of-ssa
pass to handle SUBREG_PROMOTED_P promotions.  The other was to
disallow creating PHI nodes between parms and locals.  I haven't had a
chance to try implementing the second one yet; I hope to work on that
today.

Jim


Re: Committed: partial sync with src repo: bfd depends on zlib

2015-07-14 Thread H.J. Lu
On Tue, Jul 14, 2015 at 6:46 AM, Hans-Peter Nilsson h...@bitrange.com wrote:
 On Tue, 14 Jul 2015, H.J. Lu wrote:
 The shared files between gcc and binutils are way out of sync.

 Yeah I noticed, but didn't go for a full sync as that'd require
 testing other builds too.  Thankfully your imported src commit
 was local.

 I have synced binutils with gcc.  I started submitting GCC
 patches to sync with binutils:

 https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00707.html

 I will commit changes to GCC to sync GCC with binutils.

 Good!

 In the meantime, you can use the shared files from binutils
 with GCC.

 The commit I did means I don't have to, but this was for a
 bare-iron cross-target meaning likely less issues than a
 self-hosted target with all bells, whistles and subdirs; people
 may see other failures for other targets.


I have synced toplevel files as well as config/include directories
between GCC and binutils-gdb.  Please let me know if I have
missed anything.

Thanks.

-- 
H.J.


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-14 Thread Richard Biener
On July 14, 2015 6:49:18 PM GMT+02:00, Jim Wilson jim.wil...@linaro.org wrote:
On Tue, Jul 14, 2015 at 9:13 AM, Richard Earnshaw
richard.earns...@foss.arm.com wrote:
 We went through this a couple of weeks back.  The backend
documentation
 for PROMOTE_MODE says:

I disagree that this is a fully resolved issue.  I see clear problems
with how the ARM port uses PROMOTE_MODE.

  For most machines, the macro definition does not change
@var{unsignedp}.
 However, some machines, have instructions that preferentially handle
 either signed or unsigned quantities of certain modes.  For example,
on
 the DEC Alpha, 32-bit loads from memory and 32-bit add instructions
 sign-extend the result to 64 bits.  On such machines, set
 @var{unsignedp} according to which kind of extension is more
efficient.

The Alpha case is different than the ARM case.  The Alpha only changes
sign for 32-bit values in 64-bit registers.  The alpha happens to have
a nice set of instructions that operates on 32-bit values, that
accepts these wrong-signed values and handle them correctly.  Thus on
the alpha, it appears that there are no extra zero/sign extends
required, and everything is the same speed or faster with the wrong
sign.  The MIPS port works the same way.  This is not true on the arm
though.  The arm port is changing sign on 8 and 16 bit value, but does
not have instructions that directly operate on 8 and 16 bit values.
This requires the compiler to emit extra zero/sign extend instructions
that affect the performance of the code.  Other than the thumb1 8-bit
load, and the pre-armv6 use of andi for 8-bit zero-extend, I haven't
seen anything that is faster in the wrong sign, and there are a number
of operations that are actually slower because of the extra zero/sign
extends required by the arm PROMOTE_MODE definition.  I've given some
examples.

I have since noticed that the relatively new pass to optimize away
unnecessary zero/sign extensions is actually fixing some of the damage
caused by the ARM PROMOTE_MODE definition.  But there are still some
cases that won't get fixed, as per my earlier examples.  It is better
to emit the fast code at the beginning than to rely on an optimization
pass to convert the slow code into fast code.  So I think the ARM
PROMOTE_MODE definition still needs to be fixed.

 So clearly it anticipates that all permitted extensions should work,
and
 in particular it makes no mention of this having to match some
 abi-mandated promotions.  That makes this a MI bug not a target one.

If you look at older gcc releases, like gcc-2.95.3, you will see that
there is only PROMOTE_MODE and a boolean PROMOTE_FUNCTION_ARGS which
says whether PROMOTE_MODE is applied to function arguments or not.
You can't have different zero/sign extensions for parameters and
locals in gcc-2.95.3.  The fact that you can have it today is more an
accident than a deliberate choice.  When PROMOTE_FUNCTION_ARGS was
hookized, it was made into a separate function, and for the ARM port,
the code for PROMOTE_MODE was not correctly copied into the new hook,
resulting in the accidental difference for parameter and local
zero/sign promotion for ARM.  The PROMOTE_MODE docs were written back
when different sign/zero extensions for parms/locals weren't possible,
and hence did not consider the case.

Now that we do have the problem, we can't fix it without an ARM port
ABI change, which is undesirable, so we may have to fix it with a MI
change.

There were two MI changes suggested, one was fixing the out-of-ssa
pass to handle SUBREG_PROMOTED_P promotions.  The other was to
disallow creating PHI nodes between parms and locals.  I haven't had a
chance to try implementing the second one yet; I hope to work on that
today.

I will definitely veto such restriction.

Richard.

Jim




Re: [C/C++ PATCH] PR c++/66572. Fix Wlogical-op false positive

2015-07-14 Thread Marek Polacek
Sorry it's taken so long to respond.

On Wed, Jun 24, 2015 at 05:43:05PM +0300, Mikhail Maltsev wrote:
  That looks wrong, because with TREE_CONSTANT we'd warn in C but not in C++
  for the following:
  
  const int a = 4;
  void
  f (void)
  {
const int b = 4;
static const int c = 5;
if (a  a) {}
if (b  b) {}
if (c  c) {}
  }
  
 Actually for this case the patch silences the warning both for C and
 C++. It's interesting that Clang warns like this:

That's really not what I see.  With your patch, cc1 warns on that testcase
while cc1plus does not.
 
 test.c:7:10: warning: use of logical '' with constant operand
 [-Wconstant-logical-operand]
 
 It does not warn for my testcase with templates. It also does not warn
 about:
 
 void
 bar (const int parm_a)
 {
   const bool a = parm_a;
   if (a  a) {}
   if (a || a) {}
   if (parm_a  parm_a) {}
   if (parm_a || parm_a) {}
 }

I think we want 4 warnings here, but vanilla cc1 only issues 2 warnings.
 
 Maybe my snippet does not express clearly enough what it was supposed to
 express. I actually meant something like this:
 
   templateclass _U1, class _U2, class = typename
  enable_if__and_is_convertible_U1, _T1,
   is_convertible_U2, _T2::value::type
   constexpr pair(pair_U1, _U2 __p)
   : first(std::forward_U1(__p.first)),
 second(std::forward_U2(__p.second)) { }
 
 (it's std::pair move constructor)
 It is perfectly possible that the user will construct an std::pairT, T
 object from an std::pairU, U. In this case we get an and of two
 identical is_convertible instantiations. The difference is that here
 there is a clever __and_ template which helps to avoid warnings. Well,
 at least I now know a good way to suppress them in my code :).
 
 Though I still think that this warning is bogus. Probably the correct
 (and the hard) way to check templates is to compare ASTs of the operands
 before any substitutions.
 
 But for now I could try to implement an idea, which I mentioned in the
 bug report: add a new flag to enum tsubst_flags, and set it when we
 check ASTs which depend on parameters of a template being instantiated
 (we already have similar checks for macro expansions). What do you think
 about such approach?

Ok, in that case I think easiest would the following (I hit the same issue
when writing the -Wtautological-compare patch):

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-07-14  Marek Polacek  pola...@redhat.com

PR c++/66572
* pt.c (tsubst_copy_and_build): Add warn_logical_op sentinel.

* g++.dg/warn/Wlogical-op-2.C: New test.

diff --git gcc/cp/pt.c gcc/cp/pt.c
index 2097963..0c9712a 100644
--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -14893,6 +14893,7 @@ tsubst_copy_and_build (tree t,
   {
warning_sentinel s1(warn_type_limits);
warning_sentinel s2(warn_div_by_zero);
+   warning_sentinel s3(warn_logical_op);
tree op0 = RECUR (TREE_OPERAND (t, 0));
tree op1 = RECUR (TREE_OPERAND (t, 1));
tree r = build_x_binary_op
diff --git gcc/testsuite/g++.dg/warn/Wlogical-op-2.C 
gcc/testsuite/g++.dg/warn/Wlogical-op-2.C
index e69de29..755db08 100644
--- gcc/testsuite/g++.dg/warn/Wlogical-op-2.C
+++ gcc/testsuite/g++.dg/warn/Wlogical-op-2.C
@@ -0,0 +1,30 @@
+// PR c++/66572
+// { dg-do compile { target c++11 } }
+// { dg-options -Wlogical-op }
+
+struct false_type
+{
+static constexpr bool value = false;
+};
+
+struct true_type
+{
+static constexpr bool value = true;
+};
+
+templatetypename T
+struct is_unsigned : false_type {};
+
+template
+struct is_unsignedunsigned : true_type {};
+
+templatetypename T1, typename T2
+bool foo()
+{
+return is_unsignedT1::value  is_unsignedT2::value;
+}
+
+int main()
+{
+foounsigned, unsigned();
+}

Marek


Re: [PATCH] Fix PR c++/66850 (Adding a forward declaration causes ICE on valid code)

2015-07-14 Thread Jason Merrill

OK.

Jason


Re: [PATCH][C++] PR 65071

2015-07-14 Thread Jason Merrill

On 07/11/2015 03:40 PM, Paolo Carlini wrote:

I'm going to ping this on behalf of Andrea: the patch seems almost
obvious to me and so small to not immediately require a copyright
assignment (not sure whether Andrea already has it on file?!?). I also
double checked that it still applies cleanly and passes testing.


OK, thanks.

Jason




Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2015-07-14 Thread Richard Biener
On July 14, 2015 10:03:32 PM GMT+02:00, Thomas Schwinge 
tho...@codesourcery.com wrote:
Hi!

OK for gcc-5-branch?

OK

Richard

On Wed, 29 Apr 2015 18:26:06 +0200, I wrote:
 Committed in r222583:
 
 commit df615909263269988fd9611f8d007902580829d9
 Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4
 Date:   Wed Apr 29 16:23:26 2015 +
 
 [PR libgomp/65099] nvptx mkoffload: pass -m32 or -m64 to the
compiler
 
 ... depending on -foffload-abi=[...].
 
 Coding style/code copied from
gcc/config/i386/intelmic-mkoffload.c for
 consistency.
 
  gcc/
  * config/nvptx/mkoffload.c (target_ilp32): New variable.
  (main): Set it depending on -foffload-abi=[...].
  (compile_native, main): Use it to pass -m32 or -m64 to the
  compiler.
 
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222583
138bc75d-0d04-0410-961f-82ee72b054a4
 ---
  gcc/ChangeLog|8 
  gcc/config/nvptx/mkoffload.c |   18 +-
  2 files changed, 25 insertions(+), 1 deletion(-)
 
 diff --git gcc/ChangeLog gcc/ChangeLog
 index aaa06c3..d7455e4 100644
 --- gcc/ChangeLog
 +++ gcc/ChangeLog
 @@ -1,3 +1,11 @@
 +2015-04-29  Thomas Schwinge  tho...@codesourcery.com
 +
 +PR libgomp/65099
 +* config/nvptx/mkoffload.c (target_ilp32): New variable.
 +(main): Set it depending on -foffload-abi=[...].
 +(compile_native, main): Use it to pass -m32 or -m64 to the
 +compiler.
 +
  2015-04-29  Alan Lawrence  alan.lawre...@arm.com
  
  PR target/65770
 diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
 index dbc68bc..8687154 100644
 --- gcc/config/nvptx/mkoffload.c
 +++ gcc/config/nvptx/mkoffload.c
 @@ -126,6 +126,9 @@ static id_map *var_ids, **vars_tail = var_ids;
  static const char *ptx_name;
  static const char *ptx_cfile_name;
  
 +/* Shows if we should compile binaries for i386 instead of x86-64. 
*/
 +bool target_ilp32 = false;
 +
  /* Delete tempfiles.  */
  
  /* Unlink a temporary file unless requested otherwise.  */
 @@ -885,6 +888,7 @@ compile_native (const char *infile, const char
*outfile, const char *compiler)
struct obstack argv_obstack;
obstack_init (argv_obstack);
obstack_ptr_grow (argv_obstack, compiler);
 +  obstack_ptr_grow (argv_obstack, target_ilp32 ? -m32 : -m64);
obstack_ptr_grow (argv_obstack, infile);
obstack_ptr_grow (argv_obstack, -c);
obstack_ptr_grow (argv_obstack, -o);
 @@ -962,11 +966,23 @@ main (int argc, char **argv)
   passed with @file.  Expand them into argv before processing. 
*/
expandargv (argc, argv);
  
 +  /* Find out whether we should compile binaries for i386 or x86-64.
 */
 +  for (int i = argc - 1; i  0; i--)
 +if (strncmp (argv[i], -foffload-abi=, sizeof
(-foffload-abi=) - 1) == 0)
 +  {
 +if (strstr (argv[i], ilp32))
 +  target_ilp32 = true;
 +else if (!strstr (argv[i], lp64))
 +  fatal_error (input_location,
 +   unrecognizable argument of option -foffload-abi);
 +break;
 +  }
 +
struct obstack argv_obstack;
obstack_init (argv_obstack);
obstack_ptr_grow (argv_obstack, driver);
obstack_ptr_grow (argv_obstack, -xlto);
 -  obstack_ptr_grow (argv_obstack, -m64);
 +  obstack_ptr_grow (argv_obstack, target_ilp32 ? -m32 : -m64);
obstack_ptr_grow (argv_obstack, -S);
  
for (int ix = 1; ix != argc; ix++)


Grüße,
 Thomas




RE: [PATCH, MIPS] Support new interrupt handler options

2015-07-14 Thread Moore, Catherine


 -Original Message-
 From: Robert Suchanek [mailto:robert.sucha...@imgtec.com]
 Sent: Tuesday, July 14, 2015 11:14 AM
 To: Moore, Catherine; Matthew Fortune; gcc-patches@gcc.gnu.org
 Subject: RE: [PATCH, MIPS] Support new interrupt handler options
 
 Hi Catherine,
 
  I'm getting build errors with the current TOT and your patch.
 
  The first errors that I encounter are:
  gcc/config/mips/mips.c:1355:1: warning: 'mips_int_mask
  mips_interrupt_mask(tree)' defined but not used [-Wunused-function]
  gcc/config/mips/mips.c:1392:1: warning: 'mips_shadow_set
  mips_use_shadow_register_set(tree)' defined but not used
  [-Wunused-function]
 
  Removing these two functions results in further errors that I have not
  investigated.
  Will you try applying and building your patch again?
 
 I have no explanation why this could happen.  My guess is that a part of the
 patch did not apply correctly.  Those functions are used in
 mips_compute_frame_info().
 
 I did notice and fixed warnings about unused variables/arguments.

I re-ran patch with the verbose option and found that it was silently 
discarding hunks (starting with #7) because it thought it was garbage.

 
 
  I have a couple of further comments on the existing patch, see below.
 
 Comments added.  Please have a look at the attached revised patch.
 Tested against r225768.
 
 Regards,
 Robert
 
 
 gcc/
   * config/mips/mips.c (mips_int_mask): New enum.
   (mips_shadow_set): Likewise.
   (int_mask): New variable.
   (use_shadow_register_set_p): Change type to enum
 mips_shadow_set.
   (machine_function): Add int_mask and use_shadow_register_set.
   (mips_attribute_table): Add attribute handlers for interrupt and
   use_shadow_register_set.
   (mips_interrupt_mask): New static function.
   (mips_handle_interrupt_attr): Likewise.
   (mips_handle_use_shadow_register_set_attr): Likewise.
   (mips_use_shadow_register_set): Change return type to enum
   mips_shadow_set.  Add argument handling for
 use_shadow_register_set
   attribute.
   (mips_interrupt_extra_called_saved_reg_p): Update the conditional
 to
   compare with mips_shadow_set enum.
   (mips_compute_frame_info): Add interrupt mask and
   use_shadow_register_set to per-function information structure.
   Add a stack slot for EPC unconditionally.
   (mips_expand_prologue): Compare use_shadow_register_set value
   with mips_shadow_set enum.  Save EPC always in K1, clobber only K1
 for
   masked interrupt register but in EIC mode use K0 and save Cause in
 K0.
   EPC saved and restored unconditionally.  Use PMODE_INSN macro
 when
   copying the stack pointer from the shadow register set.
   * config/mips/mips.h (SR_IM0): New define.
   * config/mips/mips.md (mips_rdpgpr): Rename to...
   (mips_rdpgpr_mode): ...this.  Use the Pmode iterator.
   * doc/extend.texi (Declaring Attributes of Functions): Document
   optional arguments for interrupt and use_shadow_register_set
   attributes.
 
 gcc/testsuite/
   * gcc.target/mips/interrupt_handler-4.c: New test.

This is now OK to commit.
Catherine



Re: [gomp4, fortran] Patch to fix continuation checks of OpenACC and OpenMP directives

2015-07-14 Thread Cesar Philippidis
On 07/14/2015 02:20 PM, Ilmir Usmanov wrote:
 Ping

Sorry, I thought I had already approved this. It's fine for gomp-4_0-branch.

Cesar

 On 07.07.2015 14:27, Ilmir Usmanov wrote:
 Ping

 30.06.2015, 03:43, Ilmir Usmanov m...@ilmir.us:
 Hi Cesar!

 Thanks for your review!

 08.06.2015, 17:59, Cesar Philippidis ce...@codesourcery.com:
   On 06/07/2015 02:05 PM, Ilmir Usmanov wrote:
Fixed fortran mail-list address. Sorry for inconvenience.

08.06.2015, 00:01, Ilmir Usmanov m...@ilmir.us:
Hi Cesar!

This patch fixes checks of OpenMP and OpenACC continuations in
case if someone mixes them (i.e. continues OpenMP directive with
!$ACC sentinel or vice versa).

OK for gomp branch?
   Thanks for working on this. Does this fix PR63858 by any chance?
 No problem. I had a feeling that something is wrong in the scanner since
 I've committed an initial support of OpenACC ver. 1.0 to gomp branch
 (more than a year ago).
 Now it does fix the PR, because I've added support of fixed form to the
 patch. BTW, your test in the PR has a wrong continuation. Fixed test
 added to the patch.

   two minor nits...

0001-Fix-mix-of-OpenACC-and-OpenMP-sentinels-in-continuat.patch

From 5492bf5bc991b6924f5e3b35c11eeaed745df073 Mon Sep 17
 00:00:00 2001
From: Ilmir Usmanov i.usma...@samsung.com
Date: Sun, 7 Jun 2015 23:55:22 +0300
Subject: [PATCH] Fix mix of OpenACC and OpenMP sentinels in
 continuation

---
 gcc/fortran/ChangeLog | 5 +
   Use ChangeLog.gomp for gomp-4_0-branch.
 Done.

+ /* In case we have an OpenMP directive continued by OpenACC
+ sentinel, or vice versa, we get both openmp_flag and
+ openacc_flag on. */
+
+ if (openacc_flag  openmp_flag)
+ {
+ int is_openmp = 0;
+ for (i = 0; i  5; i++, c = next_char ())
+ {
+ if (gfc_wide_tolower (c) != (unsigned char) !$acc[i])
+ is_openmp = 1;
+ if (i == 4)
+ old_loc = gfc_current_locus;
+ }
+ gfc_error (Wrong %s continuation at %C: expected %s, got %s,
+ is_openmp ? OpenACC : OpenMP,
+ is_openmp ? !$ACC : !$OMP,
+ is_openmp ? !$OMP : !$ACC);
   I think it's better for the translation project if you made this a
   complete string. So maybe change this line into

 gfc_error (is_openmp ? Wrong continuation at %C: expected
 !$ACC, got
 !$OMP,
: Wrong continuation at %C: expected !$OMP, got
 !$ACC);
 Done

   Other than that, it looks fine.

   Thanks,
   Cesar
 OK for gomp branch?

 -- 
 Ilmir.
 -- 
 Ilmir.




[nios2] [0/7] Support for Nios II R2

2015-07-14 Thread Sandra Loosemore

I will shortly begin committing a patch series to add GCC support for
Nios II R2, a revision of the original Nios II instruction set.  I
previously wrote up some notes on the technical changes from R1 to R2
when I posted the corresponding binutils patches, here:

https://sourceware.org/ml/binutils/2015-07/msg00014.html

The patch series is in 7 parts.  Parts 1-3 add support for the R2
re-encodings of the base R1 instruction set.  Parts 4-7 include
support for generating the new R2-specific instructions.

[1] Add -march=, -mbmx, -mcdx flags
[2] Adjust for reduced offsets in R2 load/store IO insns
[3] Correct nested function trampolines for R2 encodings
[4] Support new R2 instructions
[5] Support R2 CDX load/store multiple instructions
[6] Update function prologues/epilogues for R2 CDX
[7] Add new intrinsics

The patches are self-contained enough build individually when applied
in sequence, but I've only tested them together as a group.  Locally,
we have been building and testing three multilibs for nios2-elf: the
default R1, plain R2, and R2 with CDX and BMX extensions enabled.  For
now we are leaving R1 as the only multilib being built by default.

Presently there is no support for R2 on nios2-linux-gnu targets.  This
isn't a fundamental limitation of the architecture, we just don't have
kernel or glibc/dynamic linker support yet.  I've regression-tested
the patches on the default R1 nios2-linux-gnu target to ensure they
don't break anything, though.

-Sandra



PR65742: OpenACC acc_on_device fixes

2015-07-14 Thread Thomas Schwinge
Hi!

Per your request in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65742#c8, »can you please
fix the gcc 5 branch«, I'm planning to apply the following to
gcc-5-branch tomorrow (but wanted to give you a chance to veto, given
that your backport request pre-dates the branch freeze by a week):

commit b73b9881a781f8e5572ce6c6a38f51696fc09b83
Author: Thomas Schwinge tho...@codesourcery.com
Date:   Tue Jul 14 15:27:49 2015 +0200

OpenACC acc_on_device fixes

Backport trunk r223801:

PR libgomp/65742

gcc/
* builtins.c (expand_builtin_acc_on_device): Don't use open-coded
sequence for !ACCEL_COMPILER.

libgomp/
* oacc-init.c (plugin/plugin-host.h): Include.
(acc_on_device): Check whether we're in an offloaded region for
host_nonshm
plugin. Don't use __builtin_acc_on_device.
* plugin/plugin-host.c (GOMP_OFFLOAD_openacc_parallel): Set
nonshm_exec flag in thread-local data.
(GOMP_OFFLOAD_openacc_create_thread_data): Allocate thread-local
data for host_nonshm plugin.
(GOMP_OFFLOAD_openacc_destroy_thread_data): Free thread-local data
for host_nonshm plugin.
* plugin/plugin-host.h: New.

Mark parameters with ATTRIBUTE_UNUSED

Backport trunk r223805:

* builtins.c (expand_builtin_acc_on_device): Mark parameters
with ATTRIBUTE_UNUSED.

[PR libgomp/65742, PR middle-end/66332] XFAIL acc_on_device compile-time 
evaluation

The OpenACC 2.0a specification mandates differently, but we currently do 
get a
library call in the host code.

Backport trunk r224028:

PR libgomp/65742
PR middle-end/66332

gcc/testsuite/
* c-c++-common/goacc/acc_on_device-2.c: XFAIL for C, too.
---
 gcc/builtins.c |   12 +++
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c |   10 +-
 libgomp/oacc-init.c|   14 ++--
 libgomp/plugin/plugin-host.c   |   21 +--
 libgomp/plugin/plugin-host.h   |   37 
 8 files changed, 127 insertions(+), 12 deletions(-)

diff --git gcc/builtins.c gcc/builtins.c
index 9263777..bcbc11d 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5915,8 +5915,10 @@ expand_stack_save (void)
acceleration device (ACCEL_COMPILER conditional).  */
 
 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
+expand_builtin_acc_on_device (tree exp ATTRIBUTE_UNUSED,
+ rtx target ATTRIBUTE_UNUSED)
 {
+#ifdef ACCEL_COMPILER
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
 return NULL_RTX;
 
@@ -5925,13 +5927,8 @@ expand_builtin_acc_on_device (tree exp, rtx target)
   /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
   machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
   rtx v = expand_normal (arg), v1, v2;
-#ifdef ACCEL_COMPILER
   v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
   v2 = GEN_INT (ACCEL_COMPILER_acc_device);
-#else
-  v1 = GEN_INT (GOMP_DEVICE_NONE);
-  v2 = GEN_INT (GOMP_DEVICE_HOST);
-#endif
   machine_mode target_mode = TYPE_MODE (integer_type_node);
   if (!target || !register_operand (target, target_mode))
 target = gen_reg_rtx (target_mode);
@@ -5945,6 +5942,9 @@ expand_builtin_acc_on_device (tree exp, rtx target)
   emit_label (done_label);
 
   return target;
+#else
+  return NULL;
+#endif
 }
 
 
diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c 
gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index 2f4ee2b..7fe4e4e 100644
--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -20,10 +20,18 @@ f (void)
 }
 
 /* With -fopenacc, we're expecting the builtin to be expanded, so no calls.
+
TODO: in C++, even under extern C, the use of enum for acc_device_t
perturbs expansion as a builtin, which expects an int parameter.  It's fine
when changing acc_device_t to plain int, but that's not what we're doing in
openacc.h.
-   { dg-final { scan-rtl-dump-times \\\(call \[^\\n\]* acc_on_device 0 
expand { xfail c++ } } } */
+
+   TODO: given that we can't expand acc_on_device in
+   gcc/builtins.c:expand_builtin_acc_on_device for in the !ACCEL_COMPILER case
+   (because at that point we don't know whether we're acc_device_host or
+   acc_device_host_nonshm), we'll (erroneously) get a library call in the host
+   code.
+
+   { dg-final { scan-rtl-dump-times \\\(call \[^\\n\]* acc_on_device 0 
expand { xfail { c || c++ } } } } */
 
 /* { dg-final { cleanup-rtl-dump expand } } */
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index dc40fb6..a7c2e0d 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -29,6 +29,7 @@
 #include libgomp.h
 #include oacc-int.h
 #include openacc.h
+#include plugin/plugin-host.h
 #include assert.h
 #include stdlib.h
 #include strings.h
@@ -548,11 +549,18 @@ ialias 

[patch] Adjust *-streamer.h

2015-07-14 Thread Andrew MacLeod

THIs patch just does a minor cleanup..

gimple-pretty-print.h doesn't need to include pretty-print.h since 
tree-pretty-print.h already does.
I also cleaned up the 4 streamer files, such that each one includes just 
the previous one, and then make each source file include just he one it 
needs.


so lto-streamer.h - data-streamer.h - tree-streamer.h  - 
gimple-streamer.h

and tree-streamer.h also includes streamer-hooks.h

This makes these files match their compilation requirements, and only 
one ever needs to be included. I also added them to each source file and 
ran include reduction to ensure they each had the exact one they needed. 
which they did.


The rest of the the patch is simply removing headers that are redundant 
now (ie,if tree-streamer.h is included, you don't need to include 
lto-streamer.h or streamer-hooks.h.


bootstraps on  x86_64-unknown-linux-gnu, and no new regressions

 OK for trunk?

Andrew



	* gimple-pretty-print.h: Don't include pretty-print.h.
	* tree-streamer.h: Don't include lto-streamer.h.
	* gimple-streamer.h: Include tree-streamer.h rather than lto-streamer.h.
	* gimple-streamer-in.c: Remove redundant includes.
	* gimple-streamer-out.c: Likewise.
	* ipa-devirt.c: Likewise.
	* ipa-icf.c: Likewise.
	* ipa-inline-analysis.c: Likewise.
	* ipa-polymorphic-call.c: Likewise.
	* ipa-profile.c: Likewise.
	* ipa-prop.c: Likewise.
	* ipa-pure-const.c: Likewise.
	* lto-cgraph.c: Likewise.
	* lto-streamer-in.c: Likewise.
	* lto-streamer-out.c: Likewise.
	* lto-streamer.c: Likewise.
	* tree-streamer-in.c: Likewise.
	* tree-streamer-out.c: Likewise.
	* tree-streamer.c: Likewise.
	* lto/lto.c: Likewise.

Index: gimple-pretty-print.h
===
*** gimple-pretty-print.h	(revision 225741)
--- gimple-pretty-print.h	(working copy)
*** along with GCC; see the file COPYING3.
*** 21,27 
  #ifndef GCC_GIMPLE_PRETTY_PRINT_H
  #define GCC_GIMPLE_PRETTY_PRINT_H
  
- #include pretty-print.h
  #include tree-pretty-print.h
  
  /* In gimple-pretty-print.c  */
--- 21,26 
Index: tree-streamer.h
===
*** tree-streamer.h	(revision 225741)
--- tree-streamer.h	(working copy)
*** along with GCC; see the file COPYING3.
*** 23,29 
  #define GCC_TREE_STREAMER_H
  
  #include streamer-hooks.h
- #include lto-streamer.h
  #include data-streamer.h
  
  /* Cache of pickled nodes.  Used to avoid writing the same node more
--- 23,28 
Index: gimple-streamer.h
===
*** gimple-streamer.h	(revision 225741)
--- gimple-streamer.h	(working copy)
*** along with GCC; see the file COPYING3.
*** 22,28 
  #ifndef GCC_GIMPLE_STREAMER_H
  #define GCC_GIMPLE_STREAMER_H
  
! #include lto-streamer.h
  
  /* In gimple-streamer-in.c  */
  void input_bb (struct lto_input_block *, enum LTO_tags, struct data_in *,
--- 22,28 
  #ifndef GCC_GIMPLE_STREAMER_H
  #define GCC_GIMPLE_STREAMER_H
  
! #include tree-streamer.h
  
  /* In gimple-streamer-in.c  */
  void input_bb (struct lto_input_block *, enum LTO_tags, struct data_in *,
Index: gimple-streamer-in.c
===
*** gimple-streamer-in.c	(revision 225741)
--- gimple-streamer-in.c	(working copy)
*** along with GCC; see the file COPYING3.
*** 35,42 
  #include tree-eh.h
  #include gimple-iterator.h
  #include cgraph.h
- #include data-streamer.h
- #include tree-streamer.h
  #include gimple-streamer.h
  #include value-prof.h
  
--- 35,40 
Index: gimple-streamer-out.c
===
*** gimple-streamer-out.c	(revision 225741)
--- gimple-streamer-out.c	(working copy)
*** along with GCC; see the file COPYING3.
*** 34,43 
  #include gimple-iterator.h
  #include gimple-ssa.h
  #include cgraph.h
- #include data-streamer.h
  #include gimple-streamer.h
- #include lto-streamer.h
- #include tree-streamer.h
  #include value-prof.h
  
  /* Output PHI function PHI to the main stream in OB.  */
--- 34,40 
Index: ipa-devirt.c
===
*** ipa-devirt.c	(revision 225741)
--- ipa-devirt.c	(working copy)
*** along with GCC; see the file COPYING3.
*** 128,134 
  #include expr.h
  #include tree-pass.h
  #include target.h
- #include tree-pretty-print.h
  #include ipa-utils.h
  #include internal-fn.h
  #include gimple-fold.h
--- 128,133 
*** along with GCC; see the file COPYING3.
*** 143,149 
  #include gimple-pretty-print.h
  #include stor-layout.h
  #include intl.h
- #include streamer-hooks.h
  #include lto-streamer.h
  
  /* Hash based set of pairs of types.  */
--- 142,147 
Index: ipa-icf.c
===
*** ipa-icf.c	(revision 225741)
--- ipa-icf.c	(working copy)

[gomp4] New test loop independent clause

2015-07-14 Thread James Norris

Hi,

The attached adds testing for the independent clause
with the loop directive in Fortran.

Committed to gomp-4_0-branch.

Jim
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90
new file mode 100644
index 000..9f17308
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90
@@ -0,0 +1,43 @@
+! { dg-do run } */
+! { dg-additional-options -cpp }
+! { dg-additional-options -ftree-parallelize-loops=32 }
+
+#define N (1024 * 512)
+
+subroutine foo (a,  b,  c)
+  integer, parameter :: n = N
+  integer, dimension (n) :: a
+  integer, dimension (n) :: b
+  integer, dimension (n) :: c
+  integer i, ii
+
+  do i = 1, n
+a(i) = i * 2;
+  end do
+
+  do i = 1, n
+b(i) = i * 4;
+  end do
+
+  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
+!$acc loop independent
+do ii = 1, n
+  c(ii) = a(ii) + b(ii)
+end do
+  !$acc end kernels
+
+  do i = 1, n
+if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end subroutine
+
+program main
+  integer, parameter :: n = N
+  integer :: a(n)
+  integer :: b(n)
+  integer :: c(n)
+
+  call foo (a, b, c)
+
+end program main


Re: [PATCH][C++] Fix PR65091

2015-07-14 Thread Jason Merrill

On 07/12/2015 01:53 PM, Paolo Carlini wrote:

On 07/11/2015 09:46 PM, Paolo Carlini wrote:

I'm going to ping this one too: a tad less trivial than the other one
- a little explanation here or in a comment would definitely help -
but certainly it looks much simpler than my own tries a while ago...
Regression testing information is also missing.

 in fact, one could argue that most of difference between Andrea's
patch and my original try here:

 https://gcc.gnu.org/ml/gcc-patches/2015-06/msg00821.html

is mostly a matter of style + TYPE_P vs the narrower identifier_p...


I'd prefer to fix this in the part of cp_parser_unqualified_id you 
quote; here ~x isn't really an unqualified-id, so we shouldn't treat it 
as one, at least when we're parsing tentatively.


Jason



[PATCH] PR fortran/66864 -- Use the right precision in FLOOR()

2015-07-14 Thread Steve Kargl
The attached patch fixes an issue in the constant 
folding of FLOOR().  In the old code, the precision
of an temporary MPFR variable was set according to
the requested kind of the INTEGER result, ie., the
wrong precision.  The patch sets the precision to
the precision of the arg of FLOOR().  Regression
tested on trunk.  OK to commit.

2015-07-14  Steven G. Kargl  ka...@gcc.gnu.org

* simplify.c (gfc_simplify_floor): Set precision of temporary to
that of arg.

2015-07-14  Steven G. Kargl  ka...@gcc.gnu.org

gfortran.dg/pr66864.f90: New test.


-- 
Steve
Index: gcc/fortran/simplify.c
===
--- gcc/fortran/simplify.c	(revision 225784)
+++ gcc/fortran/simplify.c	(working copy)
@@ -2351,9 +2351,7 @@ gfc_simplify_floor (gfc_expr *e, gfc_exp
   if (e-expr_type != EXPR_CONSTANT)
 return NULL;
 
-  gfc_set_model_kind (kind);
-
-  mpfr_init (floor);
+  mpfr_init2 (floor, mpfr_get_prec (e-value.real));
   mpfr_floor (floor, e-value.real);
 
   result = gfc_get_constant_expr (BT_INTEGER, kind, e-where);
Index: gcc/testsuite/gfortran.dg/pr66864.f90
===
--- gcc/testsuite/gfortran.dg/pr66864.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/pr66864.f90	(working copy)
@@ -0,0 +1,16 @@
+! { dg-do run }
+! PR fortran/66864
+!
+program t
+   implicit none
+   real(8) x
+   x = 2.0d0**26.5d0
+   if (floor(x) /= 94906265) call abort
+   if (floor(2.0d0**26.5d0)/= 94906265) call abort
+   x = 777666555.6d0
+   if (floor(x) /= 777666555) call abort
+   if (floor(777666555.6d0) /= 777666555) call abort
+   x = 2000111222.6d0
+   if (floor(x) /= 2000111222) call abort
+   if (floor(2000111222.6d0) /= 2000111222) call abort
+end program t


RE: [PATCH] [aarch64] Implemented reciprocal square root (rsqrt) estimation in -ffast-math

2015-07-14 Thread Evandro Menezes
I ran a simple test on A57 rev. 0, looping a million times around sqrt{,f} and 
the respective series iterations with the values in the sequence 1..100 and 
got these results:

sqrt(x):36593844/s  1/sqrt(x):  18283875/s
3 Steps:47922557/s  3 Steps:49005194/s

sqrtf(x):   143988480/s 1/sqrtf(x): 69516857/s
2 Steps:78740157/s  2 Steps:80385852/s

I'm a bit surprised that the 3-iteration series for DP is faster than sqrt(), 
but not that it's much faster for the reciprocal of sqrt().  As for SP, the 
2-iteration series is faster only for the reciprocal for sqrtf().

There might still be some leg for this patch in real-world cases which I'd like 
to investigate.

-- 
Evandro Menezes  Austin, TX


 -Original Message-
 From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On
 Behalf Of Kumar, Venkataramanan
 Sent: Monday, June 29, 2015 13:50
 To: pins...@gmail.com; Dr. Philipp Tomsich
 Cc: James Greenhalgh; Benedikt Huber; gcc-patches@gcc.gnu.org; Marcus
 Shawcroft; Ramana Radhakrishnan; Richard Earnshaw
 Subject: RE: [PATCH] [aarch64] Implemented reciprocal square root (rsqrt)
 estimation in -ffast-math
 
 Hi,
 
  -Original Message-
  From: pins...@gmail.com [mailto:pins...@gmail.com]
  Sent: Monday, June 29, 2015 10:23 PM
  To: Dr. Philipp Tomsich
  Cc: James Greenhalgh; Kumar, Venkataramanan; Benedikt Huber; gcc-
  patc...@gcc.gnu.org; Marcus Shawcroft; Ramana Radhakrishnan; Richard
  Earnshaw
  Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root
  (rsqrt) estimation in -ffast-math
 
 
 
 
 
   On Jun 29, 2015, at 4:44 AM, Dr. Philipp Tomsich
  philipp.toms...@theobroma-systems.com wrote:
  
   James,
  
   On 29 Jun 2015, at 13:36, James Greenhalgh
  james.greenha...@arm.com wrote:
  
   On Mon, Jun 29, 2015 at 10:18:23AM +0100, Kumar, Venkataramanan
  wrote:
  
   -Original Message-
   From: Dr. Philipp Tomsich
   [mailto:philipp.toms...@theobroma-systems.com]
   Sent: Monday, June 29, 2015 2:17 PM
   To: Kumar, Venkataramanan
   Cc: pins...@gmail.com; Benedikt Huber; gcc-patches@gcc.gnu.org
   Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root
   (rsqrt) estimation in -ffast-math
  
   Kumar,
  
   This does not come unexpected, as the initial estimation and each
   iteration will add an architecturally-defined number of bits of
   precision (ARMv8 guarantuees only a minimum number of bits
  provided
   per operation… the exact number is specific to each micro-arch,
  though).
   Depending on your architecture and on the required number of
   precise bits by any given benchmark, one may see miscompares.
  
   True.
  
   I would be very uncomfortable with this approach.
  
   Same here. The default must be safe. Always.
   Unlike other architectures, we don’t have a problem with making the
   proper defaults for “safety”, as the ARMv8 ISA guarantees a minimum
   number of precise bits per iteration.
  
   From Richard Biener's post in the thread Michael Matz linked
   earlier in the thread:
  
 It would follow existing practice of things we allow in
 -funsafe-math-optimizations.  Existing practice in that we
 want to allow -ffast-math use with common benchmarks we care
 about.
  
 https://gcc.gnu.org/ml/gcc-patches/2009-11/msg00100.html
  
   With the solution you seem to be converging on (2-steps for some
   microarchitectures, 3 for others), a binary generated for one
   micro-arch may drop below a minimum guarantee of precision when run
   on another. This seems to go against the spirit of the practice
   above. I would only support adding this optimization to -Ofast if
   we could keep to architectural guarantees of precision in the
   generated code
  (i.e. 3-steps everywhere).
  
   I don't object to adding a -mlow-precision-recip-sqrt style
   option, which would be off by default, would enable the 2-step
   mode, and would need to be explicitly enabled (i.e. not implied by
   -mcpu=foo) but I don't see what this buys you beyond the Gromacs
   boost (and even there you would be creating an Invalid Run as
   optimization flags must be applied across all workloads).
  
   Any flag that reduces precision (and thus breaks IEEE floating-point
   semantics) needs to be gated with an “unsafe” flag (i.e. one that is
   never
  on by default).
   As a consequence, the “peak”-tuning for SPEC will turn this on… but
   barely anyone else would.
  
   For the 3-step optimization, it is clear to me that for generic
   tuning we don't want this to be enabled by default experimental
   results and advice in this thread argues against it for thunderx
   and cortex-
  a57 targets.
   However, enabling it based on the CPU tuning selected seems fine to me.
  
   I do not agree on this one, as I would like to see the safe form (i.e.
   3 and 5 iterations respectively) to become the default. Most
   “server-type” chips should not 

[patch, nios2] stack limit checking improvements

2015-07-14 Thread Sandra Loosemore
In preparation for some other changes to the Nios II prologue code that 
are in my queue, I've checked in the attached patch to the stack limit 
checking logic.  With this patch, the check generation code now takes an 
offset so that the check can be emitted before the SP adjustment; this 
allows cases where multiple stack adjustments are emitted to do only a 
single stack limit check.


While I was at it, I also added support for -fstack-limit-symbol as well 
as -fstack-limit-register.


-Sandra

2015-07-14  Sandra Loosemore  san...@codesourcery.com

	gcc/
	* config/nios2/nios2.c (TEMP_REG_NUM): Move define up in file.
	(nios2_emit_stack_limit_check): Add size parameter.  Handle
	-fstack-limit-symbol as well as -fstack-limit-register.
	(nios2_expand_prologue): Emit only a single stack limit check,
	even if multiple stack adjustments are required.
	(nios2_option_override): Diagnose unsupported combination of -fpic
	and -stack-limit-symbol.

	gcc/testsuite/
	* gcc.target/nios2/nios2-stack-check-1.c: Adjust patterns.
	* gcc.target/nios2/nios2-stack-check-2.c: Likewise.
	* gcc.target/nios2/nios2-stack-check-3.c: New test case.
Index: gcc/testsuite/gcc.target/nios2/nios2-stack-check-1.c
===
--- gcc/testsuite/gcc.target/nios2/nios2-stack-check-1.c	(revision 225722)
+++ gcc/testsuite/gcc.target/nios2/nios2-stack-check-1.c	(working copy)
@@ -1,7 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options -fstack-limit-register=et } */
-/* { dg-final { scan-assembler bgeu\\tsp, et } } */
-/* { dg-final { scan-assembler trap\\t3 } } */
+/* { dg-final { scan-assembler bgeu\\tsp,  } } */
+/* { dg-final { scan-assembler trap\\t3|trap.n\\t3 } } */
+
 /* check stack checking */
 void test()
 {
Index: gcc/testsuite/gcc.target/nios2/nios2-stack-check-2.c
===
--- gcc/testsuite/gcc.target/nios2/nios2-stack-check-2.c	(revision 225722)
+++ gcc/testsuite/gcc.target/nios2/nios2-stack-check-2.c	(working copy)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options   } */
-/* { dg-final { scan-assembler-not bgeu\\tsp, et } } */
-/* { dg-final { scan-assembler-not break\\t3 } } */
+/* { dg-final { scan-assembler-not trap\\t3|trap.n\\t3 } } */
+
 /* check stack checking */
 void test()
 {
Index: gcc/testsuite/gcc.target/nios2/nios2-stack-check-3.c
===
--- gcc/testsuite/gcc.target/nios2/nios2-stack-check-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/nios2/nios2-stack-check-3.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options -fstack-limit-symbol=__stackend -fno-pic } */
+/* { dg-final { scan-assembler movhi\\t.*, %hiadj\\(__stackend.*\\) } } */
+/* { dg-final { scan-assembler addi\\t.*, .*, %lo\\(__stackend.*\\) } } */
+/* { dg-final { scan-assembler bgeu\\tsp,  } } */
+/* { dg-final { scan-assembler trap\\t3|trap.n\\t3 } } */
+
+/* check stack checking */
+void test()
+{
+  int a, b, c;
+}
Index: gcc/config/nios2/nios2.c
===
--- gcc/config/nios2/nios2.c	(revision 225722)
+++ gcc/config/nios2/nios2.c	(working copy)
@@ -456,20 +456,47 @@ restore_reg (int regno, unsigned offset)
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
-/* Emit conditional trap for checking stack limit.  */
+/* Temp regno used inside prologue/epilogue.  */
+#define TEMP_REG_NUM 8
+
+/* Emit conditional trap for checking stack limit.  SIZE is the number of
+   additional bytes required.  
+
+   GDB prologue analysis depends on this generating a direct comparison
+   to the SP register, so the adjustment to add SIZE needs to be done on
+   the other operand to the comparison.  Use TEMP_REG_NUM as a temporary,
+   if necessary.  */
 static void
-nios2_emit_stack_limit_check (void)
+nios2_emit_stack_limit_check (int size)
 {
-  if (REG_P (stack_limit_rtx))
-emit_insn (gen_ctrapsi4 (gen_rtx_LTU (VOIDmode, stack_pointer_rtx,
-	  stack_limit_rtx),
-			 stack_pointer_rtx, stack_limit_rtx, GEN_INT (3)));
+  rtx sum;
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF)
+{
+  /* This generates a %hiadj/%lo pair with the constant size
+	 add handled by the relocations.  */
+  sum = gen_rtx_REG (Pmode, TEMP_REG_NUM);
+  emit_move_insn (sum, plus_constant (Pmode, stack_limit_rtx, size));
+}
+  else if (!REG_P (stack_limit_rtx))
+sorry (Unknown form for stack limit expression);
+  else if (size == 0)
+sum = stack_limit_rtx;
+  else if (SMALL_INT (size))
+{
+  sum = gen_rtx_REG (Pmode, TEMP_REG_NUM);
+  emit_move_insn (sum, plus_constant (Pmode, stack_limit_rtx, size));
+}
   else
-sorry (only register based stack limit is supported);
-}
+{
+  sum = gen_rtx_REG (Pmode, TEMP_REG_NUM);
+  emit_move_insn (sum, gen_int_mode (size, Pmode));
+  emit_insn (gen_add2_insn (sum, stack_limit_rtx));
+}
 
-/* Temp regno used inside prologue/epilogue.  */

Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2015-07-14 Thread Thomas Schwinge
Hi!

OK for gcc-5-branch?

On Wed, 29 Apr 2015 18:26:06 +0200, I wrote:
 Committed in r222583:
 
 commit df615909263269988fd9611f8d007902580829d9
 Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4
 Date:   Wed Apr 29 16:23:26 2015 +
 
 [PR libgomp/65099] nvptx mkoffload: pass -m32 or -m64 to the compiler
 
 ... depending on -foffload-abi=[...].
 
 Coding style/code copied from gcc/config/i386/intelmic-mkoffload.c for
 consistency.
 
   gcc/
   * config/nvptx/mkoffload.c (target_ilp32): New variable.
   (main): Set it depending on -foffload-abi=[...].
   (compile_native, main): Use it to pass -m32 or -m64 to the
   compiler.
 
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222583 
 138bc75d-0d04-0410-961f-82ee72b054a4
 ---
  gcc/ChangeLog|8 
  gcc/config/nvptx/mkoffload.c |   18 +-
  2 files changed, 25 insertions(+), 1 deletion(-)
 
 diff --git gcc/ChangeLog gcc/ChangeLog
 index aaa06c3..d7455e4 100644
 --- gcc/ChangeLog
 +++ gcc/ChangeLog
 @@ -1,3 +1,11 @@
 +2015-04-29  Thomas Schwinge  tho...@codesourcery.com
 +
 + PR libgomp/65099
 + * config/nvptx/mkoffload.c (target_ilp32): New variable.
 + (main): Set it depending on -foffload-abi=[...].
 + (compile_native, main): Use it to pass -m32 or -m64 to the
 + compiler.
 +
  2015-04-29  Alan Lawrence  alan.lawre...@arm.com
  
   PR target/65770
 diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
 index dbc68bc..8687154 100644
 --- gcc/config/nvptx/mkoffload.c
 +++ gcc/config/nvptx/mkoffload.c
 @@ -126,6 +126,9 @@ static id_map *var_ids, **vars_tail = var_ids;
  static const char *ptx_name;
  static const char *ptx_cfile_name;
  
 +/* Shows if we should compile binaries for i386 instead of x86-64.  */
 +bool target_ilp32 = false;
 +
  /* Delete tempfiles.  */
  
  /* Unlink a temporary file unless requested otherwise.  */
 @@ -885,6 +888,7 @@ compile_native (const char *infile, const char *outfile, 
 const char *compiler)
struct obstack argv_obstack;
obstack_init (argv_obstack);
obstack_ptr_grow (argv_obstack, compiler);
 +  obstack_ptr_grow (argv_obstack, target_ilp32 ? -m32 : -m64);
obstack_ptr_grow (argv_obstack, infile);
obstack_ptr_grow (argv_obstack, -c);
obstack_ptr_grow (argv_obstack, -o);
 @@ -962,11 +966,23 @@ main (int argc, char **argv)
   passed with @file.  Expand them into argv before processing.  */
expandargv (argc, argv);
  
 +  /* Find out whether we should compile binaries for i386 or x86-64.  */
 +  for (int i = argc - 1; i  0; i--)
 +if (strncmp (argv[i], -foffload-abi=, sizeof (-foffload-abi=) - 1) 
 == 0)
 +  {
 + if (strstr (argv[i], ilp32))
 +   target_ilp32 = true;
 + else if (!strstr (argv[i], lp64))
 +   fatal_error (input_location,
 +unrecognizable argument of option -foffload-abi);
 + break;
 +  }
 +
struct obstack argv_obstack;
obstack_init (argv_obstack);
obstack_ptr_grow (argv_obstack, driver);
obstack_ptr_grow (argv_obstack, -xlto);
 -  obstack_ptr_grow (argv_obstack, -m64);
 +  obstack_ptr_grow (argv_obstack, target_ilp32 ? -m32 : -m64);
obstack_ptr_grow (argv_obstack, -S);
  
for (int ix = 1; ix != argc; ix++)


Grüße,
 Thomas


signature.asc
Description: PGP signature


RE: [PATCH] [aarch64] Implemented reciprocal square root (rsqrt) estimation in -ffast-math

2015-07-14 Thread Evandro Menezes
For both FRECPE and FRSQRTE the ARMv8 ISA guide states in their pseudo-code 
that:

Result is double-precision and a multiple of 1/256 in the range 1 to 511/256.

This suggests that the estimate is merely 8 bits long.

IIRC, x86 returns 12 bits for its equivalent insns, requiring then a single 
series iteration for both SP and DP to achieve a precise enough result.

-- 
Evandro Menezes  Austin, TX


 -Original Message-
 From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On
 Behalf Of Dr. Philipp Tomsich
 Sent: Monday, June 29, 2015 3:47
 To: Kumar, Venkataramanan
 Cc: pins...@gmail.com; Benedikt Huber; gcc-patches@gcc.gnu.org
 Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root (rsqrt)
 estimation in -ffast-math
 
 Kumar,
 
 This does not come unexpected, as the initial estimation and each iteration
 will add an architecturally-defined number of bits of precision (ARMv8
 guarantuees only a minimum number of bits provided per operation… the exact
 number is specific to each micro-arch, though).
 Depending on your architecture and on the required number of precise bits by
 any given benchmark, one may see miscompares.
 
 Do you know the exact number of bits that the initial estimate and the
 subsequent refinement steps add for your micro-arch?
 
 Thanks,
 Philipp.
 
  On 29 Jun 2015, at 10:17, Kumar, Venkataramanan
 venkataramanan.ku...@amd.com wrote:
 
 
  Hmm,  Reducing the iterations to 1 step for float and 2 steps for
 double
 
  I got VE (miscompares) on following benchmarks 416.gamess
  453.povray
  454.calculix
  459.GemsFDTD
 
  Benedikt , I have ICE for 444.namd with your patch,  not sure if something
 wrong in my local tree.
 
  Regards,
  Venkat.
 
  -Original Message-
  From: pins...@gmail.com [mailto:pins...@gmail.com]
  Sent: Sunday, June 28, 2015 8:35 PM
  To: Kumar, Venkataramanan
  Cc: Dr. Philipp Tomsich; Benedikt Huber; gcc-patches@gcc.gnu.org
  Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root
  (rsqrt) estimation in -ffast-math
 
 
 
 
 
  On Jun 25, 2015, at 9:44 AM, Kumar, Venkataramanan
  venkataramanan.ku...@amd.com wrote:
 
  I got around ~12% gain with -Ofast -mcpu=cortex-a57.
 
  I get around 11/12% on thunderX with the patch and the decreasing the
  iterations change (1/2) compared to without the patch.
 
  Thanks,
  Andrew
 
 
 
  Regards,
  Venkat.
 
  -Original Message-
  From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
  ow...@gcc.gnu.org] On Behalf Of Dr. Philipp Tomsich
  Sent: Thursday, June 25, 2015 9:13 PM
  To: Kumar, Venkataramanan
  Cc: Benedikt Huber; pins...@gmail.com; gcc-patches@gcc.gnu.org
  Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root
  (rsqrt) estimation in -ffast-math
 
  Kumar,
 
  what is the relative gain that you see on Cortex-A57?
 
  Thanks,
  Philipp.
 
  On 25 Jun 2015, at 17:35, Kumar, Venkataramanan
  venkataramanan.ku...@amd.com wrote:
 
  Changing to  1 step for float and 2 steps for double gives
  better gains
  now for gromacs on cortex-a57.
 
  Regards,
  Venkat.
  -Original Message-
  From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
  ow...@gcc.gnu.org] On Behalf Of Benedikt Huber
  Sent: Thursday, June 25, 2015 4:09 PM
  To: pins...@gmail.com
  Cc: gcc-patches@gcc.gnu.org; philipp.tomsich@theobroma-
  systems.com
  Subject: Re: [PATCH] [aarch64] Implemented reciprocal square root
  (rsqrt) estimation in -ffast-math
 
  Andrew,
 
  This is NOT a win on thunderX at least for single precision
  because you have
  to do the divide and sqrt in the same time as it takes 5
  multiples (estimate and step are multiplies in the thunderX pipeline).
  Doubles is 10 multiplies which is just the same as what the patch
  does (but it is really slightly less than 10, I rounded up). So
  in the end this is NOT a win at all for thunderX unless we do one
  less step for both single
  and double.
 
  Yes, the expected benefit from rsqrt estimation is implementation
  specific. If one has a better initial rsqrte or an application
  that can trade precision for execution time, we could offer a
  command line option to do only 2 steps for doulbe and 1 step for
  float; similar to -
  mrecip-precision for PowerPC.
  What are your thoughts on that?
 
  Best regards,
  Benedikt
 



Re: [nvptx offloading] Only 64-bit configurations are currently supported

2015-07-14 Thread Thomas Schwinge
Hi!

OK for gcc-5-branch?

On Wed, 8 Jul 2015 17:03:02 +0200, I wrote:
 On Wed, 18 Feb 2015 09:50:15 +0100, I wrote:
  So far, we have concentrated only on the 64-bit x86_64 configuration;
  32-bit has several known issues to be resolved.
  https://gcc.gnu.org/PR65099 filed.
 
 I have committed the following patch in r225560.  This gets us rid of the
 lots of expected FAILs in the 32-bit part of
 RUNTESTFLAGS='--target_board=unix\{-m64,-m32\}' testing, for example.
 
 commit fe265ad3c9624da88f43be349137696449148f4f
 Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4
 Date:   Wed Jul 8 14:59:59 2015 +
 
 [nvptx offloading] Only 64-bit configurations are currently supported
 
   PR libgomp/65099
   gcc/
   * config/nvptx/mkoffload.c (main): Create an offload image only in
   64-bit configurations.
   libgomp/
   * plugin/plugin-nvptx.c (nvptx_get_num_devices): Return 0 if not
   in a 64-bit configuration.
   * testsuite/libgomp.oacc-c++/c++.exp: Don't attempt nvidia
   offloading testing if no such device is available.
   * testsuite/libgomp.oacc-c/c.exp: Likewise.
   * testsuite/libgomp.oacc-fortran/fortran.exp: Likewise.
 
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@225560 
 138bc75d-0d04-0410-961f-82ee72b054a4
 ---
  gcc/ChangeLog  |6 +++
  gcc/config/nvptx/mkoffload.c   |   56 
 +++-
  libgomp/ChangeLog  |   10 
  libgomp/plugin/plugin-nvptx.c  |5 ++
  libgomp/testsuite/libgomp.oacc-c++/c++.exp |6 +++
  libgomp/testsuite/libgomp.oacc-c/c.exp |6 +++
  libgomp/testsuite/libgomp.oacc-fortran/fortran.exp |6 +++
  7 files changed, 70 insertions(+), 25 deletions(-)
 
 diff --git gcc/ChangeLog gcc/ChangeLog
 index 33a2fa0..4c83723 100644
 --- gcc/ChangeLog
 +++ gcc/ChangeLog
 @@ -1,3 +1,9 @@
 +2015-07-08  Thomas Schwinge  tho...@codesourcery.com
 +
 + PR libgomp/65099
 + * config/nvptx/mkoffload.c (main): Create an offload image only in
 + 64-bit configurations.
 +
  2015-07-08  Martin Liska  mli...@suse.cz
  
   PR bootstrap/66744
 diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
 index 8687154..8bc08bf 100644
 --- gcc/config/nvptx/mkoffload.c
 +++ gcc/config/nvptx/mkoffload.c
 @@ -993,37 +993,43 @@ main (int argc, char **argv)
   obstack_ptr_grow (argv_obstack, argv[ix]);
  }
  
 -  ptx_name = make_temp_file (.mkoffload);
 -  obstack_ptr_grow (argv_obstack, -o);
 -  obstack_ptr_grow (argv_obstack, ptx_name);
 -  obstack_ptr_grow (argv_obstack, NULL);
 -  const char **new_argv = XOBFINISH (argv_obstack, const char **);
 -
 -  char *execpath = getenv (GCC_EXEC_PREFIX);
 -  char *cpath = getenv (COMPILER_PATH);
 -  char *lpath = getenv (LIBRARY_PATH);
 -  unsetenv (GCC_EXEC_PREFIX);
 -  unsetenv (COMPILER_PATH);
 -  unsetenv (LIBRARY_PATH);
 -
 -  fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
 -  obstack_free (argv_obstack, NULL);
 -
 -  xputenv (concat (GCC_EXEC_PREFIX=, execpath, NULL));
 -  xputenv (concat (COMPILER_PATH=, cpath, NULL));
 -  xputenv (concat (LIBRARY_PATH=, lpath, NULL));
 -
 -  in = fopen (ptx_name, r);
 -  if (!in)
 -fatal_error (input_location, cannot open intermediate ptx file);
 -
ptx_cfile_name = make_temp_file (.c);
  
out = fopen (ptx_cfile_name, w);
if (!out)
  fatal_error (input_location, cannot open '%s', ptx_cfile_name);
  
 -  process (in, out);
 +  /* PR libgomp/65099: Currently, we only support offloading in 64-bit
 + configurations.  */
 +  if (!target_ilp32)
 +{
 +  ptx_name = make_temp_file (.mkoffload);
 +  obstack_ptr_grow (argv_obstack, -o);
 +  obstack_ptr_grow (argv_obstack, ptx_name);
 +  obstack_ptr_grow (argv_obstack, NULL);
 +  const char **new_argv = XOBFINISH (argv_obstack, const char **);
 +
 +  char *execpath = getenv (GCC_EXEC_PREFIX);
 +  char *cpath = getenv (COMPILER_PATH);
 +  char *lpath = getenv (LIBRARY_PATH);
 +  unsetenv (GCC_EXEC_PREFIX);
 +  unsetenv (COMPILER_PATH);
 +  unsetenv (LIBRARY_PATH);
 +
 +  fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
 +  obstack_free (argv_obstack, NULL);
 +
 +  xputenv (concat (GCC_EXEC_PREFIX=, execpath, NULL));
 +  xputenv (concat (COMPILER_PATH=, cpath, NULL));
 +  xputenv (concat (LIBRARY_PATH=, lpath, NULL));
 +
 +  in = fopen (ptx_name, r);
 +  if (!in)
 + fatal_error (input_location, cannot open intermediate ptx file);
 +
 +  process (in, out);
 +}
 +
fclose (out);
  
compile_native (ptx_cfile_name, outname, collect_gcc);
 diff --git libgomp/ChangeLog libgomp/ChangeLog
 index 8839397..34f3a1c 100644
 --- libgomp/ChangeLog
 +++ libgomp/ChangeLog
 @@ -1,3 +1,13 @@
 +2015-07-08  Thomas Schwinge  tho...@codesourcery.com
 +
 + PR libgomp/65099
 + * 

  1   2   >