Re: [PATCH] libgomp: Introduce gomp_thread::spare_team

2015-07-12 Thread Sebastian Huber

Ping.

On 07/07/15 14:17, Sebastian Huber wrote:

Try to re-use the previous team to avoid the use of malloc() and free()
in the normal case where the number of threads is the same.  Avoid
superfluous destruction and initialization of team synchronization
objects.

Using the microbenchmark posted here

https://gcc.gnu.org/ml/gcc-patches/2008-03/msg00930.html

shows an improvement in the parallel bench test case (target
x86_64-unknown-linux-gnu, median out of 9 test runs, iteration count
increased to 20).

Before the patch:

parallel bench 11.2284 seconds

After the patch:

parallel bench 10.7575 seconds

libgomp/ChangeLog
2015-07-07  Sebastian Huber  

* libgomp.h (gomp_thread): Add spare_team field.
* team.c (gomp_thread_start): Initialize spare team for non-TLS
targets.
(gomp_new_team): Use spare team if possible.
(free_team): Destroy more team objects.
(gomp_free_thread): Free spare team if necessary.
(free_non_nested_team): New.
(gomp_team_end): Move some team object destructions to
free_team().  Use free_non_nested_team().
---
  libgomp/libgomp.h |  3 +++
  libgomp/team.c| 63 ---
  2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 5ed0f78..563c1e2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -448,6 +448,9 @@ struct gomp_thread
  
/* User pthread thread pool */

struct gomp_thread_pool *thread_pool;
+
+  /* Spare team ready for re-use in gomp_new_team()  */
+  struct gomp_team *spare_team;
  };
  
  
diff --git a/libgomp/team.c b/libgomp/team.c

index b98b233..cc19eb0 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -77,6 +77,7 @@ gomp_thread_start (void *xdata)
struct gomp_thread local_thr;
thr = &local_thr;
pthread_setspecific (gomp_tls_key, thr);
+  thr->spare_team = NULL;
  #endif
gomp_sem_init (&thr->release, 0);
  
@@ -140,19 +141,35 @@ gomp_thread_start (void *xdata)

  struct gomp_team *
  gomp_new_team (unsigned nthreads)
  {
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *spare_team = thr->spare_team;
struct gomp_team *team;
-  size_t size;
int i;
  
-  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])

- + sizeof (team->implicit_task[0]));
-  team = gomp_malloc (size);
+  if (spare_team && spare_team->nthreads == nthreads)
+{
+  thr->spare_team = NULL;
+  team = spare_team;
+}
+  else
+{
+  size_t extra = sizeof (team->ordered_release[0])
+ + sizeof (team->implicit_task[0]);
+  team = gomp_malloc (sizeof (*team) + nthreads * extra);
+
+#ifndef HAVE_SYNC_BUILTINS
+  gomp_mutex_init (&team->work_share_list_free_lock);
+#endif
+  gomp_barrier_init (&team->barrier, nthreads);
+  gomp_sem_init (&team->master_release, 0);
+  gomp_mutex_init (&team->task_lock);
+
+  team->nthreads = nthreads;
+}
  
team->work_share_chunk = 8;

  #ifdef HAVE_SYNC_BUILTINS
team->single_count = 0;
-#else
-  gomp_mutex_init (&team->work_share_list_free_lock);
  #endif
team->work_shares_to_free = &team->work_shares[0];
gomp_init_work_share (&team->work_shares[0], false, nthreads);
@@ -163,14 +180,9 @@ gomp_new_team (unsigned nthreads)
  team->work_shares[i].next_free = &team->work_shares[i + 1];
team->work_shares[i].next_free = NULL;
  
-  team->nthreads = nthreads;

-  gomp_barrier_init (&team->barrier, nthreads);
-
-  gomp_sem_init (&team->master_release, 0);
team->ordered_release = (void *) &team->implicit_task[nthreads];
team->ordered_release[0] = &team->master_release;
  
-  gomp_mutex_init (&team->task_lock);

team->task_queue = NULL;
team->task_count = 0;
team->task_queued_count = 0;
@@ -187,6 +199,10 @@ gomp_new_team (unsigned nthreads)
  static void
  free_team (struct gomp_team *team)
  {
+  gomp_sem_destroy (&team->master_release);
+#ifndef HAVE_SYNC_BUILTINS
+  gomp_mutex_destroy (&team->work_share_list_free_lock);
+#endif
gomp_barrier_destroy (&team->barrier);
gomp_mutex_destroy (&team->task_lock);
free (team);
@@ -225,6 +241,8 @@ gomp_free_thread (void *arg __attribute__((unused)))
  {
struct gomp_thread *thr = gomp_thread ();
struct gomp_thread_pool *pool = thr->thread_pool;
+  if (thr->spare_team)
+free_team (thr->spare_team);
if (pool)
  {
if (pool->threads_used > 0)
@@ -835,6 +853,18 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned 
nthreads,
  free (affinity_thr);
  }
  
+static void

+free_non_nested_team (struct gomp_team *team, struct gomp_thread *thr)
+{
+  struct gomp_thread_pool *pool = thr->thread_pool;
+  if (pool->last_team)
+{
+  if (thr->spare_team)
+   free_team (thr->spare_team);
+  thr->spare_team = pool->last_team;
+}
+  pool->last_team = team;
+}
  
  /* Terminate the current team.  This is o

Re: [PATCH 3/7] Fix trinary op

2015-07-12 Thread Mikhail Maltsev
On 07/10/2015 11:44 PM, Jeff Law wrote:
> 
> OK after regression testing.
> 
> jeff
> 
Bootstrapped and regtested on x86_64-unknown-linux-gnu. Applied as r225727.

-- 
Regards,
Mikhail Maltsev


Re: [PATCH][4/n] Remove GENERIC stmt combining from SCCVN

2015-07-12 Thread Jeff Law

On 06/29/2015 01:58 AM, Richard Biener wrote:


In principle the following works for the testcase (even w/o fixing
the VRP part).

Index: gcc/tree-ssa-dom.c
===
--- gcc/tree-ssa-dom.c  (revision 225007)
+++ gcc/tree-ssa-dom.c  (working copy)
@@ -1409,6 +1409,14 @@ simplify_stmt_for_jump_threading (gimple
return lookup_avail_expr (stmt, false);
  }

+static tree
+dom_valueize (tree t)
+{
+  if (TREE_CODE (t) == SSA_NAME)
+return SSA_NAME_VALUE (t);
+  return t;
+}
+
  /* Record into the equivalence tables any equivalences implied by
 traversing edge E (which are cached in E->aux).

@@ -1429,7 +1437,33 @@ record_temporary_equivalences (edge e)

/* If we have a simple NAME = VALUE equivalence, record it.  */
if (lhs && TREE_CODE (lhs) == SSA_NAME)
-   const_and_copies->record_const_or_copy (lhs, rhs);
+   {
+ gimple use_stmt;
+ imm_use_iterator iter;
+ const_and_copies->record_const_or_copy (lhs, rhs);
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+   {
+ /* Only bother to record more equivalences for lhs that
+can be directly used by e->dest.
+???  If the code gets re-organized to a worklist to
+catch more indirect opportunities and it is made to
+handle PHIs then this should only consider use_stmts
+in basic-blocks we have already visited.  */
+ if (!dominated_by_p (CDI_DOMINATORS,
+  e->dest, gimple_bb (use_stmt)))
+   continue;
+ tree lhs = gimple_get_lhs (use_stmt);
+ if (lhs && TREE_CODE (lhs) == SSA_NAME)
+   {
+ tree res = gimple_fold_stmt_to_constant_1 (use_stmt,
+dom_valueize,
+
no_follow_ssa_edges);
+ if (TREE_CODE (res) == SSA_NAME
+ || is_gimple_min_invariant (res))
+   const_and_copies->record_const_or_copy (lhs, res);
+   }
+   }
+   }

/* If we have 0 = COND or 1 = COND equivalences, record them
  into our expression hash tables.  */


it's not using DOMs own stmt visiting machinery as that always modifies
stmts in-place.  As stated in the comment it doesn't catch secondary
opportunities.  That would be possible by using a work-list seeded
by LHS we recorded new const/copies for and re-visiting their uses.
You can get extra fancy here by properly handling PHIs and
conditionals.  But it's a question of cost here, of course.
Right, the code you're modifying is only used by jump threading to 
record temporary equivalences, particularly equivalences that are 
specific to a path.





Note that I think this isn't really "backward propagation" but
just context sensitive value-numbering.
I think that's because we're looking at the problem differently.  It's 
certainly not backward propagation in the traditional dataflow sense, so 
I'm probably being too loose with terminology here.


When we discover something about X by means other than the definition of 
X, we can look at how X was set and possibly discover a value for source 
operands of that statement.  Similarly we can look at uses of X and 
possibly discover a value for the destination of those statement(s).  In 
both cases we're going backwards from an order-of-execution point of 
view and recording additional equivalences.


The existing code did the former (look at X's defining statement and try 
to discover an equivalence for a source operand in that statement). 
What we need to optimize this case is the latter.


I *think* these are closely enough related that some code can be 
factored out a bit and reused in both r_e_f_i_e and r_t_e to discover 
both types of equivalences for DOM and for jump threading.


Jeff


[committed] fix double word typos throughout compiler

2015-07-12 Thread Aldy Hernandez
As a general rule, I avoid committing typo fixes to areas of the 
compiler that I'm not directly touching.  It's usually not worth the 
hassle of writing a ChangeLog entry, and I have a hard enough time 
keeping my ADHD in check.


But today, while reading our vector and hash implementation in between 
episodes of House of Cards, I found one too many double double typos.  I 
wondered how many others I could find with:


grep " \([A-Za-z]\+\) \1 " gcc/* -r

Lo and behold, after ignoring "template template", "long long", and 
"Never Never Land", we still get:


121 files changed, 292 insertions(+), 155 deletions(-)

And that's ignoring everything outside of gcc/ !! (volunteers welcome).

There are still a couple "address address" instances in reload1.c, but 
I've never understood reload, so I don't know if it's a typo or a 
feature (along with the rest of file ;-))).


Anyhow... committed as obvious.

Tested on x86-64 Linux by making the compiler with 
--enable-languages=all,go,ada --disable-bootstrap --disable-multilib.


Aldy
* bitmap.h: Fix double word typos.
* builtins.c: Same.
* calls.c: Same.
* cfgloopmanip.c: Same.
* cgraph.c: Same.
* cgraph.h: Same.
* cgraphclones.c: Same.
* combine.c: Same.
* config/aarch64/aarch64-protos.h: Same.
* config/aarch64/aarch64.c: Same.
* config/aarch64/aarch64.md: Same.
* config/arm/arm.md: Same.
* config/arm/arm1020e.md: Same.
* config/arm/arm1026ejs.md: Same.
* config/arm/arm926ejs.md: Same.
* config/arm/fa526.md: Same.
* config/arm/fa606te.md: Same.
* config/arm/fa626te.md: Same.
* config/arm/fa726te.md: Same.
* config/arm/fmp626.md: Same.
* config/darwin.c: Same.
* config/epiphany/epiphany.c: Same.
* config/frv/frv.c: Same.
* config/ft32/ft32.c: Same.
* config/gnu-user.h: Same.
* config/h8300/constraints.md: Same.
* config/i386/i386.c: Same.
* config/i386/i386.md: Same.
* config/iq2000/iq2000.md: Same.
* config/mips/mips.c: Same.
* config/mmix/mmix.md: Same.
* config/moxie/moxie.c: Same.
* config/nds32/nds32.md: Same.
* config/pa/pa.h: Same.
* config/rs6000/aix.h: Same.
* config/rs6000/rs6000.h: Same.
* config/sh/sh.c: Same.
* config/tilegx/tilegx.md: Same.
* config/tilepro/gen-mul-tables.cc: Same.
* cse.c: Same.
* dbxout.c: Same.
* doc/invoke.texi: Same.
* dse.c: Same.
* dwarf2out.c: Same.
* final.c: Same.
* gcc.c: Same.
* genmatch.c: Same.
* gimplify.c: Same.
* hash-table.h: Same.
* internal-fn.c: Same.
* ipa-cp.c: Same.
* ipa-devirt.c: Same.
* ipa-icf.c: Same.
* ipa-icf.h: Same.
* ipa-profile.c: Same.
* ipa-prop.c: Same.
* ipa-prop.h: Same.
* ira.c: Same.
* omp-low.c: Same.
* reg-stack.c: Same.
* regcprop.c: Same.
* reorg.c: Same.
* rtl.h: Same.
* sbitmap.h: Same.
* tree-eh.c: Same.
* tree-inline.c: Same.
* tree-sra.c: Same.
* tree-ssa-dom.c: Same.
* tree-ssa-loop-ivopts.c: Same.
* tree-ssa-structalias.c: Same.
* tree-ssa-tail-merge.c: Same.
* tree-ssa-ter.c: Same.
* tree-ssa-threadupdate.c: Same.
* tree-ssa-uninit.c: Same.
* tree-ssanames.c: Same.
* tree-vect-loop-manip.c: Same.
* tree-vrp.c: Same.
* tree.c: Same.
* valtrack.c: Same.
* vec.h: Same.
ada/
* gcc-interface/utils.c: Fix double word typos.
* gnat_ugn.texi: Same.
* init.c: Same.
* par.adb: Same.
* projects.texi: Same.
* sem_ch4.adb: Same.
* sem_ch6.adb: Same.
* sem_dim.adb: Same.
* sem_eval.adb: Same.
* sem_prag.adb: Same.
* sem_warn.adb: Same.
lto/
* lto-lang.c: Fix double word typos.
java/
* class.c: Fix double word typos.
* java-except.h: Same.
* jcf-reader.c: Same.
testsuite/
* gcc.dg/20020219-1.c: Fix double word typos.
* gcc.dg/20020919-1.c: Same.
fortran/
* trans-stmt.c: Fix double word typos.
objc/
* objc-map.h: Fix double word typos.
cp/
* call.c: Fix double word typos.
* cp-array-notation.c: Same.
* cp-tree.h: Same.
* init.c: Same.
* name-lookup.c: Same.
* parser.c: Same.
c-family/
* c-common.c: Fix double word typos.
go/
* gofrontend/backend.h: Fix double word typos.
* gofrontend/expressions.cc: Same.
* gospec.c: Same.

commit 2f39266e228f85e392d8dbd45d427d4143061f34
Author: Aldy Hernandez 
Date:   Sun Jul 12 21:23:07 2015 -0700

Fix double word typos.

diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interfac

[PATCH] remove some usage of expr_list from read_rtx

2015-07-12 Thread tbsaunde+gcc
From: Trevor Saunders 

Hi,

It seems much simpler for read_rtx to just add rtxs to a vector than to deal
with a bunch of expr list rtxen.


bootstrapped + regtested on x86_64-linux-gnu, ok?

Trev

gcc/ChangeLog:

2015-07-12  Trevor Saunders  

* gensupport.c (rtx_handle_directive): Adjust.
* read-rtl.c (apply_iterators): Take vector to add rtxs to
instead of expr list rtx.
(add_define_attr_for_define_subst): Likewise.
(add_define_subst_attr): Likewise.
(read_subst_mapping): Likewise.
(read_rtx): Likewise.
* rtl.h (read_rtx): Adjust.
---
 gcc/gensupport.c | 12 
 gcc/read-rtl.c   | 47 ---
 gcc/rtl.h|  2 +-
 3 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/gcc/gensupport.c b/gcc/gensupport.c
index 729366c..e673b5c 100644
--- a/gcc/gensupport.c
+++ b/gcc/gensupport.c
@@ -26,6 +26,7 @@
 #include "errors.h"
 #include "read-md.h"
 #include "gensupport.h"
+#include "vec.h"
 
 #define MAX_OPERANDS 40
 
@@ -2248,11 +2249,14 @@ process_define_subst (void)
 static void
 rtx_handle_directive (int lineno, const char *rtx_name)
 {
-  rtx queue, x;
+  auto_vec subrtxs;
+  if (!read_rtx (rtx_name, &subrtxs))
+return;
 
-  if (read_rtx (rtx_name, &queue))
-for (x = queue; x; x = XEXP (x, 1))
-  process_rtx (XEXP (x, 0), lineno);
+  rtx x;
+  unsigned int i;
+  FOR_EACH_VEC_ELT (subrtxs, i, x)
+process_rtx (x, lineno);
 }
 
 /* Comparison function for the mnemonic hash table.  */
diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
index e8c849f..0f9e618 100644
--- a/gcc/read-rtl.c
+++ b/gcc/read-rtl.c
@@ -506,7 +506,7 @@ add_current_iterators (void **slot, void *data 
ATTRIBUTE_UNUSED)
Build a list of expanded rtxes in the EXPR_LIST pointed to by QUEUE.  */
 
 static void
-apply_iterators (rtx original, rtx *queue)
+apply_iterators (rtx original, vec *queue)
 {
   unsigned int i;
   const char *condition;
@@ -519,8 +519,7 @@ apply_iterators (rtx original, rtx *queue)
 {
   /* Raise an error if any attributes were used.  */
   apply_attribute_uses ();
-  XEXP (*queue, 0) = original;
-  XEXP (*queue, 1) = NULL_RTX;
+  queue->safe_push (original);
   return;
 }
 
@@ -572,8 +571,7 @@ apply_iterators (rtx original, rtx *queue)
}
}
   /* Add the new rtx to the end of the queue.  */
-  XEXP (*queue, 0) = x;
-  XEXP (*queue, 1) = NULL_RTX;
+  queue->safe_push (x);
 
   /* Lexicographically increment the iterator value sequence.
 That is, cycle through iterator values, starting from the right,
@@ -590,10 +588,6 @@ apply_iterators (rtx original, rtx *queue)
break;
  iterator->current_value = iterator->values;
}
-
-  /* At least one more rtx to go.  Allocate room for it.  */
-  XEXP (*queue, 1) = rtx_alloc (EXPR_LIST);
-  queue = &XEXP (*queue, 1);
 }
 }
 
@@ -945,7 +939,7 @@ read_mapping (struct iterator_group *group, htab_t table)
define_subst ATTR_NAME should be applied.  This attribute is set and
defined implicitly and automatically.  */
 static void
-add_define_attr_for_define_subst (const char *attr_name, rtx *queue)
+add_define_attr_for_define_subst (const char *attr_name, vec *queue)
 {
   rtx const_str, return_rtx;
 
@@ -960,14 +954,13 @@ add_define_attr_for_define_subst (const char *attr_name, 
rtx *queue)
   XSTR (return_rtx, 1) = xstrdup ("no,yes");
   XEXP (return_rtx, 2) = const_str;
 
-  XEXP (*queue, 0) = return_rtx;
-  XEXP (*queue, 1) = NULL_RTX;
+  queue->safe_push (return_rtx);
 }
 
 /* This routine generates DEFINE_SUBST_ATTR expression with operands
ATTR_OPERANDS and places it to QUEUE.  */
 static void
-add_define_subst_attr (const char **attr_operands, rtx *queue)
+add_define_subst_attr (const char **attr_operands, vec *queue)
 {
   rtx return_rtx;
   int i;
@@ -978,8 +971,7 @@ add_define_subst_attr (const char **attr_operands, rtx 
*queue)
   for (i = 0; i < 4; i++)
 XSTR (return_rtx, i) = xstrdup (attr_operands[i]);
 
-  XEXP (*queue, 0) = return_rtx;
-  XEXP (*queue, 1) = NULL_RTX;
+  queue->safe_push (return_rtx);
 }
 
 /* Read define_subst_attribute construction.  It has next form:
@@ -992,18 +984,17 @@ add_define_subst_attr (const char **attr_operands, rtx 
*queue)
 
 static void
 read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table,
-   rtx *queue)
+   vec *queue)
 {
   struct mapping *m;
   struct map_value **end_ptr;
   const char *attr_operands[4];
-  rtx * queue_elem = queue;
   int i;
 
   for (i = 0; i < 4; i++)
 attr_operands[i] = read_string (false);
 
-  add_define_subst_attr (attr_operands, queue_elem);
+  add_define_subst_attr (attr_operands, queue);
 
   bind_subst_iter_and_attr (attr_operands[1], attr_operands[0]);
 
@@ -1015,11 +1006,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t 
subst_attrs_table,
   end_ptr = add_map_value (end_

Re: [PATCH] Fix PR c++/66850 (Adding a forward declaration causes ICE on valid code)

2015-07-12 Thread Patrick Palka
On Sun, Jul 12, 2015 at 7:31 PM, Patrick Palka  wrote:
> This patch attempts resolve the mentioned PR by fixing two underlying
> issues:

I should note that there is some overlap between the fixes.
Technically fix #2 is sufficient to resolve the PR, whereas fix #1 can
only resolve the first test case in namespace X below.  But fix #1
seems to be sensible even if it is redundant.


[PATCH] Fix PR c++/66850 (Adding a forward declaration causes ICE on valid code)

2015-07-12 Thread Patrick Palka
This patch attempts to resolve the mentioned PR by fixing two underlying
issues:

1.

When a template is first declared we currently take care to set
the DECL_CONTEXT of each of its template template parms to point to it (line
5119 in pt.c:push_template_decl_real).  However, we currently don't do
this for subsequent redeclarations of the template
(push_template_decl_real is only called for the first declaration).
If a DECL_CONTEXT of a template template parm is not set,
lookup_template_class_1 gets confused when it attempts to instantiate
the template template parm.

This patch makes the function redeclare_class_template also set the
DECL_CONTEXT of each template template parm.

2.

When the DECL_CONTEXT of a template template parm is not set,
lookup_template_class_1 instead uses current_template_args to synthesize
an argument list instead of using the TI_ARGS of the DECL_CONTEXT.
Using current_template_args is not 100% right, however, since we may not
currently be in the same parameter level that the template template parm
was defined in.  We may be in a deeper parameter level if 1) a nested
template has been defined in the meantime or 2) if a nested parameter
list has been started in the meantime.  Parameter levels that are greater
than the level of the given template template parm are irrelevant.

This patch peels off these irrelevant parameter levels from
current_template_parms before augmenting the argument list in
lookup_template_class_1.

OK to commit after bootstrap + regtest?

gcc/cp/ChangeLog:

PR c++/66850
* pt.c (redeclare_class_template): Set the DECL_CONTEXTs of each
template template parm in the redeclaration.
(lookup_template_class_1): Peel off irrelevant template levels
from current_template_parms before augmenting the argument
list.

gcc/testsuite/ChangeLog:

PR c++/66850
* g++.dg/template/pr66850.C: New test.
---
 gcc/cp/pt.c | 25 ---
 gcc/testsuite/g++.dg/template/pr66850.C | 44 +
 2 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/pr66850.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 2097963..6bdfd33 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -5297,6 +5297,14 @@ redeclare_class_template (tree type, tree parms)
/* Update the new parameters, too; they'll be used as the
   parameters for any members.  */
TREE_PURPOSE (TREE_VEC_ELT (parms, i)) = tmpl_default;
+
+  /* Give each template template parm in this redeclaration a
+DECL_CONTEXT of the template for which they are a parameter.  */
+  if (TREE_CODE (parm) == TEMPLATE_DECL)
+   {
+ gcc_assert (DECL_CONTEXT (parm) == NULL_TREE);
+ DECL_CONTEXT (parm) = tmpl;
+   }
 }
 
 return true;
@@ -7749,9 +7757,20 @@ lookup_template_class_1 (tree d1, tree arglist, tree 
in_decl, tree context,
   if (outer)
outer = TI_ARGS (get_template_info (DECL_TEMPLATE_RESULT (outer)));
   else if (current_template_parms)
-   /* This is an argument of the current template, so we haven't set
-  DECL_CONTEXT yet.  */
-   outer = current_template_args ();
+   {
+ /* This is an argument of the current template, so we haven't set
+DECL_CONTEXT yet.  */
+ tree relevant_template_parms;
+
+ /* Parameter levels that are greater than the level of the given
+template template parm are irrelevant.  */
+ relevant_template_parms = current_template_parms;
+ while (TMPL_PARMS_DEPTH (relevant_template_parms)
+!= TEMPLATE_TYPE_LEVEL (TREE_TYPE (templ)))
+   relevant_template_parms = TREE_CHAIN (relevant_template_parms);
+
+ outer = template_parms_to_args (relevant_template_parms);
+   }
 
   if (outer)
arglist = add_to_template_args (outer, arglist);
diff --git a/gcc/testsuite/g++.dg/template/pr66850.C 
b/gcc/testsuite/g++.dg/template/pr66850.C
new file mode 100644
index 000..31c1290
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/pr66850.C
@@ -0,0 +1,44 @@
+// PR c++/66850
+// Each namespace contains an otherwise standalone test case, none of which
+// should cause an ICE.
+
+namespace X {
+  template  class> struct Sort;
+
+  template  class Comparator>
+  struct Sort
+  {
+template 
+struct less_than
+{
+  Comparator a;
+};
+  };
+}
+
+namespace Y {
+  template  struct integral_constant {};
+
+  template  class> struct Sort;
+
+  template  class Comparator>
+  struct Sort
+  {
+  template  struct less_than:
+  integral_constant::value> {};
+  };
+}
+
+namespace Z {
+  template  class> struct Sort;
+
+  template  class Comparator>
+  struct Sort
+  {
+template 
+struct less_than
+{
+  Comparator a;
+};
+  };
+}
-- 
2.5.0.rc1.40.ge088f2b.dirty



Re: [PATCH g++ driver] Push -static-libstdc++ back onto the command line to allow spec substitutions to use it.

2015-07-12 Thread Iain Sandoe
Ping

On 18 Jun 2015, at 09:12, Iain Sandoe wrote:

> Hi,
> 
> This came up in a User question last night and reminded me that I had a patch 
> for it in my Q.
> 
> 
> 
> Usually g++ driver support for -static-libstdc++ is provided by "-Bstatic 
> -lstdc++ -Bdynamic" and is currently disabled for targets without that linker 
> support.  However, actually, there is still an opportunity for those targets 
> to implement the functionality with a spec substitution like:
> 
> %{static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}
> 
> However, this doesn't currently work because the g++ driver swallows 
> "-static-libstdc++" so it's never seen by the specs.
> The patch below pushes -static-libstdc++ onto the output command line (for 
> targets without -Bstatic/dynamic)  so that such specs have an opportunity to 
> fire.
> 
> OK for trunk?
> Iain
> 
> gcc/cp:
>   * g++spec.c (lang_specific_driver): Push "-static-libstdc++" back to 
> the output
>   command line when the target linker does not support -Bstatic/dynamic 
> so that it
>   is available for spec substitutions to act on.
> 
> 
> ---
> gcc/cp/g++spec.c | 9 +
> 1 file changed, 9 insertions(+)
> 
> diff --git a/gcc/cp/g++spec.c b/gcc/cp/g++spec.c
> index 6536d7e..48a9708 100644
> --- a/gcc/cp/g++spec.c
> +++ b/gcc/cp/g++spec.c
> @@ -363,6 +363,15 @@ lang_specific_driver (struct cl_decoded_option 
> **in_decoded_options,
>  &new_decoded_options[j]);
> j++;
>   }
> +#else
> +  /* For linkers that don't support Bstatic/dynamic push the flag back
> + so that spec substitution can see it.  */
> +  if (library > 1 && !static_link)
> +{
> +  generate_option (OPT_static_libstdc__, NULL, 0, CL_DRIVER,
> +   &new_decoded_options[j]);
> +  j++;
> +}
> #endif
>}
>  if (saw_math)
> -- 
> 2.2.1
> 



Re: [v3 PATCH] Implement make_array and to_array from the Fundamentals v2 TS draft

2015-07-12 Thread Ville Voutilainen
On 12 July 2015 at 21:45, Ville Voutilainen  wrote:
> Tested on Linux-PPC64.
>
> 2015-07-12  Ville Voutilainen  
> Implement std::experimental::fundamentals_v2::make_array and
> std::experimental::fundamentals_v2::to_array.
> * include/Makefile.am: Add array.
> * include/Makefile.in: Add array.
> * include/experimental/array: New.
> * testsuite/experimental/array/make_array.cc: Likewise.
> * testsuite/experimental/array/neg.cc: Likewise.

Very minor cleanup in a new patch, use is_void<_D> instead of is_same<_D, void>,
indent the static assert a bit more clearly.
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 05be8ad..41fc4af 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -646,6 +646,7 @@ experimental_builddir = ./experimental
 experimental_headers = \
${experimental_srcdir}/algorithm \
${experimental_srcdir}/any \
+   ${experimental_srcdir}/array \
${experimental_srcdir}/chrono \
${experimental_srcdir}/deque \
${experimental_srcdir}/erase_if.h \
@@ -657,6 +658,7 @@ experimental_headers = \
${experimental_srcdir}/memory \
${experimental_srcdir}/numeric \
${experimental_srcdir}/optional \
+   ${experimental_srcdir}/propagate_const \
${experimental_srcdir}/ratio \
${experimental_srcdir}/set \
${experimental_srcdir}/string \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index bab83b4..b2a140c 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -935,6 +935,7 @@ experimental_builddir = ./experimental
 experimental_headers = \
${experimental_srcdir}/algorithm \
${experimental_srcdir}/any \
+   ${experimental_srcdir}/array \
${experimental_srcdir}/chrono \
${experimental_srcdir}/deque \
${experimental_srcdir}/erase_if.h \
@@ -946,6 +947,7 @@ experimental_headers = \
${experimental_srcdir}/memory \
${experimental_srcdir}/numeric \
${experimental_srcdir}/optional \
+   ${experimental_srcdir}/propagate_const \
${experimental_srcdir}/ratio \
${experimental_srcdir}/set \
${experimental_srcdir}/string \
diff --git a/libstdc++-v3/include/experimental/array 
b/libstdc++-v3/include/experimental/array
new file mode 100644
index 000..1ce4118
--- /dev/null
+++ b/libstdc++-v3/include/experimental/array
@@ -0,0 +1,107 @@
+//  -*- C++ -*-
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// .
+
+/** @file experimental/array
+ *  This is a TS C++ Library header.
+ */
+
+#ifndef _GLIBCXX_EXPERIMENTAL_ARRAY
+#define _GLIBCXX_EXPERIMENTAL_ARRAY 1
+
+#pragma GCC system_header
+
+#if __cplusplus <= 201103L
+# include 
+#else
+
+#include 
+#include 
+#include 
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace experimental
+{
+inline namespace fundamentals_v2
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  /**
+   * @defgroup make_array Array creation functions
+   * @ingroup experimental
+   *
+   * Array creation functions as described in N4529,
+   * Working Draft, C++ Extensions for Library Fundamentals, Version 2
+   *
+   * @{
+   */
+
+template 
+struct __is_reference_wrapper : false_type
+{ };
+
+template 
+struct __is_reference_wrapper> : true_type
+{ };
+
+template 
+constexpr auto make_array(_Types&&... __t)
+  -> array,
+ common_type_t<_Types...>,
+ _D>,
+   sizeof...(_Types)>
+{
+  static_assert(__or_<
+  __not_>,
+  __and_<__not_<__is_reference_wrapper>>...>>
+::value,
+"make_array cannot be used without an explicit target type "
+"if any of the types given is a reference_wrapper");
+  return {{__t...}};
+}
+
+template 
+constexpr array, _N>
+__to_array(_Tp (&__a)[_N],
+   index_sequence<_Idx...>)
+{
+  return {{__a[_Idx]...}}

[PATCH] PR/66760, ipa-inline-analysis.c compile-time hog

2015-07-12 Thread Paolo Bonzini
From: bonz...@gnu.org

In this PR, a lot of time is spent doing the same ipa_load_from_parm_agg
query over and over.  Luckily a memoization scheme is already there, it's
just not used by ipa-inline-analysis.c.  The patch moves the cache struct
(struct func_body_info) to ipa-prop.h and modifies ipa-inline-analysis.c to use it.
On some testcases from PR26854 the "alias stmt walking" timevar goes
off the profile while it used to be 30-70%.

Bootstrapped (regtest in progress) on x86_64-pc-linux-gnu.

Please commit the patch for me if approved, as I no longer have
the key I used to use for gcc.gnu.org.  One of these days I'll send
my new SSH public key to the overseers.

Paolo

* ipa-inline-analysis.c (unmodified_parm_or_parm_agg_item): Accept
struct func_body_info* instead of struct ipa_node_params*, expecting
fbi->info to be filled in.  Replace throughout.  Adjust call to
ipa_load_from_parm_agg.
(set_cond_stmt_execution_predicate): Accept struct func_body_info*
instead of struct ipa_node_params*.  Adjust calls to other functions
so that they pass either fbi or fbi->info.
(set_switch_stmt_execution_predicate): Likewise.
(will_be_nonconstant_predicate): Likewise.
(compute_bb_predicates): Likewise.
(estimate_function_body_sizes): Move asserts earlier.  Fill in
struct func_body_info, replace parms_info with fbi.info.  Adjust
calls to functions that now accept struct func_body_info.
* ipa-prop.c (struct param_aa_status, struct ipa_bb_info,
struct func_body_info): Move to ipa-prop.h.
(ipa_load_from_parm_agg_1): Rename to ipa_load_from_parm_agg,
remove static.  Adjust callers.
(ipa_load_from_parm_agg): Remove.
* ipa-prop.h (struct param_aa_status, struct ipa_bb_info,
struct func_body_info): Move from ipa-prop.c.
(ipa_load_from_parm_agg): Adjust prototype.

diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
index d5dbfbd..81a6860 100644
--- a/gcc/ipa-inline-analysis.c
+++ b/gcc/ipa-inline-analysis.c
@@ -1574,7 +1574,7 @@ unmodified_parm (gimple stmt, tree op)
loaded.  */
 
 static bool
-unmodified_parm_or_parm_agg_item (struct ipa_node_params *info,
+unmodified_parm_or_parm_agg_item (struct func_body_info *fbi,
  gimple stmt, tree op, int *index_p,
  struct agg_position_info *aggpos)
 {
@@ -1583,7 +1583,7 @@ unmodified_parm_or_parm_agg_item (struct ipa_node_params 
*info,
   gcc_checking_assert (aggpos);
   if (res)
 {
-  *index_p = ipa_get_param_decl_index (info, res);
+  *index_p = ipa_get_param_decl_index (fbi->info, res);
   if (*index_p < 0)
return false;
   aggpos->agg_contents = false;
@@ -1599,13 +1599,14 @@ unmodified_parm_or_parm_agg_item (struct 
ipa_node_params *info,
   stmt = SSA_NAME_DEF_STMT (op);
   op = gimple_assign_rhs1 (stmt);
   if (!REFERENCE_CLASS_P (op))
-   return unmodified_parm_or_parm_agg_item (info, stmt, op, index_p,
+   return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p,
 aggpos);
 }
 
   aggpos->agg_contents = true;
-  return ipa_load_from_parm_agg (info, stmt, op, index_p, &aggpos->offset,
-&aggpos->by_ref);
+  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
+stmt, op, index_p, &aggpos->offset,
+NULL, &aggpos->by_ref);
 }
 
 /* See if statement might disappear after inlining.
@@ -1744,7 +1745,7 @@ eliminated_by_inlining_prob (gimple stmt)
predicates to the CFG edges.   */
 
 static void
-set_cond_stmt_execution_predicate (struct ipa_node_params *info,
+set_cond_stmt_execution_predicate (struct func_body_info *fbi,
   struct inline_summary *summary,
   basic_block bb)
 {
@@ -1767,7 +1768,7 @@ set_cond_stmt_execution_predicate (struct ipa_node_params 
*info,
   /* TODO: handle conditionals like
  var = op0 < 4;
  if (var != 0).  */
-  if (unmodified_parm_or_parm_agg_item (info, last, op, &index, &aggpos))
+  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &aggpos))
 {
   code = gimple_cond_code (last);
   inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
@@ -1810,8 +1811,7 @@ set_cond_stmt_execution_predicate (struct ipa_node_params 
*info,
   || gimple_call_num_args (set_stmt) != 1)
 return;
   op2 = gimple_call_arg (set_stmt, 0);
-  if (!unmodified_parm_or_parm_agg_item
-  (info, set_stmt, op2, &index, &aggpos))
+  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &aggpos))
 return;
   FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
 {
@@ -1827,7 +1827,7 @@ set_cond_stmt_execution_predicate (struct ipa_node_params 
*info,
predicates to the CFG edges.   */
 
 static void
-set_switch_stmt_execution_predicate (struct ipa_node_params *info,
+set_switch_stmt_execution_predicate (struct func_body_info *fbi,
  

Re: [PATCH] rs6000: Revamp rotate-and-mask and insert

2015-07-12 Thread David Edelsohn
On Sun, Jul 12, 2015 at 1:08 PM, Segher Boessenkool
 wrote:
> This rewrites all the rotate-and-mask and insert patterns.
>
> The goals are to have simpler, shorter, less error-prone code (with much
> fewer machine description patterns), as well as to get better optimised
> machine code.
>
> All "mask only" insns are now handled by a single pattern; all rotate (or
> shift) and mask by another pattern; and all insert insns by just a few
> patterns as well.  Also, more patterns that can be done by a combination
> of two of these (or a load immediate and one) are now done that way.
>
> This removes a few constraints and output modifiers.  I don't think any
> inline assembler code uses those (they really shouldn't), but if in fact
> it is used, it is easy to put back simpler definitions just for
> compatibility.  We can use a few free letters for both constraints and
> modifiers.
>
> Bootstrapped and tested on powerpc64-linux with the usual options
> (-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.
>
> This reduces the number of generated instructions for 32-bit by about
> 1 in 2000, and for 64-bit by about 1 in 700.  It always does that by
> decreasing path length so it should never make performance worse either.
>
> There are two FIXMEs for pretty harmless stuff, but the rtx_costs one
> at least probably wants fixing soonish.
>
> Oh, and this patch should be applied after the HAVE_insv patch I posted
> earlier today, for good results.
>
>
> Is this okay for trunk?
>
>
> 2015-07-12  Segher Boessenkool  
>
> PR target/66217
> * config/rs6000/constraints.md ("S", "T", "t"): Delete.
> * config/rs6000/predicates.md (mask_operand, mask_operand_wrap,
> mask64_operand, mask64_2_operand, any_mask_operand, and64_2_operand,
> and_2rld_operand):  Delete.
> (and_operand): Adjust.
> (rotate_mask_operator): New.
> * config/rs6000/rs6000-protos.h (build_mask64_2_operands,
> includes_lshift_p, includes_rshift_p, includes_rldic_lshift_p,
> includes_rldicr_lshift_p, insvdi_rshift_rlwimi_p, extract_MB,
> extract_ME): Delete.
> (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
> rs6000_is_valid_shift_mask, rs6000_is_valid_insert_mask,
> rs6000_insn_for_and_mask, rs6000_insn_for_shift_mask,
> rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
> rs6000_emit_2insn_and): New.
> * config/rs6000/rs6000.c (num_insns_constant): Adjust.
> (build_mask64_2_operands, includes_lshift_p, includes_rshift_p,
> includes_rldic_lshift_p, includes_rldicr_lshift_p,
> insvdi_rshift_rlwimi_p, extract_MB, extract_ME): Delete.
> (rs6000_is_valid_mask, rs6000_is_valid_and_mask,
> rs6000_insn_for_and_mask, rs6000_is_valid_shift_mask,
> rs6000_insn_for_shift_mask, rs6000_is_valid_insert_mask,
> rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
> rs6000_emit_2insn_and): New.
> (print_operand) <'b', 'B', 'm', 'M', 's', 'S', 'W'>: Delete.
> (rs6000_rtx_costs) : Delete mask_operand and mask64_operand
> handling.
> : Don't fall through to next case.
> : Handle the various rotate-and-mask cases directly.
> : Always cost as one insn.
> * config/rs6000/rs6000.md (splitter for bswap:SI): Adjust.
> (and3): Adjust expander for the new patterns.
> (and3_imm, and3_imm_dot, and3_imm_dot2,
> and3_imm_mask_dot, and3_imm_mask_dot2): Adjust condition.
> (*and3_imm_dot_shifted): New.
> (*and3_mask): Delete, rewrite as ...
> (and3_mask): ... New.
> (*and3_mask_dot, *and3_mask_dot): Rewrite.
> (andsi3_internal0_nomc): Delete.
> (*andsi3_internal6): Delete.
> (*and3_2insn): New.
> (insv, insvsi_internal, *insvsi_internal1, *insvsi_internal2,
> *insvsi_internal3, *insvsi_internal4, *insvsi_internal5,
> *insvsi_internal6, insvdi_internal, *insvdi_internal2,
> *insvdi_internal3): Delete.
> (*rotl3_mask, *rotl3_mask_dot, *rotl3_mask_dot2,
> *rotl3_insert, *rotl3_insert_2, *rotl3_insert_3,
> *rotl3_insert_4, two splitters for multi-precision shifts,
> *ior_mask): New.
> (extzv, extzvdi_internal, *extzvdi_internal1, *extzvdi_internal2,
> *rotlsi3_mask, *rotlsi3_mask_dot, *rotlsi3_mask_dot2,
> *ashlsi3_imm_mask, *ashlsi3_imm_mask_dot, *ashlsi3_imm_mask_dot2,
> *lshrsi3_imm_mask, *lshrsi3_imm_mask_dot, *lshrsi3_imm_mask_dot2):
> Delete.
> (ashr3): Delete expander.
> (*ashr3): Rename to ...
> (ashr3): ... This.
> (ashrdi3_no_power, *ashrdisi3_noppc64be): Delete.
> (*rotldi3_internal4, *rotldi3_internal5 and split,
> *rotldi3_internal6 and split, *ashldi3_internal4, ashldi3_internal5
> and split, *ashldi3_internal6 and split, *ashldi3_internal7,
> ashldi3_internal8 and split, *

Re: Fix PR43404, PR48470, PR64744 ICE on naked functions

2015-07-12 Thread Alexander Basov
ping

2015-06-29 16:32 GMT+03:00 Alexander Basov :
> I've updated patch with attributes lookup.
> is it OK?
>
> --
> Alexander
>
> 2015-06-26 9:33 GMT+03:00 Alexander Basov :
>> 2015-06-25 21:47 GMT+03:00 Jeff Law :
>>> On 06/03/2015 02:15 PM, Alexander Basov wrote:

 Hello Jeff,
 please find updated patch attached

>> diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
>> index b190f91..c6db8a9 100644
>> --- a/gcc/cfgexpand.c
>> +++ b/gcc/cfgexpand.c
>> @@ -1382,7 +1382,15 @@ expand_one_var (tree var, bool toplevel, bool
>> really_expand)
>>  else
>>{
>>  if (really_expand)
>> -expand_one_stack_var (origvar);
>> +{
>> +  if (!targetm.calls.allocate_stack_slots_for_args ())
>> +error ("cannot allocate stack for variable %q+D, naked
>> function.",
>> +   var);
>> +
>> +  expand_one_stack_var (origvar);
>> +}
>
> So how do you know ORIGVAR is an argument here before issuing the
> error?  ie, shouldn't you verify that the underlying object is a
> PARM_DECL? If there's some way we already know we're dealing with a
> PARM_DECL, then just say so.

 In a naked function the stack should not be used at all -- not only for
 function args, but also for any local variables.
 So, I think we don't need to check if the underlying object is a PARM_DECL.
>>>
>>> Then that would indicate that we're using the wrong test
>>> (allocate_stack_slots_for_args).  That hook is for whether or not arguments
>>> should have stack slots allocated.  Yet you're issuing an error for more
>>> than just PARM_DECLs.
>>>
>>> Shouldn't you instead be checking if the current function is a naked
>>> function or not by checking the attributes of the current function?
>>>
>>> Jeff
>>
>> What allocate_stack_slots_for_args does is only check whether the current
>> function is naked or not.
>> Maybe it would be better to remove allocate_stack_slots_for_args and
>> replace it with an explicit check of the naked attribute?
>>
>> --
>> Alexander



-- 
Alexander


[v3 PATCH] Implement make_array and to_array from the Fundamentals v2 TS draft

2015-07-12 Thread Ville Voutilainen
Tested on Linux-PPC64.

2015-07-12  Ville Voutilainen  
Implement std::experimental::fundamentals_v2::make_array and
std::experimental::fundamentals_v2::to_array.
* include/Makefile.am: Add array.
* include/Makefile.in: Add array.
* include/experimental/array: New.
* testsuite/experimental/array/make_array.cc: Likewise.
* testsuite/experimental/array/neg.cc: Likewise.
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 05be8ad..41fc4af 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -646,6 +646,7 @@ experimental_builddir = ./experimental
 experimental_headers = \
${experimental_srcdir}/algorithm \
${experimental_srcdir}/any \
+   ${experimental_srcdir}/array \
${experimental_srcdir}/chrono \
${experimental_srcdir}/deque \
${experimental_srcdir}/erase_if.h \
@@ -657,6 +658,7 @@ experimental_headers = \
${experimental_srcdir}/memory \
${experimental_srcdir}/numeric \
${experimental_srcdir}/optional \
+   ${experimental_srcdir}/propagate_const \
${experimental_srcdir}/ratio \
${experimental_srcdir}/set \
${experimental_srcdir}/string \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index bab83b4..b2a140c 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -935,6 +935,7 @@ experimental_builddir = ./experimental
 experimental_headers = \
${experimental_srcdir}/algorithm \
${experimental_srcdir}/any \
+   ${experimental_srcdir}/array \
${experimental_srcdir}/chrono \
${experimental_srcdir}/deque \
${experimental_srcdir}/erase_if.h \
@@ -946,6 +947,7 @@ experimental_headers = \
${experimental_srcdir}/memory \
${experimental_srcdir}/numeric \
${experimental_srcdir}/optional \
+   ${experimental_srcdir}/propagate_const \
${experimental_srcdir}/ratio \
${experimental_srcdir}/set \
${experimental_srcdir}/string \
diff --git a/libstdc++-v3/include/experimental/array 
b/libstdc++-v3/include/experimental/array
new file mode 100644
index 000..1e1b60e
--- /dev/null
+++ b/libstdc++-v3/include/experimental/array
@@ -0,0 +1,106 @@
+//  -*- C++ -*-
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+/** @file experimental/array
+ *  This is a TS C++ Library header.
+ */
+
+#ifndef _GLIBCXX_EXPERIMENTAL_ARRAY
+#define _GLIBCXX_EXPERIMENTAL_ARRAY 1
+
+#pragma GCC system_header
+
+#if __cplusplus <= 201103L
+# include 
+#else
+
+#include 
+#include 
+#include 
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace experimental
+{
+inline namespace fundamentals_v2
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  /**
+   * @defgroup make_array Array creation functions
+   * @ingroup experimental
+   *
+   * Array creation functions as described in N4529,
+   * Working Draft, C++ Extensions for Library Fundamentals, Version 2
+   *
+   * @{
+   */
+
+template 
+struct __is_reference_wrapper : false_type
+{ };
+
+template 
+struct __is_reference_wrapper> : true_type
+{ };
+
+template 
+constexpr auto make_array(_Types&&... __t)
+  -> array,
+ common_type_t<_Types...>,
+ _D>,
+   sizeof...(_Types)>
+{
+  static_assert(__or_<__not_>,
+__and_<__not_<__is_reference_wrapper>>...>>
+::value,
+"make_array cannot be used without an explicit target type "
+"if any of the types given is a reference_wrapper");
+  return {{__t...}};
+}
+
+template 
+constexpr array, _N>
+__to_array(_Tp (&__a)[_N],
+   index_sequence<_Idx...>)
+{
+  return {{__a[_Idx]...}};
+}
+
+template 
+constexpr array, _N> to_array(_Tp (&__a)[_N])
+{
+  return __to_array(__a, make_index_sequence<_N>{});
+}
+
+  // @} group make_array
+  _GLIBCXX_END_NAMESPACE_VERSION
+} // namespace fundamentals_v2
+

[PATCH, rtl-optimization]: Fix PR66838, Calling multiple SYSV AMD64 ABI functions from MS x64 ABI one results in clobbered parameters

2015-07-12 Thread Uros Bizjak
Another missing case of CALL_INSN_FUNCTION_USAGE, where clobbered
registers are also marked, this time in postreload/
reload_cse_move2add.

The fixed compiler now generates the following code:

   call    sysv_abi_func
   movl    $global, %esi
   movl    $.LC2, %edi
   call    sysv_abi_func
   movl    $global, %esi
   movl    $.LC3, %edi
   call    sysv_abi_func

which correctly reloads %esi for every sysv_abi function call.

2015-07-12  Uros Bizjak  

PR rtl-optimization/66838
* postreload.c (reload_cse_move2add): Also process
CALL_INSN_FUNCTION_USAGE when resetting information of
call-clobbered registers.

testsuite/ChangeLog:

2015-07-12  Uros Bizjak  

PR rtl-optimization/66838
* gcc.target/i386/pr66838.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for mainline and gcc-5 branch?

Uros.
Index: postreload.c
===
--- postreload.c(revision 225722)
+++ postreload.c(working copy)
@@ -2127,6 +2127,8 @@ reload_cse_move2add (rtx_insn *first)
 unknown values.  */
   if (CALL_P (insn))
{
+ rtx link;
+
  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
{
  if (call_used_regs[i])
@@ -2133,6 +2135,21 @@ reload_cse_move2add (rtx_insn *first)
/* Reset the information about this register.  */
reg_mode[i] = VOIDmode;
}
+
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
+  link = XEXP (link, 1))
+   {
+ rtx setuse = XEXP (link, 0);
+ rtx usage_rtx = XEXP (setuse, 0);
+ if (GET_CODE (setuse) == CLOBBER
+ && REG_P (usage_rtx))
+   {
+ unsigned int end_regno = END_REGNO (usage_rtx);
+ for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r)
+   /* Reset the information about this register.  */
+   reg_mode[r] = VOIDmode;
+   }
+   }
}
 }
   return changed;
Index: testsuite/gcc.target/i386/pr66838.c
===
--- testsuite/gcc.target/i386/pr66838.c (revision 0)
+++ testsuite/gcc.target/i386/pr66838.c (working copy)
@@ -0,0 +1,36 @@
+/* { dg-do run { target lp64 } } */
+/* { dg-options "-O2" } */
+
+void abort (void);
+
+char global;
+
+__attribute__((sysv_abi, noinline, noclone))
+void sysv_abi_func(char const *desc, void *local)
+{
+  register int esi asm ("esi");
+  register int edi asm ("edi");
+  
+  if (local != &global)
+abort ();
+
+  /* Clobber some of the extra SYSV ABI registers.  */
+  asm volatile ("movl\t%2, %0\n\tmovl\t%2, %1"
+   : "=r" (esi), "=r" (edi)
+   : "i" (0xdeadbeef));
+}
+
+__attribute__((ms_abi, noinline, noclone))
+void ms_abi_func ()
+{
+  sysv_abi_func ("1st call", &global);
+  sysv_abi_func ("2nd call", &global);
+  sysv_abi_func ("3rd call", &global);
+}
+
+int
+main(void)
+{
+  ms_abi_func();
+  return 0;
+}


Re: [PATCH] PR target/66824: Allow software FP SFmode in FP splitter

2015-07-12 Thread Uros Bizjak
On Sat, Jul 11, 2015 at 9:23 PM, H.J. Lu  wrote:
> On Thu, Jul 09, 2015 at 01:58:22PM -0700, H.J. Lu wrote:
>> On Thu, Jul 09, 2015 at 12:13:38PM -0700, H.J. Lu wrote:
>> > ix86_split_long_move can optimize floating point constant move, which
>> > can be used to optimize SFmode move for IA MCU.
>> >
>> > OK for trunk if there is no regression?
>> >
>> >
>> > H.J.
>> > ---
>> > gcc/
>> >
>> > PR target/66824
>> > * config/i386/i386.c (ix86_split_to_parts): Allow SFmode move
>> > for IA MCU.
>> > (ix86_split_long_move): Support single move.
>> > * config/i386/i386.md (FP splitter): Allow SFmode for IA MCU.
>> >
>> > gcc/testsuite/
>> >
>> > PR target/66824
>> > * gcc.target/i386/pr66824.c: New test.
>> > ---
>>
>>
>> I missed the testcase.  Here is the updated patch.
>>
>
> ix86_split_long_move can optimize floating point constant move, which
> can be used to optimize SFmode move with software floating point.
>
> OK for trunk if there are no regressions?

No, this patch is wrong. Please investigate why "*movsf_internal"
doesn't use "?r/rmF" alternative in case FP regs are unavailable.
Perhaps you should add new alternative with a conditional constraint,
but without "?". And... please use:

#define TARGET_HARD_FP_REGS (TARGET_80387 || TARGET_MMX || TARGET_SSE)

Uros.


Re: [PATCH][C++] Fix PR65091

2015-07-12 Thread Paolo Carlini

On 07/11/2015 09:46 PM, Paolo Carlini wrote:

Hi,

I'm going to ping this one too: a tad less trivial than the other one 
- a little explanation here or in a comment would definitely help - 
but certainly it looks much simpler than my own tries a while ago... 
Regression testing information is also missing.
... in fact, one could argue that the difference between Andrea's 
patch and my original try here:


https://gcc.gnu.org/ml/gcc-patches/2015-06/msg00821.html

is mostly a matter of style + TYPE_P vs the narrower identifier_p...

Paolo.


[PATCH] rs6000: Revamp rotate-and-mask and insert

2015-07-12 Thread Segher Boessenkool
This rewrites all the rotate-and-mask and insert patterns.

The goals are to have simpler, shorter, less error-prone code (with much
fewer machine description patterns), as well as to get better optimised
machine code.

All "mask only" insns are now handled by a single pattern; all rotate (or
shift) and mask by another pattern; and all insert insns by just a few
patterns as well.  Also, more patterns that can be done by a combination
of two of these (or a load immediate and one) are now done that way.

This removes a few constraints and output modifiers.  I don't think any
inline assembler code uses those (they really shouldn't), but if in fact
it is used, it is easy to put back simpler definitions just for
compatibility.  We can use a few free letters for both constraints and
modifiers.

Bootstrapped and tested on powerpc64-linux with the usual options
(-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.

This reduces the number of generated instructions for 32-bit by about
1 in 2000, and for 64-bit by about 1 in 700.  It always does that by
decreasing path length so it should never make performance worse either.

There are two FIXMEs for pretty harmless stuff, but the rtx_costs one
at least probably wants fixing soonish.

Oh, and this patch should be applied after the HAVE_insv patch I posted
earlier today, for good results.


Is this okay for trunk?


2015-07-12  Segher Boessenkool  

PR target/66217
* config/rs6000/constraints.md ("S", "T", "t"): Delete.
* config/rs6000/predicates.md (mask_operand, mask_operand_wrap,
mask64_operand, mask64_2_operand, any_mask_operand, and64_2_operand,
and_2rld_operand):  Delete.
(and_operand): Adjust.
(rotate_mask_operator): New.
* config/rs6000/rs6000-protos.h (build_mask64_2_operands,
includes_lshift_p, includes_rshift_p, includes_rldic_lshift_p,
includes_rldicr_lshift_p, insvdi_rshift_rlwimi_p, extract_MB,
extract_ME): Delete.
(rs6000_is_valid_mask, rs6000_is_valid_and_mask,
rs6000_is_valid_shift_mask, rs6000_is_valid_insert_mask,
rs6000_insn_for_and_mask, rs6000_insn_for_shift_mask,
rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
rs6000_emit_2insn_and): New.
* config/rs6000/rs6000.c (num_insns_constant): Adjust.
(build_mask64_2_operands, includes_lshift_p, includes_rshift_p,
includes_rldic_lshift_p, includes_rldicr_lshift_p,
insvdi_rshift_rlwimi_p, extract_MB, extract_ME): Delete.
(rs6000_is_valid_mask, rs6000_is_valid_and_mask,
rs6000_insn_for_and_mask, rs6000_is_valid_shift_mask,
rs6000_insn_for_shift_mask, rs6000_is_valid_insert_mask,
rs6000_insn_for_insert_mask, rs6000_is_valid_2insn_and,
rs6000_emit_2insn_and): New.
(print_operand) <'b', 'B', 'm', 'M', 's', 'S', 'W'>: Delete.
(rs6000_rtx_costs) : Delete mask_operand and mask64_operand
handling.
: Don't fall through to next case.
: Handle the various rotate-and-mask cases directly.
: Always cost as one insn.
* config/rs6000/rs6000.md (splitter for bswap:SI): Adjust.
(and3): Adjust expander for the new patterns.
(and3_imm, and3_imm_dot, and3_imm_dot2,
and3_imm_mask_dot, and3_imm_mask_dot2): Adjust condition.
(*and3_imm_dot_shifted): New.
(*and3_mask): Delete, rewrite as ...
(and3_mask): ... New.
(*and3_mask_dot, *and3_mask_dot): Rewrite.
(andsi3_internal0_nomc): Delete.
(*andsi3_internal6): Delete.
(*and3_2insn): New.
(insv, insvsi_internal, *insvsi_internal1, *insvsi_internal2,
*insvsi_internal3, *insvsi_internal4, *insvsi_internal5,
*insvsi_internal6, insvdi_internal, *insvdi_internal2,
*insvdi_internal3): Delete.
(*rotl3_mask, *rotl3_mask_dot, *rotl3_mask_dot2,
*rotl3_insert, *rotl3_insert_2, *rotl3_insert_3,
*rotl3_insert_4, two splitters for multi-precision shifts,
*ior_mask): New.
(extzv, extzvdi_internal, *extzvdi_internal1, *extzvdi_internal2,
*rotlsi3_mask, *rotlsi3_mask_dot, *rotlsi3_mask_dot2,
*ashlsi3_imm_mask, *ashlsi3_imm_mask_dot, *ashlsi3_imm_mask_dot2,
*lshrsi3_imm_mask, *lshrsi3_imm_mask_dot, *lshrsi3_imm_mask_dot2):
Delete.
(ashr3): Delete expander.
(*ashr3): Rename to ...
(ashr3): ... This.
(ashrdi3_no_power, *ashrdisi3_noppc64be): Delete.
(*rotldi3_internal4, *rotldi3_internal5 and split,
*rotldi3_internal6 and split, *ashldi3_internal4, ashldi3_internal5
and split, *ashldi3_internal6 and split, *ashldi3_internal7,
ashldi3_internal8 and split, *ashldi3_internal9 and split): Delete.
(*anddi3_2rld, *anddi3_2rld_dot, *anddi3_2rld_dot2): Delete.
(splitter for loading a mask): Adjust.

---
 gcc/config/rs6000/constraints.md  |   18 -
 gcc/config/rs6000/predicates.md   |  170 +

Re: [RFC] two-phase marking in gt_cleare_cache

2015-07-12 Thread Tom de Vries

On 12/07/15 17:43, Tom de Vries wrote:

On 09/07/15 14:24, Michael Matz wrote:

Hi,

On Thu, 9 Jul 2015, Tom de Vries wrote:


Given this I think the call to gt_ggc_mx is superfluous because it
wouldn't work reliably for multi-step dependencies anyway.  Hence a
situation that works with that call in place, and breaking without
it is actually a bug waiting to be uncovered.


Attached patch tries to get multi-step dependencies right, without
using iteration-till-fixed-point.


And for the record, attached patch implements a naive iterative
approach.


What uses do multi-step dependencies have?  As in, I think this goes into
the wrong direction, we lived without this since years, so why should
this
situation be handled at all?  It's about cache hash tables, so they
shouldn't contain anything that is only pointed to at by entries in those
tables.

If anything we rather should check, that calling gt_ggc_mx on anything
retained in the hash tables doesn't generate newly live objects.



Hi Michael,

I'm trying to get to a defined policy for what is allowed for caches.
Either forbidding or allowing multi-step dependencies, I don't really mind.

Until now, we didn't have a good way of allowing them. I came up with a
runtime efficient but not exhaustive variant, which I posted here:
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00711.html.
As a contrast and for the record, I posted the exhaustive but not runtime
efficient variant here:
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00730.html.

I managed to write a patch series that implements the forbidding of
multi-step dependencies. I'll post this soon.



https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00970.html

Thanks,
- Tom



[PATCH 5/5] Don't mark live recursively in gt_cleare_cache

2015-07-12 Thread Tom de Vries

On 12/07/15 17:45, Tom de Vries wrote:

Hi,

this patch series implements the forbidding of multi-step garbage
collection liveness dependencies between caches.

The first four patches downgrade 3 caches to non-cache, since they
introduce multi-step dependencies. This allows us to decouple:
- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.



This patch:
- disables the recursive marking of cache entries during the cache-clear
  phase
- Adds ENABLE_CHECKING code to check that we don't end up with partially
  dead cache entries

OK for trunk?

Thanks,
- Tom

[PATCH 5/5] Don't mark live recursively in gt_cleare_cache

2015-07-10  Tom de Vries  

	PR libgomp/66714
	* hash-table.h (gt_cleare_cache): Mark cache entry non-recursively.
	(gt_cleare_cache) [ENABLE_CHECKING]: Assert non-key components of live
	entry already marked.  Assert dead key component implies dead entry.
	* tree.h (struct tree_decl_map_cache_hasher) [ENABLE_CHECKING]: Add new
	function ggc_marked_nonkey_p.
	* tree.c (struct tree_vec_map_cache_hasher) [ENABLE_CHECKING]: Same.
	* ubsan.c (struct tree_type_map_cache_hasher) [ENABLE_CHECKING]: Same.
	* varasm.c (struct tm_clone_hasher) [ENABLE_CHECKING]: Same.
	* hash-traits.h (struct ggc_cache_remove) [ENABLE_CHECKING]: Same.
	* trans-mem.c (struct tm_wrapper_hasher) [ENABLE_CHECKING]: Same.

	* testsuite/libgomp.c/pr66714.c: New test.
---
 gcc/hash-table.h  | 64 +--
 gcc/hash-traits.h |  4 +++
 gcc/trans-mem.c   |  6 
 gcc/tree.c|  6 
 gcc/tree.h|  6 
 gcc/ubsan.c   |  6 
 gcc/varasm.c  |  6 
 libgomp/testsuite/libgomp.c/pr66714.c | 17 ++
 8 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c/pr66714.c

diff --git a/gcc/hash-table.h b/gcc/hash-table.h
index 12e0c96..282ba8a 100644
--- a/gcc/hash-table.h
+++ b/gcc/hash-table.h
@@ -1046,14 +1046,74 @@ gt_cleare_cache (hash_table *h)
   if (!h)
 return;
 
+  /* There are roughly 2 types of cache entries.
+
+ I.
+
+ The simple one, that uses ggc_cache_remove::keep_cache_entry.
+
+   int keep_cache_entry (T &e) { return ggc_marked_p (e) ? -1 : 0; }
+
+ The function returns either live (-1) or dead (0), dependent on whether the
+ entry was marked during the marking phase.
+
+ If the entry is dead, we clear the slot holding the entry.  The slot can
+ now be reused, and the entry will be freed during the sweeping phase.
+
+ If the entry is live we're done.  The entry itself, and anything reachable
+ from the entry have been marked during the marking phase.
+
+
+ II.
+
+ The complex one, with a non-standard keep_cache_entry.
+
+ Say we have a cache entry E with key field to and non-key field from:
+
+   struct sE {
+	 type1 from;
+	 type2 to;
+   };
+   typedef struct sE *E;
+
+ and a keep_cache_entry function:
+
+   int keep_cache_entry (E &e) { return ggc_marked_p (e->from); }
+
+ The function returns either live (1) or dead (0), dependent on whether the
+ from field of the entry was marked during the marking phase.
+
+ If the from field is dead, we clear the slot holding the entry.  The slot
+ can now be reused, and the from field will be freed during the sweeping
+ phase.  The to field will be freed during the sweeping phase dependent on
+ whether it was marked live during the marking phase.  Furthermore, we check
+ that the entry was not marked.  If that check fails, it means that
+ we ended up with a live entry with a dead from field.
+
+ If the from field is live, we mark the entry non-recursively live, since
+ the cache may hold the only reference to the entry.
+ However, we check that anything reachable from the entry has already been
+ marked during the marking phase.  If that check fails, it means that
+ we ended up with a live entry with a dead to field.  */
+
   for (typename table::iterator iter = h->begin (); iter != h->end (); ++iter)
 if (!table::is_empty (*iter) && !table::is_deleted (*iter))
   {
 	int res = H::keep_cache_entry (*iter);
 	if (res == 0)
-	  h->clear_slot (&*iter);
+	  {
+#ifdef ENABLE_CHECKING
+	gcc_assert (!ggc_marked_p (*iter));
+#endif
+	h->clear_slot (&*iter);
+	  }
 	else if (res != -1)
-	  gt_ggc_mx (*iter);
+	  {
+	ggc_set_mark (*iter);
+#ifdef ENABLE_CHECKING
+	gcc_assert (H::ggc_

[PATCH 4/5] Downgrade value_expr_for_decl to non-cache

2015-07-12 Thread Tom de Vries

On 12/07/15 17:45, Tom de Vries wrote:

Hi,

this patch series implements the forbidding of multi-step garbage
collection liveness dependencies between caches.

The first four patches downgrade 3 caches to non-cache, since they
introduce multi-step dependencies. This allows us to decouple:
- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.


This patch downgrades value_expr_for_decl to non-cache.

OK for trunk?

Thanks,
- Tom

[PATCH 4/5] Downgrade value_expr_for_decl to non-cache

Without this patch, but with patch "Don't mark live recursively in
gt_cleare_cache" when compiling soft-fp/divtf3.c -m32 we run into:
...
0x133f37e void gt_cleare_cache(hash_table*)
	  /home/vries/gcc_versions/devel/src/gcc/hash-table.h:1114
0x133bfdb gt_clear_caches_gt_tree_h()
	  ./gt-tree.h:475
0x6c8d0f gt_clear_caches()
	 ./gtype-c.h:151
0xa8ef1e ggc_mark_roots()
	 /home/vries/gcc_versions/devel/src/gcc/ggc-common.c:103
0x7f7bfe ggc_collect()
	 /home/vries/gcc_versions/devel/src/gcc/ggc-page.c:2183
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
...

The offending cache entry is:
...
(gdb) call debug_generic_expr ( (*iter).base.from )

(gdb) call debug_generic_expr ( (*iter).to )
*.result_ptr
...

2015-07-10  Tom de Vries  

	PR libgomp/66714
	* tree.c (value_expr_for_decl): Use tree_decl_map_hasher instead of
	tree_decl_map_cache_hasher. Don't use cache GTY attribute.
	(init_ttree): Allocate value_expr_for_decl using new type.
---
 gcc/tree.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/tree.c b/gcc/tree.c
index 6038fff..bb4467d 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -253,8 +253,13 @@ static GTY ((cache))
 static GTY (())
  hash_table *debug_expr_for_decl;
 
+/* TODO: Figure out whether we can declare value_expr_for_decl as:
 static GTY ((cache))
  hash_table *value_expr_for_decl;
+*/
+
+static GTY (())
+ hash_table *value_expr_for_decl;
 
 struct tree_vec_map_cache_hasher : ggc_cache_ptr_hash
 {
@@ -667,7 +672,7 @@ init_ttree (void)
 = hash_table<tree_decl_map_hasher>::create_ggc (512);
 
   value_expr_for_decl
-= hash_table<tree_decl_map_cache_hasher>::create_ggc (512);
+= hash_table<tree_decl_map_hasher>::create_ggc (512);
 
   int_cst_hash_table = hash_table<int_cst_hasher>::create_ggc (1024);
 
-- 
1.9.1



[PATCH 3/5] Downgrade debug_expr_for_decl to non-cache

2015-07-12 Thread Tom de Vries

On 12/07/15 17:45, Tom de Vries wrote:

Hi,

this patch series implements the forbidding of multi-step garbage
collection liveness dependencies between caches.

The first four patches downgrade 3 caches to non-cache, since they
introduce multi-step dependencies. This allows us to decouple:
- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.


This patch downgrades debug_expr_for_decl to non-cache.

OK for trunk?

Thanks,
- Tom

[PATCH 3/5] Downgrade debug_expr_for_decl to non-cache

Without this patch, but with patch "Don't mark live recursively in
gt_cleare_cache" when compiling libgcov-driver.c -m32 we run into:
...
0x133f37e void gt_cleare_cache(hash_table*)
	  /home/vries/gcc_versions/devel/src/gcc/hash-table.h:1114
0x133bfdb gt_clear_caches_gt_tree_h()
	  ./gt-tree.h:475
0x6c8d0f gt_clear_caches()
	 ./gtype-c.h:151
0xa8ef1e ggc_mark_roots()
	 /home/vries/gcc_versions/devel/src/gcc/ggc-common.c:103
0x7f7bfe ggc_collect()
	 /home/vries/gcc_versions/devel/src/gcc/ggc-page.c:2183
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
...

The offending cache entry is:

(gdb) call debug_generic_expr ( (*iter).base.from )
cur_ent$0
(gdb) call debug_generic_expr ( (*iter).to )
cur_ent[0]
...

2015-07-10  Tom de Vries  

	PR libgomp/66714
	* tree.c (debug_expr_for_decl): Use tree_decl_map_hasher
	instead of tree_decl_map_cache_hasher. Don't use cache GTY attribute.
	(init_ttree): Allocate debug_expr_for_decl using new type.
---
 gcc/tree.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/tree.c b/gcc/tree.c
index 5e27e48..6038fff 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -245,8 +245,13 @@ static GTY ((cache)) hash_table *cl_option_hash_table;
 /* General tree->tree mapping  structure for use in hash tables.  */
 
 
+/* TODO: Figure out whether we can declare debug_expr_for_decl as:
 static GTY ((cache))
  hash_table<tree_decl_map_cache_hasher> *debug_expr_for_decl;
+*/
+
+static GTY (())
+ hash_table<tree_decl_map_hasher> *debug_expr_for_decl;
 
 static GTY ((cache))
  hash_table<tree_decl_map_cache_hasher> *value_expr_for_decl;
@@ -659,7 +664,7 @@ init_ttree (void)
 = hash_table::create_ggc (TYPE_HASH_INITIAL_SIZE);
 
   debug_expr_for_decl
-= hash_table<tree_decl_map_cache_hasher>::create_ggc (512);
+= hash_table<tree_decl_map_hasher>::create_ggc (512);
 
   value_expr_for_decl
 = hash_table<tree_decl_map_cache_hasher>::create_ggc (512);
-- 
1.9.1



[PATCH 2/5] Add struct tree_decl_map_hasher

2015-07-12 Thread Tom de Vries

On 12/07/15 17:45, Tom de Vries wrote:

Hi,

this patch series implements the forbidding of multi-step garbage
collection liveness dependencies between caches.

The first four patches downgrade 3 caches to non-cache, since they
introduce multi-step dependencies. This allows us to decouple:
- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.



This patch introduces infrastructure for patches 3 and 4.

OK for trunk?

Thanks,
- Tom

[PATCH 2/5] Add struct tree_decl_map_hasher

2015-07-10  Tom de Vries  

	PR libgomp/66714
	* tree.h (struct tree_decl_map_hasher): New struct.
---
 gcc/tree.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/tree.h b/gcc/tree.h
index 250f99d..8d8fb7e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4642,6 +4642,16 @@ struct tree_decl_map_cache_hasher : ggc_cache_ptr_hash
   }
 };
 
+struct tree_decl_map_hasher : ggc_ptr_hash<tree_decl_map>
+{
+  static hashval_t hash (tree_decl_map *m) { return tree_decl_map_hash (m); }
+  static bool
+  equal (tree_decl_map *a, tree_decl_map *b)
+  {
+return tree_decl_map_eq (a, b);
+  }
+};
+
 #define tree_int_map_eq tree_map_base_eq
 #define tree_int_map_hash tree_map_base_hash
 #define tree_int_map_marked_p tree_map_base_marked_p
-- 
1.9.1



[PATCH 1/5] Downgrade debug_args_for_decl to non-cache

2015-07-12 Thread Tom de Vries

On 12/07/15 17:45, Tom de Vries wrote:

Hi,

this patch series implements the forbidding of multi-step garbage
collection liveness dependencies between caches.

The first four patches downgrade 3 caches to non-cache, since they
introduce multi-step dependencies. This allows us to decouple:
- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.


This patch downgrades debug_args_for_decl to non-cache.

OK for trunk?

Thanks,
- Tom

[PATCH 1/5] Downgrade debug_args_for_decl to non-cache

Without this patch, but with patch "Don't mark live recursively in
gt_cleare_cache" when compiling libgcov-driver.c -m32 we run into:
...
0x133e0e8 void gt_cleare_cache(hash_table*)
/home/vries/gcc_versions/devel/src/gcc/hash-table.h:1114
0x133b51f gt_clear_caches_gt_tree_h()
./gt-tree.h:425
0x6c835f gt_clear_caches()
./gtype-c.h:151
0xa8e56e ggc_mark_roots()
/home/vries/gcc_versions/devel/src/gcc/ggc-common.c:103
0x7f724e ggc_collect()
/home/vries/gcc_versions/devel/src/gcc/ggc-page.c:2183
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
...

The offending cache entry is:
...
(gdb) call debug_generic_expr ( (*iter).base.from )
merge_summary.isra.1
(gdb) call debug_generic_expr ( (*iter).to.m_vecdata[0] )
all_prg
(gdb) call debug_generic_expr ( (*iter).to.m_vecdata[1] )
D#8
...

2015-07-10  Tom de Vries  

	PR libgomp/66714
	* tree.c (struct tree_vec_map_hasher): New struct.
	(debug_args_for_decl): Use tree_vec_map_hasher instead of
	tree_vec_map_cache_hasher.  Don't use cache GTY attribute.
	(decl_debug_args_insert): Allocate debug_args_for_decl using new type.
---
 gcc/tree.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/tree.c b/gcc/tree.c
index 6628a38..5e27e48 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -268,8 +268,25 @@ struct tree_vec_map_cache_hasher : ggc_cache_ptr_hash
   }
 };
 
+struct tree_vec_map_hasher : ggc_ptr_hash<tree_vec_map>
+{
+  static hashval_t hash (tree_vec_map *m) { return DECL_UID (m->base.from); }
+
+  static bool
+  equal (tree_vec_map *a, tree_vec_map *b)
+  {
+return a->base.from == b->base.from;
+  }
+};
+
+/* TODO: Figure out whether we can declare debug_args_for_decl as:
+
 static GTY ((cache))
  hash_table<tree_vec_map_cache_hasher> *debug_args_for_decl;
+*/
+
+static GTY (())
+ hash_table<tree_vec_map_hasher> *debug_args_for_decl;
 
 static void set_type_quals (tree, int);
 static void print_type_hash_statistics (void);
@@ -6870,7 +6887,7 @@ decl_debug_args_insert (tree from)
   if (DECL_HAS_DEBUG_ARGS_P (from))
 return decl_debug_args_lookup (from);
   if (debug_args_for_decl == NULL)
-debug_args_for_decl = hash_table<tree_vec_map_cache_hasher>::create_ggc (64);
+debug_args_for_decl = hash_table<tree_vec_map_hasher>::create_ggc (64);
   h = ggc_alloc<tree_vec_map> ();
   h->base.from = from;
   h->to = NULL;
-- 
1.9.1



Don't allow multi-step dependencies in caches

2015-07-12 Thread Tom de Vries

Hi,

this patch series implements the forbidding of multi-step garbage 
collection liveness dependencies between caches.


The first four patches downgrade 3 caches to non-cache, since they 
introduce multi-step dependencies. This allows us to decouple:

- establishing a policy for multi-step dependencies in caches, and
- fixing issues that allow us to use these 3 as caches again.

1. Downgrade debug_args_for_decl to non-cache
2. Add struct tree_decl_map_hasher
3. Downgrade debug_expr_for_decl to non-cache
4. Downgrade value_expr_for_decl to non-cache
5. Don't mark live recursively in gt_cleare_cache

Bootstrapped and reg-tested on x86_64, with ENABLE_CHECKING.

I'll post the patches in response to this email.

Thanks,
- Tom


Re: [RFC] two-phase marking in gt_cleare_cache

2015-07-12 Thread Tom de Vries

On 09/07/15 14:24, Michael Matz wrote:

Hi,

On Thu, 9 Jul 2015, Tom de Vries wrote:


Given this I think the call to gt_ggc_mx is superfluous because it
wouldn't work reliably for multi-step dependencies anyway.  Hence a
situation that works with that call in place, and breaking without
it is actually a bug waiting to be uncovered.


Attached patch tries to get multi-step dependencies right, without
using iteration-till-fixed-point.


And for the record, attached patch implements a naive iterative
approach.


What uses do multi-step dependencies have?  As in, I think this goes in
the wrong direction; we have lived without this for years, so why should this
situation be handled at all?  It's about cache hash tables, so they
shouldn't contain anything that is only pointed to by entries in those
tables.

If anything we rather should check, that calling gt_ggc_mx on anything
retained in the hash tables doesn't generate newly live objects.



Hi Michael,

I'm trying to get to a defined policy for what is allowed for caches. 
Either forbidding or allowing multi-step dependencies, I don't really mind.


Until now, we didn't have a good way of allowing them. I came up with a 
runtime efficient but not exhaustive variant, which I posted here: 
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00711.html.
As contrast and for the record, I posted the exhaustive but not runtime 
efficient variant here: 
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00730.html.


I managed to write a patch series that implements the forbidding of 
multi-step dependencies. I'll post this soon.


Thanks,
- Tom



Re: [PATCH] MIPS: Correctly update the isa and arch_test_option_p variables after the arch dependency handling code in mips.exp

2015-07-12 Thread Richard Sandiford
Matthew Fortune  writes:
> Andrew Bennett  writes:
>> I have noticed that in the mips.exp dg-option handling code the isa and
>> arch_test_option_p variables are not updated after the pre-arch to arch
>> dependency handling.  This means that if this code changes the
>> architecture the post-arch dependency handling code (which relies on
>> arch_test_option_p being true) is not run to handle any extra dependencies
>> the new architecture might need.
>
> I'm not sure this is the right place to fix this, though it does seem
> subjective as we are stretching the logic a little I think.
>
> In the pre-arch options (i.e. when an arch is not explicitly requested) we
> already have code that sets -mnan=2008 when downgrading a test from R6 to R5 as
> the R6 headers will be nan2008 and there is no guarantee of nan legacy headers
> existing. This is the opposite case where we upgrade a test from R5 to R6
> and R6 has to use -mnan=2008 so needs to explicitly override any command line
> option to use -mnan=legacy. I think that therefore needs adding when we set
> the arch to R6 in the pre-arch options.
>
> At the same time I think we need to add -mabs=2008 in the same place as R6
> requires ABS2008 as well. You should see that as a failure if you test with
> -mabs=legacy.
>
> I think I wrote the exact same patch as you have when I did the original R6
> tests and concluded it was not in-keeping with the structure of mips.exp.
>
> I've added Richard too since he may be able to offer a guiding hand as
> original author of most of the mips.exp code.

Yeah, I agree that this doesn't really fit the model that well,
but like you say, we're stretching the logic a bit :-).  When I wrote it,
the architectures formed a nice tree in which moving to leaf nodes only
added features.  So in the pre-r6 days:

# Handle dependencies between the pre-arch options and the arch option.
# This should mirror the arch and post-arch code below.
if { !$arch_test_option_p } {

increased the architecture from the --target_board default to match
the features required by the test, whereas:

# Handle dependencies between the arch option and the post-arch options.
# This should mirror the arch and pre-arch code above.
if { $arch_test_option_p } {

turned off features from the --target_board default to match a lower
architecture required by the test.  So in the pre-r6 days, all the code
in the second block was turning something off when going to a lower
architecture.  The blocks were mutually-exclusive and writing it this
way reduced the number of redundant options.  (Admittedly you could argue
that it's daft to worry about that given the kind of command lines you
tend to get from the rest of mips.exp. :-))

r6 is the first time we've had to turn something off when moving up.
-mnan and -mabs are also the first options where old architectures
support only A, higher revisions support A and B, and the newest
revision supports only B.  I think I'd prefer to acknowledge that
and have:

# Handle dependencies between the arch option and the post-arch options.
# This should mirror the arch and pre-arch code above.  For pre-r6
# architectures this only needs to be done when we've moved down
# to a lower architecture and might need to turn features off,
# but moving up from pre-r6 to r6 can remove features too.
if { $arch_test_option_p || ($orig_isa_rev < 6 && $isa_rev >= 6) } {

I think the existing r6->r5 case really is different: there we're
forcing a -mnan option not because the architecture needs it but
because the environment might.

Thanks,
Richard


[PATCH, RFC] combine: Don't create insv insns unless HAVE_insv

2015-07-12 Thread Segher Boessenkool
Currently combine tries to make assignments to bitfields (of a register)
whenever it can.  If the target has no insv pattern, the result will not
ever match (if the MD is sane at all).  Doing insv on registers generates
worse code than what you get if you express things directly (with and/ior),
so many targets do not _want_ to have insv patterns.

This patch changes combine to not generate insv patterns if the target
does not have any.

Bootstrapped and regression checked on powerpc64-linux (with and without
insv patterns there).  Also built on many other targets, for many months.

I'm vaguely aware there have been changes to extzv etc. so there now are
extzv<mode>; I'll investigate if that means anything for insv as well.
It's also a new #ifdef HAVE_xxx.  But we're not clean there yet so I hope
to get away with that ;-)

Comments?  Complaints?


Segher

---
 gcc/combine.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/combine.c b/gcc/combine.c
index 9be230a..dc51d51 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7488,6 +7488,13 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
 mode, new_rtx));
 }
 
+  /* If the target has no INSV patterns, do not try to generate such an
+ instruction.  */
+#ifndef HAVE_insv
+  if (in_dest)
+return 0;
+#endif
+
   /* Unless this is a COMPARE or we have a funny memory reference,
  don't do anything with zero-extending field extracts starting at
  the low-order bit since they are simple AND operations.  */
-- 
1.8.1.4



[gomp4, committed] Handle nested loops in kernels regions

2015-07-12 Thread Tom de Vries

Hi,

I.

This patch allows parallelization of an outer loop in an openacc kernels 
region.


The testcase is based on autopar/outer-1.c.


II.

We rely on pass_lim to move the *.omp_data_i loads out of the loop nest. 
For the test-case, pass_lim was managing to move the load out of the 
inner loop, but not the outer loop, because the load was classified as 
'MOVE_PRESERVE_EXECUTION'. By marking the *.omp_data_i load 
non-trapping, it's now classified as 'MOVE_POSSIBLE', and moved out of 
the loop nest.



III.

The 'loops_state_set (LOOPS_NEED_FIXUP)' is a somewhat blunt and 
temporary fix for the oacc kernels variant of PR66846 - parloops does 
not always mark loops for fixup if needed.


The original PR needs an added verify_loop_structure to trigger the 
problem. Normally the problem is hidden by the fact that the first pass 
that runs on the new function is pass_fixup_cfg, which happens to fixup 
the loops (The loops are fixed up because TODO_cleanup_cfg is set during 
pass_fixup_cfg, because the function contains an ECF_CONST function: 
__builtin_omp_get_num_threads).


For the oacc kernels variant, the problem triggers without adding 
verify_loop_structure. During pass_ipa_inline, we call 
loop_optimizer_init, which (given that LOOPS_NEED_FIXUP is not set) 
verifies the loop structure, which fails. Pass_fixup_cfg is not run 
in between the discovery of the new function and pass_ipa_inline.



IV.

I've committed this patch to gomp-4_0-branch.

Bootstrapped and reg-tested on x86_64. Build and reg-tested on setup 
with nvidia accelerator.


Thanks,
- Tom
Handle nested loops in kernels regions

2015-07-12  Tom de Vries  

	* omp-low.c (build_receiver_ref): Mark *.omp_data_i as non-trapping.
	* tree-parloops.c (gen_parallel_loop): Add LOOPS_NEED_FIXUP to loop
	state.
	(parallelize_loops): Allow nested loops.

	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: New test.

	* c-c++-common/goacc/kernels-loop-nest.c: New test.
---
 gcc/omp-low.c  |  1 +
 .../c-c++-common/goacc/kernels-loop-nest.c | 42 ++
 gcc/tree-parloops.c|  5 +--
 .../libgomp.oacc-c-c++-common/kernels-loop-nest.c  | 26 ++
 4 files changed, 70 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 11ac909..a938ce0 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1147,6 +1147,7 @@ build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
 field = x;
 
   x = build_simple_mem_ref (ctx->receiver_decl);
+  TREE_THIS_NOTRAP (x) = 1;
   x = omp_build_component_ref (x, field);
   if (by_ref)
 x = build_simple_mem_ref (x);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
new file mode 100644
index 0000000..3e06c9f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -0,0 +1,42 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+/* Based on autopar/outer-1.c.  */
+
+#include <stdlib.h>
+
+#define N 1000
+
+int
+main (void)
+{
+  int x[N][N];
+
+#pragma acc kernels copyout (x)
+  {
+for (int ii = 0; ii < N; ii++)
+  for (int jj = 0; jj < N; jj++)
+	x[ii][jj] = ii + jj + 3;
+  }
+
+  for (int i = 0; i < N; i++)
+for (int j = 0; j < N; j++)
+  if (x[i][j] != i + j + 3)
+	abort ();
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 04708c0..492ffcb 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2442,6 +2442,7 @@ gen_parallel_loop (struct loop *loop,
   /* Cancel the loop (it is simpler to do it here rather than to teach the
  expander to do it).  */
   cancel_loop_tree (loop);
+  loops_state_set (LOOPS_NEED_FIXUP);
 
   /* Free loop bound estimations that could contain references to
  removed statements.  */
@@ -2761,10 +2762,6 @@ parallelize_loops (bool oacc_kernels_p)
 	  if (!loop->in_oacc_kernels_region)
 	continue;
 
-	  /* TODO: Allow nested loop

Re: [PR66726] Factor conversion out of COND_EXPR

2015-07-12 Thread Kugan


On 11/07/15 06:40, Jeff Law wrote:
> On 07/09/2015 05:08 PM, Kugan wrote:
> 
>> Done. Bootstrapped and regression tested on x86-64-none-linux-gnu with
>> no new regressions. Is this OK for trunk?
> Thanks for the additional testcases.
> 
> 
> 
>> +  else
>> +{
>> +  /* If arg1 is an INTEGER_CST, fold it to new type.  */
>> +  if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
>> +  && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
>> +{
>> +  if (gimple_assign_cast_p (arg0_def_stmt))
>> +new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
>> +  else
>> +return false;
>> +}
>> +  else
>> +return false;
>> +}
> Something looks goofy here formatting-wise.  Can you please check for
> horizontal whitespace consistency before committing.
> 
> 
> 
>> +
>> +  /* If types of new_arg0 and new_arg1 are different bailout.  */
>> +  if (TREE_TYPE (new_arg0) != TREE_TYPE (new_arg1))
>> +return false;
> Seems like this should use types_compatible_p here.  You're testing
> pointer equality, but as long as the types are compatible, we should be
> able to make the transformation.
> 
> With the horizontal whitespace fixed and using types_compatible_p this
> is OK for the trunk.  So pre-approved with those two changes and a final
> bootstrap/regression test (due to the types_compatible_p change).
> 
> jeff
> 

Thanks. Committed as r225722 with the changes. Also did a fresh
bootstrap and regression testing on x86_64-none-linux-gnu before committing.

Thanks,
Kugan


Merge trunk r225562 (2015-07-08) into gomp-4_0-branch (was: gomp4 merge)

2015-07-12 Thread Thomas Schwinge
Hi!

On Fri, 10 Jul 2015 18:50:20 -0400, Nathan Sidwell  
wrote:
> it looks like the most recent merge from trunk to gomp4 was early May.  I 
> think 
> it is time for another one -- can you handle that?

Indeed :-) -- and, as it happens, resolving the "merge artifacts" is one
of the things I've been working on last week.  I hope I got that all
right, in particular gcc/tree-parloops.c (Tom), gcc/tree-ssa-loop-ch.c
(Tom), gcc/config/nvptx/nvptx.c (Nathan), and thereabouts.  You may want
to diff the current gomp-4_0-branch files against trunk r225562 (merge
base) as well as against gomp-4_0-branch r225715 (before the merge) to
verify.  Anyway, in the quiet of the weekend now committed to
gomp-4_0-branch in r225719:

r225719 | tschwinge | 2015-07-12 11:30:39 +0200 (Sun, 12 Jul 2015) | 2 lines

svn merge -r 222860:225562 svn+ssh://gcc.gnu.org/svn/gcc/trunk


Grüße,
 Thomas


pgpsopGGOwUtW.pgp
Description: PGP signature