Re: [PATCH 11/18] add some utility methods to vec

2016-04-20 Thread Trevor Saunders
On Wed, Apr 20, 2016 at 11:13:42AM +0200, Richard Biener wrote:
> On Wed, Apr 20, 2016 at 8:22 AM,   wrote:
> > From: Trevor Saunders 
> >
> > Later patches use these functions, and I believe Mikhail has mentioned
> > before that he'd like to have begin / end () on vec.
> 
> begin() / end () is fine.  But contains ()?  That makes using a O(n) algorithm
> too easy I think (we have qsort + bsearch for a more efficient way).

Well, contains is better if you will search fewer than log(n) times.

> I suppose you are replacing linear list walks with contains () so it
> might be ok...

yeah, and I'm not really sure how much open coding it will actually make
people think more than they would otherwise.

> At least stick some comment on contains () mentioning qsort / bsearch.

sure

Trev

> 
> Ok with that change.
> 
> Richard.
> 
> > gcc/ChangeLog:
> >
> > 2016-04-19  Trevor Saunders  
> >
> > * vec.h (vec_safe_contains): New function.
> > (vec::contains): Likewise.
> > (vec::begin): Likewise.
> > (vec::end): Likewise.
> > ---
> >  gcc/vec.h | 39 ++-
> >  1 file changed, 38 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/vec.h b/gcc/vec.h
> > index ff57528..3c16e83 100644
> > --- a/gcc/vec.h
> > +++ b/gcc/vec.h
> > @@ -454,6 +454,10 @@ public:
> >bool is_empty (void) const { return m_vecpfx.m_num == 0; }
> >T *address (void) { return m_vecdata; }
> >const T *address (void) const { return m_vecdata; }
> > +  T *begin () { return address (); }
> > +  const T *begin () const { return address (); }
> > +  T *end () { return address () + length (); }
> > +  const T *end () const { return address () + length (); }
> >const T [] (unsigned) const;
> >T [] (unsigned);
> >T  (void);
> > @@ -473,6 +477,7 @@ public:
> >void qsort (int (*) (const void *, const void *));
> >T *bsearch (const void *key, int (*compar)(const void *, const void *));
> >unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
> > +  bool contains (const T ) const;
> >static size_t embedded_size (unsigned);
> >void embedded_init (unsigned, unsigned = 0, unsigned = 0);
> >void quick_grow (unsigned len);
> > @@ -542,7 +547,6 @@ vec_safe_is_empty (vec *v)
> >return v ? v->is_empty () : true;
> >  }
> >
> > -
> >  /* If V does not have space for NELEMS elements, call
> > V->reserve(NELEMS, EXACT).  */
> >  template
> > @@ -695,6 +699,12 @@ vec_safe_splice (vec *, const 
> > vec *src
> >  }
> >  }
> >
> > +template
> > +inline bool
> > +vec_safe_contains (vec *v, const T )
> > +{
> > +  return v? v->contains (search) : false;
> > +}
> >
> >  /* Index into vector.  Return the IX'th element.  IX must be in the
> > domain of the vector.  */
> > @@ -973,6 +983,19 @@ vec::bsearch (const void *key,
> >return NULL;
> >  }
> >
> > +/* Return true if the vector contains search.  */
> > +
> > +template
> > +inline bool
> > +vec::contains (const T ) const
> > +{
> > +  unsigned int len = length ();
> > +  for (unsigned int i = 0; i < len; i++)
> > +if ((*this)[i] == search)
> > +  return true;
> > +
> > +  return false;
> > +}
> >
> >  /* Find and return the first position in which OBJ could be inserted
> > without changing the ordering of this vector.  LESSTHAN is a
> > @@ -1167,6 +1190,10 @@ public:
> >const T *address (void) const
> >{ return m_vec ? m_vec->m_vecdata : NULL; }
> >
> > +  T *begin () { return address (); }
> > +  const T *begin () const { return address (); }
> > +  T *end () { return begin () + length (); }
> > +  const T *end () const { return begin () + length (); }
> >const T [] (unsigned ix) const
> >{ return (*m_vec)[ix]; }
> >
> > @@ -1208,6 +1235,7 @@ public:
> >void qsort (int (*) (const void *, const void *));
> >T *bsearch (const void *key, int (*compar)(const void *, const void *));
> >unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
> > +  bool contains (const T ) const;
> >
> >bool using_auto_storage () const;
> >
> > @@ -1695,6 +1723,15 @@ vec::lower_bound (T obj,
> >return m_vec ? m_vec->lower_bound (obj, lessthan) : 0;
> >  }
> >
> > +/* Return true if the vector contains search.  */
> > +
> > +template
> > +inline bool
> > +vec::contains (const T ) const
> > +{
> > +  return m_vec ? m_vec->contains (search) : false;
> > +}
> > +
> >  template
> >  inline bool
> >  vec::using_auto_storage () const
> > --
> > 2.7.4
> >


[PATCH] opts-global.c: Include gimple.h for LAST_AND_UNUSED_GIMPLE_CODE.

2016-04-20 Thread Khem Raj
gcc/:
2016-04-16  Khem Raj  

* opts-global.c: Include gimple.h for LAST_AND_UNUSED_GIMPLE_CODE.

Fixes build errors e.g.

| ../../../../../../../work-shared/gcc-6.0.0-r0/git/gcc/lto-streamer.h:159:34: 
error: 'LAST_AND_UNUSED_GIMPLE_CODE' was not declared in this scope
|LTO_bb0 = 1 + MAX_TREE_CODES + LAST_AND_UNUSED_GIMPLE_CODE,
---
 gcc/opts-global.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/opts-global.c b/gcc/opts-global.c
index 989ef3d..92fb9ac 100644
--- a/gcc/opts-global.c
+++ b/gcc/opts-global.c
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "plugin-api.h"
 #include "ipa-ref.h"
 #include "cgraph.h"
+#include "gimple.h"
 #include "lto-streamer.h"
 #include "output.h"
 #include "plugin.h"
-- 
2.8.0



Re: [PATCH] vrp: remove redundant has_single_use tests

2016-04-20 Thread Patrick Palka
On Wed, Apr 20, 2016 at 6:45 PM, Patrick Palka  wrote:
> During assert-location discovery, if an SSA name is live according to
> live_on_edge() on some outgoing edge E, then the SSA name definitely has
> at least two uses: the use on the outgoing edge, and the use in some BB
> dominating E->src from which the SSA_NAME and the potential assertion
> was discovered.  These two uses can't be the same because the liveness
> array is populated on-the-fly in reverse postorder so the latter use
> which dominates BB couldn't have yet contributed to the liveness bitmap.
>
> So AFAICT it's not necessary to check live_on_edge() as well as
> !has_single_use() since the former check will imply the latter.  So this
> patch removes these redundant calls to has_single_use() (and also
> replaces the use of has_single_use() in find_assert_locations_1 with a
> liveness bitmap test which should be cheaper and more accurate).
>
> I bootstrapped and regtested this change on x86_64-pc-linux-gnu.  I also
> confirmed that the number of calls made to register_new_assert_for
> before and after the patch remains the same during compilation of
> libstdc++ and during compilation of gimple-match.c and when running the
> tree-ssa.exp testsuite.  Does this look OK to commit?
>
> gcc/ChangeLog:
>
> * tree-vrp.c (register_edge_assert_for_2): Remove redundant
> has_single_use() tests.
> (register_edge_assert_for_1): Likewise.
> (find_assert_locations_1): Check the liveness bitmap instead of
> calling has_single_use().

By the way, would it be reasonable to cache/precompute the number of
non-debug uses each ssa name has so that has_single_use, has_zero_uses
etc are much cheaper?


Re: [PATCH][cilkplus] fix c++ implicit conversions with cilk_spawn (PR/69024, PR/68997)

2016-04-20 Thread Ryan Burn
Can we push this now that the gcc 7 branch is open?

On Wed, Feb 10, 2016 at 1:00 PM, Jeff Law  wrote:
> On 01/20/2016 10:57 AM, Ryan Burn wrote:
>>
>> This patch follows on from
>> https://gcc.gnu.org/ml/gcc-patches/2015-12/msg02142.html
>>
>> As discussed, it creates a separate function
>> cilk_cp_detect_spawn_and_unwrap in gcc/cp to handle processing
>> cilk_spawn expressions for c++ and adds support for implicit
>> constructor and type conversions.
>>
>> Bootstrapped and regression tested on x86_64-linux.
>
> FYI, Just saw your assignment fly by.  I'll try to get a close look at this
> patch shortly.
>
> jeff
>


Re: [PATCH 00/18] towards removing rtx_insn_list and rtx_expr_list

2016-04-20 Thread Trevor Saunders
On Wed, Apr 20, 2016 at 06:03:01AM -0700, Andi Kleen wrote:
> tbsaunde+...@tbsaunde.org writes:
> 
> > I have some more patches that almost completely eliminate these, but I 
> > haven't
> > tested the rest yet, and this is already a long series so it would be nice 
> > to
> > get some of it out of my tree and reviewed.
> >
> > patches individually bootstrapped and regtested on x86_64-linux-gnu, ok? I
> > expect none of this will more than textually conflict with something that 
> > may
> > need backported to gcc-6, but its waited a month already I guess it can wait
> > longer if people prefer.
> 
> A vector can have very different performance than a list, depending how
> it is used. Do your patches cause any measurable performance difference for
> the compiler?

I haven't measured, but I am aware of that and did consider it when
writing these patches.  I expect they'll help perf some since I went
through some hoops to not move elements around the vector unnecessarily.
I'm not really sure what work load is most affected by each of these
patches, and they don't really seem that risky to me so I'd rather not do
tons of testing on the off chance they slow something down, in the worst
case we can always revert something to a list without using rtx.

Trev

> 
> -Andi


[PATCH] vrp: remove redundant has_single_use tests

2016-04-20 Thread Patrick Palka
During assert-location discovery, if an SSA name is live according to
live_on_edge() on some outgoing edge E, then the SSA name definitely has
at least two uses: the use on the outgoing edge, and the use in some BB
dominating E->src from which the SSA_NAME and the potential assertion
was discovered.  These two uses can't be the same because the liveness
array is populated on-the-fly in reverse postorder so the latter use
which dominates BB couldn't have yet contributed to the liveness bitmap.

So AFAICT it's not necessary to check live_on_edge() as well as
!has_single_use() since the former check will imply the latter.  So this
patch removes these redundant calls to has_single_use() (and also
replaces the use of has_single_use() in find_assert_locations_1 with a
liveness bitmap test which should be cheaper and more accurate).

I bootstrapped and regtested this change on x86_64-pc-linux-gnu.  I also
confirmed that the number of calls made to register_new_assert_for
before and after the patch remains the same during compilation of
libstdc++ and during compilation of gimple-match.c and when running the
tree-ssa.exp testsuite.  Does this look OK to commit?

gcc/ChangeLog:

* tree-vrp.c (register_edge_assert_for_2): Remove redundant
has_single_use() tests.
(register_edge_assert_for_1): Likewise.
(find_assert_locations_1): Check the liveness bitmap instead of
calling has_single_use().
---
 gcc/tree-vrp.c | 29 ++---
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index bbdf9ce..3cb470b 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -5145,8 +5145,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
 
   /* Only register an ASSERT_EXPR if NAME was found in the sub-graph
  reachable from E.  */
-  if (live_on_edge (e, name)
-  && !has_single_use (name))
+  if (live_on_edge (e, name))
 register_new_assert_for (name, name, comp_code, val, NULL, e, bsi);
 
   /* In the case of NAME <= CST and NAME being defined as
@@ -5188,8 +5187,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  && (cst2 == NULL_TREE
  || TREE_CODE (cst2) == INTEGER_CST)
  && INTEGRAL_TYPE_P (TREE_TYPE (name3))
- && live_on_edge (e, name3)
- && !has_single_use (name3))
+ && live_on_edge (e, name3))
{
  tree tmp;
 
@@ -5215,8 +5213,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  && TREE_CODE (name2) == SSA_NAME
  && TREE_CODE (cst2) == INTEGER_CST
  && INTEGRAL_TYPE_P (TREE_TYPE (name2))
- && live_on_edge (e, name2)
- && !has_single_use (name2))
+ && live_on_edge (e, name2))
{
  tree tmp;
 
@@ -5319,8 +5316,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  tree op1 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (op0) == SSA_NAME
  && TREE_CODE (op1) == INTEGER_CST
- && live_on_edge (e, op0)
- && !has_single_use (op0))
+ && live_on_edge (e, op0))
{
  enum tree_code reverse_op = (rhs_code == PLUS_EXPR
   ? MINUS_EXPR : PLUS_EXPR);
@@ -5346,8 +5342,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  && (comp_code == LE_EXPR || comp_code == GT_EXPR
  || !tree_int_cst_equal (val,
  TYPE_MIN_VALUE (TREE_TYPE (val
- && live_on_edge (e, name2)
- && !has_single_use (name2))
+ && live_on_edge (e, name2))
{
  tree tmp, cst;
  enum tree_code new_comp_code = comp_code;
@@ -5392,8 +5387,7 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  && INTEGRAL_TYPE_P (TREE_TYPE (name2))
  && IN_RANGE (tree_to_uhwi (cst2), 1, prec - 1)
  && prec == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (val)))
- && live_on_edge (e, name2)
- && !has_single_use (name2))
+ && live_on_edge (e, name2))
{
  mask = wi::mask (tree_to_uhwi (cst2), false, prec);
  val2 = fold_binary (LSHIFT_EXPR, TREE_TYPE (val), val, cst2);
@@ -5498,12 +5492,10 @@ register_edge_assert_for_2 (tree name, edge e, 
gimple_stmt_iterator bsi,
  || !INTEGRAL_TYPE_P (TREE_TYPE (names[1]))
  || (TYPE_PRECISION (TREE_TYPE (name2))
  != TYPE_PRECISION (TREE_TYPE (names[1])))
- || !live_on_edge (e, names[1])
- || has_single_use (names[1]))
+ || !live_on_edge (e, names[1]))
names[1] = NULL_TREE;
}
- if (live_on_edge (e, name2)
- && 

Re: [PATCH, rs6000] Expand vec_ld and vec_st during parsing to improve performance

2016-04-20 Thread Mike Stump

> On Apr 20, 2016, at 6:55 AM, Bill Schmidt  wrote:
> Looking into this a bit more reminded me why things are the way they
> are.  The AltiVec interfaces were designed way back to be overloaded
> functions, which isn't valid C99.  Thus they can't be declared in
> headers without some magic.

And for fun, I have a nice generic overload resolution for builtin functions 
subsystem for a nice generic builtins subsystem.  Kinda would like to donate 
it, as with it, builtins are quite a bit nicer to deal with.  It is yet another 
solution to the problem.

We have a 5k line python program that sings and dances and processes builtins 
and wires them into the compiler.  Let me know if you want to invest some time, 
otherwise, we’d see about contributing it at some point, maybe later this 
year.


[PATCHv2, rs6000] Expand vec_ld and vec_st during parsing to improve performance

2016-04-20 Thread Bill Schmidt
Hi,

Thanks to Richard for help with the POINTER_PLUS_EXPR and BIT_AND_EXPR
changes.  This version of the patch is identical to the previous one,
except that the logic for expanding these expressions is now greatly
simplified.

Eventually we will want to remove the overload handling from rs6000-c.c
and move it into header files, but that is going to take some nontrivial
redesign effort, so I'd like to go forward with this implementation as
an intermediate step on that road.  It will be preferable to see if we
can implement __attribute__ ((__overloadable__)) similarly to what is
done in Clang; it looks like it is possible to use __builtin_choose_expr
for this, but when there are a dozen overloads of the same name, this
leads to very ugly and difficult to read code by comparison.

Bootstrapped and tested on both powerpc64le-unknown-linux-gnu and
powerpc64-unknown-linux-gnu with no regressions.  Is this ok for trunk
after GCC 6 releases?

Thanks,
Bill

On Tue, 2016-04-19 at 10:09 +0200, Richard Biener wrote:
> On Tue, Apr 19, 2016 at 12:05 AM, Bill Schmidt
>  wrote:
> > Hi,
> >
> > Expanding built-ins in the usual way (leaving them as calls until
> > expanding into RTL) restricts the amount of optimization that can be
> > performed on the code represented by the built-ins.  This has been
> > observed to be particularly bad for the vec_ld and vec_st built-ins on
> > PowerPC, which represent the lvx and stvx instructions.  Currently these
> > are expanded into UNSPECs that are left untouched by the optimizers, so
> > no redundant load or store elimination can take place.  For certain
> > idiomatic usages, this leads to very bad performance.
> >
> > Initially I planned to just change the UNSPEC representation to RTL that
> > directly expresses the address masking implicit in lvx and stvx.  This
> > turns out to be only partially successful in improving performance.
> > Among other things, by the time we reach RTL we have lost track of the
> > __restrict__ attribute, leading to more appearances of may-alias
> > relationships than should really be present.  Instead, this patch
> > expands the built-ins during parsing so that they are exposed to all
> > GIMPLE optimizations as well.
> >
> > This works well for vec_ld and vec_st.  It is also possible for
> > programmers to instead use __builtin_altivec_lvx_ and
> > __builtin_altivec_stvx_.  These are not so easy to catch during
> > parsing, since they are not processed by the overloaded built-in
> > function table.  For these, I am currently falling back to expansion
> > during RTL while still exposing the address-masking semantics, which
> > seems ok for these somewhat obscure built-ins.  At some future time we
> > may decide to handle them similarly to vec_ld and vec_st.
> >
> > For POWER8 little-endian only, the loads and stores during expand time
> > require some special handling, since the POWER8 expanders want to
> > convert these to lxvd2x/xxswapd and xxswapd/stxvd2x.  To deal with this,
> > I've added an extra pre-pass to the swap optimization phase that
> > recognizes the lvx and stvx patterns and canonicalizes them so they'll
> > be properly recognized.  This isn't an issue for earlier or later
> > processors, or for big-endian POWER8, so doing this as part of swap
> > optimization is appropriate.
> >
> > We have a lot of existing test cases for this code, which proved very
> > useful in discovering bugs, so I haven't seen a reason to add any new
> > tests.
> >
> > The patch is fairly large, but it isn't feasible to break it up into
> > smaller units without leaving something in a broken state.  So I will
> > have to just apologize for the size and leave it at that.  Sorry! :)
> >
> > Bootstrapped and tested successfully on powerpc64le-unknown-linux-gnu,
> > and on powerpc64-unknown-linux-gnu (-m32 and -m64) with no regressions.
> > Is this ok for trunk after GCC 6 releases?
> 
> Just took a very quick look but it seems you are using integer arithmetic
> for the pointer adjustment and bit-and.  You could use POINTER_PLUS_EXPR
> for the addition and BIT_AND_EXPR is also valid on pointer types.  Which
> means you don't need conversions to/from sizetype.
> 
> x86 nowadays has intrinsics implemented as inlines - they come from
> header files.  It seems for ppc the intrinsics are somehow magically
> there, w/o a header file?
> 
> Richard.
> 
> > Thanks,
> > Bill

2016-04-20  Bill Schmidt  

* config/rs6000/altivec.md (altivec_lvx_): Remove.
(altivec_lvx__internal): Document.
(altivec_lvx__2op): New define_insn.
(altivec_lvx__1op): Likewise.
(altivec_lvx__2op_si): Likewise.
(altivec_lvx__1op_si): Likewise.
(altivec_stvx_): Remove.
(altivec_stvx__internal): Document.
(altivec_stvx__2op): New define_insn.
(altivec_stvx__1op): Likewise.
(altivec_stvx__2op_si): Likewise.
(altivec_stvx__1op_si): Likewise.
* 

Re: [PATCH 10/18] merge adjust_cost and adjust_cost_2 target hooks

2016-04-20 Thread Trevor Saunders
On Wed, Apr 20, 2016 at 07:17:03AM -0500, Segher Boessenkool wrote:
> On Wed, Apr 20, 2016 at 02:22:14AM -0400, tbsaunde+...@tbsaunde.org wrote:
> > * config/microblaze/microblaze.c (microblaze_adjust_cost):
> > * Likewise.
> 
> Stray * (here and elsewhere).
> 
> > --- a/gcc/config/alpha/alpha.c
> > +++ b/gcc/config/alpha/alpha.c
> > @@ -4758,14 +4758,15 @@ alpha_split_atomic_exchange_12 (rtx operands[])
> > a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
> >  
> >  static int
> > -alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
> > +alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int 
> > cost,
> 
> Why an int and not enum reg_note?

I think I just copied what adjust_cost_2 did, but using the enum makes
sense.

> 
> > +  unsigned int)
> >  {
> >enum attr_type dep_insn_type;
> >  
> >/* If the dependence is an anti-dependence, there is no cost.  For an
> >   output dependence, there is sometimes a cost, but it doesn't seem
> >   worth handling those few cases.  */
> > -  if (REG_NOTE_KIND (link) != 0)
> > +  if (dep_type != 0)
> >  return cost;
> 
> From reg-notes.def:
> 
> /* REG_DEP_TRUE is used in scheduler dependencies lists to represent a
>read-after-write dependency (i.e. a true data dependency).  This is
>here, not grouped with REG_DEP_ANTI and REG_DEP_OUTPUT, because some
>passes use a literal 0 for it.  */
> REG_NOTE (DEP_TRUE)
> 
> Get rid of the literal 0 while you're at it?  Some places already have
> REG_DEP_TRUE.

not entirely related to what the patch is doing, but not a bad idea.

> > @@ -4486,7 +4487,7 @@ c6x_adjust_cost (rtx_insn *insn, rtx link, rtx_insn 
> > *dep_insn, int cost)
> >if (insn_code_number >= 0)
> >  insn_type = get_attr_type (insn);
> >  
> > -  kind = REG_NOTE_KIND (link);
> > +  kind = (reg_note) dep_type;
> 
> Maybe it's just me, but it would look a lot less confusing with "enum".

well, if you change the arg to be an enum then it goes away I think.

> >  static int
> > -mips_adjust_cost (rtx_insn *insn ATTRIBUTE_UNUSED, rtx link,
> > - rtx_insn *dep ATTRIBUTE_UNUSED, int cost)
> > +mips_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned 
> > int)
> >  {
> > -  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
> > -  && TUNE_20KC)
> > -return cost;
> > -  if (REG_NOTE_KIND (link) != 0)
> > +  if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT || !TUNE_20KC))
> >  return 0;
> >return cost;
> >  }
> 
> The original logic was a lot more readable (test positives, not negatives).

 I'm not sure what I think there.

> > +as a data-dependence.  If the scheduler using the automaton based pipeline
> >  description, the cost of anti-dependence is zero and the cost of
> >  output-dependence is maximum of one and the difference of latency
> >  times of the first and the second insns.  If these values are not
> 
> "is using" (pre-existing, but hey).

makes sense I guess.

> So I wonder how much is gained by adding an extra unused argument to so
> many places.

Well that's just a side effect of merging the two hooks, so I'm not sure
what else you'd do other than not pass the arg to ia64, I'm not sure how
important it is there.  On the other hand the unused arg probably isn't
important relative to the indirect call to the hook.

Trev

> 
> 
> Segher


[PATCH] Allow all 1s of integer as standard SSE constants

2016-04-20 Thread H.J. Lu
Since all 1s in TImode is a standard SSE2 constant, all 1s in OImode is a
standard AVX2 constant and all 1s in XImode is a standard AVX512F constant,
pass mode to standard_sse_constant_p and standard_sse_constant_opcode
to check if all 1s is available for the target.

Tested on Linux/x86-64.  OK for master?

BTW, it will be used to fix

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155


H.J.
---
* config/i386/i386-protos.h (standard_sse_constant_p): Take
machine_mode with VOIDmode as default.
* config/i386/i386.c (standard_sse_constant_p): Get mode if
it is VOIDmode.  Return 2 for all 1s of integer in supported
modes.
(ix86_expand_vector_move): Pass mode to standard_sse_constant_p.
* config/i386/i386.md (*movxi_internal_avx512f): Replace
vector_move_operand with nonimmediate_or_sse_const_operand and
use BC instead of C in constraint.  Check register_operand
instead of MEM_P.  Pass mode to standard_sse_constant_opcode.
(*movoi_internal_avx): Disabled for TARGET_AVX2.  Check
register_operand instead of MEM_P.
(*movoi_internal_avx2): New pattern.
(*movti_internal_sse): Likewise.
(*movti_internal): Renamed to ...
(*movti_internal_sse2): This.  Require SSE2.  Use BC instead of
C in constraint. Check register_operand instead of MEM_P in
32-bit mode.
---
 gcc/config/i386/i386-protos.h |   2 +-
 gcc/config/i386/i386.c|  27 ---
 gcc/config/i386/i386.md   | 104 --
 3 files changed, 121 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ff47bc1..cf54189 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void);
 extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
 extern rtx standard_80387_constant_rtx (int);
-extern int standard_sse_constant_p (rtx);
+extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode);
 extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
 extern bool symbolic_reference_mentioned_p (rtx);
 extern bool extended_reg_mentioned_p (rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6379313..dd951c2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx)
in supported SSE/AVX vector mode.  */
 
 int
-standard_sse_constant_p (rtx x)
+standard_sse_constant_p (rtx x, machine_mode mode)
 {
-  machine_mode mode;
-
   if (!TARGET_SSE)
 return 0;
 
-  mode = GET_MODE (x);
-  
+  if (mode == VOIDmode)
+mode = GET_MODE (x);
+
   if (x == const0_rtx || x == CONST0_RTX (mode))
 return 1;
-  if (vector_all_ones_operand (x, mode))
+  if (CONST_INT_P (x))
+{
+  /* If mode != VOIDmode, standard_sse_constant_p must be called:
+1. On TImode with SSE2.
+2. On OImode with AVX2.
+3. On XImode with AVX512F.
+   */
+  if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1
+ && (mode == VOIDmode
+ || (mode == TImode && TARGET_SSE2)
+ || (mode == OImode && TARGET_AVX2)
+ || (mode == XImode && TARGET_AVX512F)))
+   return 2;
+}
+  else if (vector_all_ones_operand (x, mode))
 switch (mode)
   {
   case V16QImode:
@@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx 
operands[])
   && (CONSTANT_P (op1)
  || (SUBREG_P (op1)
  && CONSTANT_P (SUBREG_REG (op1
-  && !standard_sse_constant_p (op1))
+  && !standard_sse_constant_p (op1, mode))
 op1 = validize_mem (force_const_mem (mode, op1));
 
   /* We need to check memory alignment for SSE mode since attribute
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index babd0a4..75227aa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1971,8 +1971,10 @@
 
 (define_insn "*movxi_internal_avx512f"
   [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
-   (match_operand:XI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+   (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX512F
+   && (register_operand (operands[0], XImode)
+   || register_operand (operands[1], XImode))"
 {
   switch (which_alternative)
 {
@@ -1996,7 +1998,10 @@
 (define_insn "*movoi_internal_avx"
   [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
(match_operand:OI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_AVX
+   && !TARGET_AVX2
+   && (register_operand (operands[0], OImode)
+   || register_operand (operands[1], OImode))"
 {
   switch (get_attr_type (insn))
 {
@@ -2042,10 +2047,62 @@
  ]
   

Re: [PATCHES] Update avx256-unaligned-load-1.c and avx256-unaligned-store-2.c

2016-04-20 Thread Uros Bizjak
On Wed, Apr 20, 2016 at 5:35 PM, H.J. Lu  wrote:
> On Wed, Apr 20, 2016 at 4:19 AM, Uros Bizjak  wrote:
>
>> BTW: There are a couple of regressions in the testsuite [1] when
>> configured --with-arch=corei7. Can you please look at the testcases,
>> if scan patterns need to be adjusted?
>
> They are caused by -mtune=slm.
>
>> FAIL: gcc.target/i386/avx256-unaligned-load-1.c scan-assembler-not
>> (avx_loadups256|vmovups[^\\n\\r]*movv8sf_internal)
>
> It is because avx_loadups256 and sse_loadups have been replaced by
> movv8sf_internal and movv4sf_internal.  -mtune=slm disables
> SSE stores.
>
>> FAIL: gcc.target/i386/avx256-unaligned-store-2.c scan-assembler
>> vmovups.*movv16qi_internal/3
>
> It is because -mtune=slm disables SSE stores which is expected
> by avx256-unaligned-store-2.c.
>
> Here are 2 patches for them.  Tested on x86-64.  OK for trunk?

OK.

Thanks,
Uros.


Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 4:19 AM, Marek Polacek  wrote:
> On Wed, Apr 20, 2016 at 12:54:12PM +0200, Richard Biener wrote:
>> On Wed, Apr 20, 2016 at 12:37 PM, Jakub Jelinek  wrote:
>> > On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
>> >> > --- gcc/tree-if-conv.c
>> >> > +++ gcc/tree-if-conv.c
>> >> > @@ -262,6 +262,16 @@ ifc_temp_var (tree type, tree expr, 
>> >> > gimple_stmt_iterator *gsi)
>> >> >return new_name;
>> >> >  }
>> >> >
>> >> > +/* Return true when COND is a false predicate.  */
>> >> > +
>> >> > +static inline bool
>> >> > +is_false_predicate (tree cond)
>> >> > +{
>> >> > +  return (cond == NULL_TREE
>> >> > + || cond == boolean_false_node
>> >> > + || integer_zerop (cond));
>> >> > +}
>> >> > +
>> >
>> > Is it really a good idea to return true even for cond == NULL_TREE?
>> > I mean it is then very confusing, because both is_true_predicate and
>> > is_false_predicate are true in that case.
>>
>> Ah, indeed.  NULL_TREE is true, not false.
>
> I can fix it up with the following.
>
> Bootstrap/regtest pending on x86_64-linux, ok for trunk and 6 if it passes?
>
> 2016-04-20  Marek Polacek  
>
> * tree-if-conv.c (is_false_predicate): For NULL_TREE return false
> rather than true.
>
> diff --git gcc/tree-if-conv.c gcc/tree-if-conv.c
> index a9fbab9..72e808e 100644
> --- gcc/tree-if-conv.c
> +++ gcc/tree-if-conv.c
> @@ -267,9 +267,9 @@ ifc_temp_var (tree type, tree expr, gimple_stmt_iterator 
> *gsi)
>  static inline bool
>  is_false_predicate (tree cond)
>  {
> -  return (cond == NULL_TREE
> - || cond == boolean_false_node
> - || integer_zerop (cond));
> +  return (cond != NULL_TREE
> + && (cond == boolean_false_node
> + || integer_zerop (cond)));
>  }
>
>  /* Return true when COND is a true predicate.  */
>
> Marek

It leads to ICE on 32-bit x86 host:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70725#c8

-- 
H.J.


match.pd patch: max(int_min, x)->x

2016-04-20 Thread Marc Glisse

Hello,

this simple transformation is currently done in RTL, sometimes also 
in VRP if we have any kind of range information (even on the wrong side, 
but not with VR_VARYING). It seems more natural to complete the match.pd 
pattern than make VRP understand this case.


Bootstrap+regtest on powerpc64le-unknown-linux-gnu (some noise in libgomp 
testcases).


2016-04-21  Marc Glisse  

gcc/
* match.pd (min(int_max, x), max(int_min, x)): New transformations.

gcc/testsuite/
* gcc.dg/tree-ssa/minmax-1.c: New testcase.

--
Marc Glisse
Index: gcc/match.pd
===
--- gcc/match.pd(revision 235292)
+++ gcc/match.pd(working copy)
@@ -1185,30 +1185,40 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* min(max(x,y),y) -> y.  */
 (simplify
  (min:c (max:c @0 @1) @1)
  @1)
 /* max(min(x,y),y) -> y.  */
 (simplify
  (max:c (min:c @0 @1) @1)
  @1)
 (simplify
  (min @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_MIN_VALUE (type)
-  && operand_equal_p (@1, TYPE_MIN_VALUE (type), OEP_ONLY_CONST))
-  @1))
+ (switch
+  (if (INTEGRAL_TYPE_P (type)
+   && TYPE_MIN_VALUE (type)
+   && operand_equal_p (@1, TYPE_MIN_VALUE (type), OEP_ONLY_CONST))
+   @1)
+  (if (INTEGRAL_TYPE_P (type)
+   && TYPE_MAX_VALUE (type)
+   && operand_equal_p (@1, TYPE_MAX_VALUE (type), OEP_ONLY_CONST))
+   @0)))
 (simplify
  (max @0 @1)
- (if (INTEGRAL_TYPE_P (type)
-  && TYPE_MAX_VALUE (type)
-  && operand_equal_p (@1, TYPE_MAX_VALUE (type), OEP_ONLY_CONST))
-  @1))
+ (switch
+  (if (INTEGRAL_TYPE_P (type)
+   && TYPE_MAX_VALUE (type)
+   && operand_equal_p (@1, TYPE_MAX_VALUE (type), OEP_ONLY_CONST))
+   @1)
+  (if (INTEGRAL_TYPE_P (type)
+   && TYPE_MIN_VALUE (type)
+   && operand_equal_p (@1, TYPE_MIN_VALUE (type), OEP_ONLY_CONST))
+   @0)))
 (for minmax (FMIN FMAX)
  /* If either argument is NaN, return the other one.  Avoid the
 transformation if we get (and honor) a signalling NaN.  */
  (simplify
   (minmax:c @0 REAL_CST@1)
   (if (real_isnan (TREE_REAL_CST_PTR (@1))
&& (!HONOR_SNANS (@1) || !TREE_REAL_CST (@1).signalling))
@0)))
 /* Convert fmin/fmax to MIN_EXPR/MAX_EXPR.  C99 requires these
functions to return the numeric arg if the other one is NaN.
Index: gcc/testsuite/gcc.dg/tree-ssa/minmax-1.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/minmax-1.c(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/minmax-1.c(working copy)
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+static int min(int a,int b){return (a

Re: [patch] libstdc++/69703 ignore endianness in codecvt_utf8

2016-04-20 Thread Jonathan Wakely

On 19/04/16 19:07 +0100, Jonathan Wakely wrote:

This was reported as a bug in the Filesystem library, but it's
actually a problem in the codecvt_utf8 facet that it uses.


The fix had a silly typo meaning it didn't work for big endian
targets, which was revealed by the improved tests I added.

Tested x86_64-linux and powerpc64-linux, committed to trunk.


commit 3a57afb171190fc8554d68094f9d4e3d2d0b2576
Author: Jonathan Wakely 
Date:   Wed Apr 20 17:44:50 2016 +0100

Fix std::codecvt_utf8* for big-endian targets

	PR libstdc++/69703
	* src/c++11/codecvt.cc (__codecvt_utf8_base::do_in,
	__codecvt_utf8_utf16_base::do_in): Fix mask operations.

diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc
index b6b6358..b60691c 100644
--- a/libstdc++-v3/src/c++11/codecvt.cc
+++ b/libstdc++-v3/src/c++11/codecvt.cc
@@ -789,7 +789,7 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 {
   range from{ __from, __from_end };
   range to{ __to, __to_end };
-  codecvt_mode mode = codecvt_mode(_M_mode | (consume_header|generate_header));
+  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
   mode = codecvt_mode(mode | little_endian);
 #endif
@@ -1268,7 +1268,7 @@ do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 {
   range from{ __from, __from_end };
   range to{ __to, __to_end };
-  codecvt_mode mode = codecvt_mode(_M_mode | (consume_header|generate_header));
+  codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
   mode = codecvt_mode(mode | little_endian);
 #endif


[PATCH] nvptx libgcc changes

2016-04-20 Thread Alexander Monakov
Rewrite libgcc/config/nvptx from PTX assembly to C, to allow building it for
-mgomp multilib.

Previously posted here:

[gomp-nvptx 6/9] nvptx libgcc: rewrite in C
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00120.html

[gomp-nvptx 2/7] nvptx libgcc: use attribute shared
https://gcc.gnu.org/ml/gcc-patches/2016-03/msg01106.html

2016-03-10  Alexander Monakov  

* config/nvptx/crt0.c (__nvptx_stacks): Define in C.  Use it...
(__nvptx_uni): Ditto.
(__main): ...here instead of inline asm.
* config/nvptx/stacks.c (__nvptx_stacks): Define in C.
(__nvptx_uni): Ditto.

2015-12-09  Alexander Monakov  

* config/nvptx/crt0.c: New, rewritten in C from ...
* config/nvptx/crt0.s: ...this.  Delete.
* config/nvptx/free.c: New, rewritten in C from ...
* config/nvptx/free.asm: ...this.  Delete.
* config/nvptx/malloc.c: New, rewritten in C from ...
* config/nvptx/malloc.asm: ...this.  Delete.
* config/nvptx/realloc.c: Handle out-of-memory condition.
* config/nvptx/nvptx-malloc.h (__nvptx_real_free,
__nvptx_real_malloc): Declare.
* config/nvptx/stacks.c: New.
* config/nvptx/t-nvptx: Adjust.

diff --git a/libgcc/config/nvptx/crt0.c b/libgcc/config/nvptx/crt0.c
new file mode 100644
index 000..9e9a25e
--- /dev/null
+++ b/libgcc/config/nvptx/crt0.c
@@ -0,0 +1,59 @@
+/* Startup routine for standalone execution.
+
+   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+void exit (int);
+void abort (void);
+void __attribute__((kernel)) __main (int *, int, char *[]);
+
+static int *__exitval;
+
+void
+exit (int arg)
+{
+  *__exitval = arg;
+  asm volatile ("exit;");
+  __builtin_unreachable ();
+}
+
+void
+abort (void)
+{
+  exit (255);
+}
+
+extern char *__nvptx_stacks[32] __attribute__((shared));
+extern unsigned __nvptx_uni[32] __attribute__((shared));
+
+extern int main (int argc, char *argv[]);
+
+void __attribute__((kernel))
+__main (int *__retval, int __argc, char *__argv[])
+{
+  __exitval = __retval;
+
+  static char gstack[131072] __attribute__((aligned(8)));
+  __nvptx_stacks[0] = gstack + sizeof gstack;
+  __nvptx_uni[0] = 0;
+
+  exit (main (__argc, __argv));
+}
diff --git a/libgcc/config/nvptx/crt0.s b/libgcc/config/nvptx/crt0.s
deleted file mode 100644
index 38327ed..000
--- a/libgcc/config/nvptx/crt0.s
+++ /dev/null
@@ -1,45 +0,0 @@
-   .version 3.1
-   .target sm_30
-   .address_size 64
-
-.global .u64 %__exitval;
-// BEGIN GLOBAL FUNCTION DEF: abort
-.visible .func abort
-{
-.reg .u64 %rd1;
-ld.global.u64   %rd1,[%__exitval];
-st.u32   [%rd1], 255;
-exit;
-}
-// BEGIN GLOBAL FUNCTION DEF: exit
-.visible .func exit (.param .u32 %arg)
-{
-.reg .u64 %rd1;
-   .reg .u32 %val;
-   ld.param.u32 %val,[%arg];
-ld.global.u64   %rd1,[%__exitval];
-st.u32   [%rd1], %val;
-exit;
-}
-
-.extern .func (.param.u32 retval) main (.param.u32 argc, .param.u64 argv);
-
-.visible .entry __main (.param .u64 __retval, .param.u32 __argc, .param.u64 
__argv)
-{
-.reg .u32 %r<3>;
-.reg .u64 %rd<3>;
-   .param.u32 %argc;
-   .param.u64 %argp;
-   .param.u32 %mainret;
-ld.param.u64%rd0, [__retval];
-st.global.u64   [%__exitval], %rd0;
-
-   ld.param.u32%r1, [__argc];
-   ld.param.u64%rd1, [__argv];
-   st.param.u32[%argc], %r1;
-   st.param.u64[%argp], %rd1;
-call.uni(%mainret), main, (%argc, %argp);
-   ld.param.u32%r1,[%mainret];
-st.s32   [%rd0], %r1;
-exit;
-}
diff --git a/libgcc/config/nvptx/free.asm b/libgcc/config/nvptx/free.asm
deleted file mode 100644
index 3b8e39e..000
--- a/libgcc/config/nvptx/free.asm
+++ /dev/null
@@ -1,50 +0,0 @@
-// A wrapper around free to enable a realloc implementation.
-
-// Copyright (C) 2014-2016 Free Software Foundation, Inc.
-
-// This file is free software; you can redistribute it and/or 

[PATCH] nvptx mkoffload changes

2016-04-20 Thread Alexander Monakov
Handle OpenMP offloading in NVPTX mkoffload.

Previously posted here:

[gomp-nvptx 7/9] nvptx mkoffload: pass -mgomp for OpenMP offloading
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00121.html
 
2015-12-09  Alexander Monakov  

* config/nvptx/mkoffload.c (main): Check that either OpenACC or OpenMP
is selected.  Pass -mgomp to offload compiler in OpenMP case.

2015-12-08  Alexander Monakov  

* config/nvptx/mkoffload.c (main): Allow -fopenmp.

diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c
index c8eed45..e99ef37 100644
--- a/gcc/config/nvptx/mkoffload.c
+++ b/gcc/config/nvptx/mkoffload.c
@@ -460,6 +460,7 @@ main (int argc, char **argv)
 
   /* Scan the argument vector.  */
   bool fopenmp = false;
+  bool fopenacc = false;
   for (int i = 1; i < argc; i++)
 {
 #define STR "-foffload-abi="
@@ -476,11 +477,15 @@ main (int argc, char **argv)
 #undef STR
   else if (strcmp (argv[i], "-fopenmp") == 0)
fopenmp = true;
+  else if (strcmp (argv[i], "-fopenacc") == 0)
+   fopenacc = true;
   else if (strcmp (argv[i], "-save-temps") == 0)
save_temps = true;
   else if (strcmp (argv[i], "-v") == 0)
verbose = true;
 }
+  if (!(fopenacc ^ fopenmp))
+fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
 
   struct obstack argv_obstack;
   obstack_init (&argv_obstack);
@@ -501,6 +506,8 @@ main (int argc, char **argv)
 default:
   gcc_unreachable ();
 }
+  if (fopenmp)
+obstack_ptr_grow (&argv_obstack, "-mgomp");
 
   for (int ix = 1; ix != argc; ix++)
 {
@@ -517,8 +524,8 @@ main (int argc, char **argv)
 fatal_error (input_location, "cannot open '%s'", ptx_cfile_name);
 
   /* PR libgomp/65099: Currently, we only support offloading in 64-bit
- configurations.  PR target/67822: OpenMP offloading to nvptx fails.  */
-  if (offload_abi == OFFLOAD_ABI_LP64 && !fopenmp)
+ configurations.  */
+  if (offload_abi == OFFLOAD_ABI_LP64)
 {
   ptx_name = make_temp_file (".mkoffload");
   obstack_ptr_grow (&argv_obstack, "-o");




[PATCH] nvptx omp target entrypoint handling

2016-04-20 Thread Alexander Monakov
This patch adds OpenMP-specific kernel entry code emission.  There's a
corresponding omp-low.c patch that makes OpenACC use "omp acc target
entrypoint" to disambiguate OpenACC and OpenMP target regions.

2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_record_offload_symbol): Allow NULL attrs
for OpenMP offloading.

2015-12-08  Alexander Monakov  

* config/nvptx/nvptx.c (write_omp_entry): New.  Use it...
(nvptx_declare_function_name): ...here to emit pointers for libgomp.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -699,7 +692,10 @@ static bool
 write_as_kernel (tree attrs)
 {
   return (lookup_attribute ("kernel", attrs) != NULL_TREE
- || lookup_attribute ("omp target entrypoint", attrs) != NULL_TREE);
+ || lookup_attribute ("omp acc target entrypoint", attrs) != NULL_TREE);
+  /* Ignore "omp target entrypoint" here: OpenMP target region functions are
+ called from gomp_nvptx_main.  The corresponding kernel entry is emitted
+ from write_omp_entry.  */
 }
 
 /* Emit a linker marker for a function decl or defn.  */
@@ -944,6 +940,69 @@ nvptx_init_axis_predicate (FILE *file, int regno, const 
char *name)
   fprintf (file, "\t}\n");
 }
 
+/* Emit kernel NAME for function ORIG outlined for an OpenMP 'target' region:
+
+   extern void gomp_nvptx_main (void (*fn)(void*), void *fnarg);
+   void __attribute__((kernel)) NAME (void *arg, char *stack, size_t stacksize)
+   {
+ __nvptx_stacks[tid.y] = stack + stacksize * (ctaid.x * ntid.y + tid.y + 1);
+ __nvptx_uni[tid.y] = 0;
+ gomp_nvptx_main (ORIG, arg);
+   }
+   ORIG itself should not be emitted as a PTX .entry function.  */
+
+static void
+write_omp_entry (FILE *file, const char *name, const char *orig)
+{
+  static bool gomp_nvptx_main_declared;
+  if (!gomp_nvptx_main_declared)
+{
+  gomp_nvptx_main_declared = true;
+  write_fn_marker (func_decls, false, true, "gomp_nvptx_main");
+  func_decls << ".extern .func gomp_nvptx_main (.param.u" << POINTER_SIZE
+<< " %in_ar1, .param.u" << POINTER_SIZE << " %in_ar2);\n";
+}
+#define ENTRY_TEMPLATE(PS, PS_BYTES, MAD_PS_32) "\
+ (.param.u" PS " %arg, .param.u" PS " %stack, .param.u" PS " %sz)\n\
+{\n\
+   .reg.u32 %r<3>;\n\
+   .reg.u" PS " %R<4>;\n\
+   mov.u32 %r0, %tid.y;\n\
+   mov.u32 %r1, %ntid.y;\n\
+   mov.u32 %r2, %ctaid.x;\n\
+   cvt.u" PS ".u32 %R1, %r0;\n\
+   " MAD_PS_32 " %R1, %r1, %r2, %R1;\n\
+   mov.u" PS " %R0, __nvptx_stacks;\n\
+   " MAD_PS_32 " %R0, %r0, " PS_BYTES ", %R0;\n\
+   ld.param.u" PS " %R2, [%stack];\n\
+   ld.param.u" PS " %R3, [%sz];\n\
+   add.u" PS " %R2, %R2, %R3;\n\
+   mad.lo.u" PS " %R2, %R1, %R3, %R2;\n\
+   st.shared.u" PS " [%R0], %R2;\n\
+   mov.u" PS " %R0, __nvptx_uni;\n\
+   " MAD_PS_32 " %R0, %r0, 4, %R0;\n\
+   mov.u32 %r0, 0;\n\
+   st.shared.u32 [%R0], %r0;\n\
+   mov.u" PS " %R0, \0;\n\
+   ld.param.u" PS " %R1, [%arg];\n\
+   {\n\
+   .param.u" PS " %P<2>;\n\
+   st.param.u" PS " [%P0], %R0;\n\
+   st.param.u" PS " [%P1], %R1;\n\
+   call.uni gomp_nvptx_main, (%P0, %P1);\n\
+   }\n\
+   ret.uni;\n\
+}\n"
+  static const char entry64[] = ENTRY_TEMPLATE ("64", "8", "mad.wide.u32");
+  static const char entry32[] = ENTRY_TEMPLATE ("32", "4", "mad.lo.u32  ");
+#undef ENTRY_TEMPLATE
+  const char *entry_1 = TARGET_ABI64 ? entry64 : entry32;
+  /* Position ENTRY_2 after the embedded nul using strlen of the prefix.  */
+  const char *entry_2 = entry_1 + strlen (entry64) + 1;
+  fprintf (file, ".visible .entry %s%s%s%s", name, entry_1, orig, entry_2);
+  need_softstack_decl = need_unisimt_decl = true;
+}
+
 /* Implement ASM_DECLARE_FUNCTION_NAME.  Writes the start of a ptx
function, including local var decls and copies from the arguments to
local regs.  */
@@ -955,6 +1041,14 @@ nvptx_declare_function_name (FILE *file, const char 
*name, const_tree decl)
   tree result_type = TREE_TYPE (fntype);
   int argno = 0;
 
+  if (flag_openmp
+  && lookup_attribute ("omp target entrypoint", DECL_ATTRIBUTES (decl)))
+{
+  char *buf = (char *) alloca (strlen (name) + sizeof ("$impl"));
+  sprintf (buf, "%s$impl", name);
+  write_omp_entry (file, name, buf);
+  name = buf;
+}
   /* We construct the initial part of the function into a string
  stream, in order to share the prototype writing code.  */
   std::stringstream s;
@@ -3872,13 +4116,13 @@ nvptx_record_offload_symbol (tree decl)
 case FUNCTION_DECL:
   {
tree attr = get_oacc_fn_attrib (decl);
-   tree dims = TREE_VALUE (attr);
-   unsigned ix;
+   /* OpenMP offloading does not set this attribute.  */
+   tree dims = attr ? TREE_VALUE (attr) : NULL_TREE;
 
  

[PATCH] nvptx -mgomp multilib

2016-04-20 Thread Alexander Monakov
Wire up -mgomp multilib for OpenMP offloading, in a straightforward way.

Changes in nvptx.opt and invoke.texi adding -msoft-stack and -muniform-simt
are originally from the patches that introduced those.

Doc additions in invoke.texi will probably change; I've asked Sandra Loosemore
to have a look.

Previously posted here:

[gomp-nvptx 4/9] nvptx backend: add -mgomp option and multilib
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00115.html
 
2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_option_override): Handle TARGET_GOMP.
* config/nvptx/nvptx.opt (mgomp): New option.
* config/nvptx/t-nvptx (MULTILIB_OPTIONS): New.
* doc/invoke.texi (mgomp): Document.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -184,6 +171,9 @@ nvptx_option_override (void)
   worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, "__worker_red");
   SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
   worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
+  if (TARGET_GOMP)
+target_flags |= MASK_SOFT_STACK | MASK_UNIFORM_SIMT;
 }
 
 /* Return a ptx type for MODE.  If PROMOTE, then use .u32 for QImode to
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 056b9b2..1a7608b 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -32,3 +32,15 @@ Link in code for a __main kernel.
 moptimize
 Target Report Var(nvptx_optimize) Init(-1)
 Optimize partition neutering
+
+msoft-stack
+Target Report Mask(SOFT_STACK)
+Use custom stacks instead of local memory for automatic storage.
+
+muniform-simt
+Target Report Mask(UNIFORM_SIMT)
+Generate code that executes all threads in a warp as if one was active.
+
+mgomp
+Target Report Mask(GOMP)
+Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt.
diff --git a/gcc/config/nvptx/t-nvptx b/gcc/config/nvptx/t-nvptx
index e2580c9..6c1010d 100644
--- a/gcc/config/nvptx/t-nvptx
+++ b/gcc/config/nvptx/t-nvptx
@@ -8,3 +8,5 @@ ALL_HOST_OBJS += mkoffload.o
 mkoffload$(exeext): mkoffload.o collect-utils.o libcommon-target.a 
$(LIBIBERTY) $(LIBDEPS)
+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
  mkoffload.o collect-utils.o libcommon-target.a $(LIBIBERTY) $(LIBS)
+
+MULTILIB_OPTIONS = mgomp
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d281975..a02a852 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19341,6 +19341,32 @@ offloading execution.
 Apply partitioned execution optimizations.  This is the default when any
 level of optimization is selected.
 
+@item -msoft-stack
+@opindex msoft-stack
+Do not use @code{.local} memory for automatic storage.  Instead, use pointer
+in shared memory array @code{char *__nvptx_stacks[]} at position @code{tid.y}
+as the stack pointer.  This is for placing automatic variables into storage
+that can be accessed from other threads, or modified with atomic instructions.
+
+@item -muniform-simt
+@opindex muniform-simt
+Switch to a code generation variant that allows executing all threads in each
+warp, while maintaining memory state and side effects as if only one thread
+in each warp was active outside of OpenMP SIMD regions.  All atomic operations
+and calls to runtime (malloc, free, vprintf) are conditionally executed (iff
+current lane index equals the master lane index), and the register being
+assigned is copied via a shuffle instruction from the master lane.  Outside of
+SIMD regions lane 0 is the master; inside, each thread sees itself as the
+master.  Shared memory array @code{int __nvptx_uni[]} stores all-zeros or
+all-ones bitmasks for each warp, indicating current mode (0 outside of SIMD
+regions).  Each thread can bitwise-and the bitmask at position @code{tid.y}
+with current lane index to compute the master lane index.
+
+@item -mgomp
+@opindex mgomp
+Generate code for use in OpenMP offloading: enables @option{-msoft-stack} and
+@option{-muniform-simt} options, and selects corresponding multilib variant.
+
 @end table
 
 @node PDP-11 Options




[PATCH] nvptx -muniform-simt codegen variant

2016-04-20 Thread Alexander Monakov
This patch implements a code generation variant needed for OpenMP, where all
lanes in each warp can be kept active outside of SIMD regions, with observable
execution effects as if only one lane was active, and local state (program
counter and registers) synchronized among lanes, under -muniform-simt option.

A bikeshed on the new nvptx.md attribute name is possible: perhaps "uniform"
would work better than "divergent" that I chose back then.

Previously posted here:

[gomp-nvptx 1/9] nvptx backend: allow emitting COND_EXEC insns
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00127.html

[gomp-nvptx 2/9] nvptx backend: new "uniform SIMT" codegen variant
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00116.html

[gomp-nvptx 3/9] nvptx backend: add two more identifier maps
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00126.html

2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_name_replacement): Rewrite.  Add
__nvptx_real_malloc -> malloc and __nvptx_real_free -> free
replacements.

2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (need_unisimt_decl): New variable.  Set it...
(nvptx_init_unisimt_predicate): ...here (new function) and use it...
(nvptx_file_end): ...here to emit declaration of __nvptx_uni array.
(nvptx_declare_function_name): Call nvptx_init_unisimt_predicate.
(nvptx_get_unisimt_master): New helper function.
(nvptx_get_unisimt_predicate): Ditto.
(nvptx_call_insn_is_syscall_p): Ditto.
(nvptx_unisimt_handle_set): Ditto.
(nvptx_reorg_uniform_simt): New.  Transform code for -muniform-simt.
(nvptx_get_axis_predicate): New helper function, factored out from...
(nvptx_single): ...here.
(nvptx_reorg): Call nvptx_reorg_uniform_simt.
* config/nvptx/nvptx.h (TARGET_CPU_CPP_BUILTINS): Define
__nvptx_unisimt__ when -muniform-simt option is active.
(struct machine_function): Add unisimt_master, unisimt_predicate
rtx fields.
* config/nvptx/nvptx.md (divergent): New attribute.
(atomic_compare_and_swap_1): Mark as divergent.
(atomic_exchange): Ditto.
(atomic_fetch_add): Ditto.
(atomic_fetch_addsf): Ditto.
(atomic_fetch_): Ditto.
* config/nvptx/nvptx.opt (muniform-simt): New option.
* doc/invoke.texi (-muniform-simt): Document.

2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_output_call_insn): Handle COND_EXEC
patterns.  Emit instruction predicate.
(nvptx_print_operand): Unbreak handling of instruction predicates.
* config/nvptx/nvptx.md (predicable): New attribute.  Generate
predicated forms via define_cond_exec.
(br_true): Mark as not predicable.
(br_false): Ditto.
(br_true_uni): Ditto.
(br_false_uni): Ditto.
(return): Ditto.
(trap_if_true): Ditto.
(trap_if_false): Ditto.
(nvptx_fork): Ditto.
(nvptx_forked): Ditto.
(nvptx_joining): Ditto.
(nvptx_join): Ditto.
(nvptx_barsync): Ditto.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -272,14 +265,14 @@ section_for_decl (const_tree decl)
 static const char *
 nvptx_name_replacement (const char *name)
 {
-  if (strcmp (name, "call") == 0)
-return "__nvptx_call";
-  if (strcmp (name, "malloc") == 0)
-return "__nvptx_malloc";
-  if (strcmp (name, "free") == 0)
-return "__nvptx_free";
-  if (strcmp (name, "realloc") == 0)
-return "__nvptx_realloc";
+  static const char *const replacements[] = {
+"malloc", "__nvptx_malloc", "free", "__nvptx_free",
+"realloc", "__nvptx_realloc", "call", "__nvptx_call",
+"__nvptx_real_malloc", "malloc", "__nvptx_real_free", "free"
+  };
+  for (size_t i = 0; i < ARRAY_SIZE (replacements) / 2; i++)
+if (!strcmp (name, replacements[2 * i]))
+  return replacements[2 * i + 1];
   return name;
 }
 
@@ -944,6 +940,33 @@ nvptx_init_axis_predicate (FILE *file, int regno, const 
char *name)
   fprintf (file, "\t}\n");
 }
 
+/* Emit code to initialize predicate and master lane index registers for
+   -muniform-simt code generation variant.  */
+
+static void
+nvptx_init_unisimt_predicate (FILE *file)
+{
+  int bits = POINTER_SIZE;
+  int master = REGNO (cfun->machine->unisimt_master);
+  int pred = REGNO (cfun->machine->unisimt_predicate);
+  fprintf (file, "\t{\n");
+  fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
+  fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
+  fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
+  fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
+  fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
+  bits == 64 ? ".wide" : ".lo");
+  fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
+  fprintf (file, 

[PATCH] nvptx per-warp compiler-defined stacks (-msoft-stack)

2016-04-20 Thread Alexander Monakov
This patch implements per-warp compiler-defined stacks under -msoft-stack
option, and implements alloca on top of that.  In a few obvious places,
changes from -muniform-simt patch are present in the hunks.

Previously posted here:

[PATCH] nvptx: implement automatic storage in custom stacks
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01519.html

[gomp-nvptx] nvptx backend: implement alloca with -msoft-stack
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01397.html

[gomp-nvptx 7/7] nvptx backend: define STACK_SIZE_MODE
https://gcc.gnu.org/ml/gcc-patches/2016-03/msg01108.html

2016-03-15  Alexander Monakov  

* config/nvptx/nvptx.h (STACK_SIZE_MODE): Define.

2015-12-14  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_declare_function_name): Emit %outargs
using .local %outargs_ar only if not TARGET_SOFT_STACK.  Emit %outargs
under TARGET_SOFT_STACK by offsetting from %frame.
(nvptx_get_drap_rtx): Return %argp as the DRAP if needed.
* config/nvptx/nvptx.md (nvptx_register_operand): Allow %outargs under
TARGET_SOFT_STACK.
(nvptx_nonimmediate_operand): Ditto.
(allocate_stack): Implement for TARGET_SOFT_STACK.  Remove unused code.
(allocate_stack_): Remove unused pattern.
(set_softstack_insn): New pattern.
(restore_stack_block): Handle for TARGET_SOFT_STACK.

2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (need_softstack_decl): New variable.
(nvptx_declare_function_name): Handle TARGET_SOFT_STACK.
(nvptx_output_return): Emit stack restore if needed.
(nvptx_file_end): Handle need_softstack_decl.
* config/nvptx/nvptx.h (TARGET_CPU_CPP_BUILTINS): Define
__nvptx_softstack__ when -msoft-stack is active.
(struct machine_function): New bool field using_softstack.
* config/nvptx/nvptx.opt (msoft-stack): New option.
* doc/invoke.texi (msoft-stack): Document.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -139,6 +129,12 @@ static GTY(()) rtx worker_red_sym;
 /* Global lock variable, needed for 128bit worker & gang reductions.  */
 static GTY(()) tree global_lock_var;
 
+/* True if any function references __nvptx_stacks.  */
+static bool need_softstack_decl;
+
+/* True if any function references __nvptx_uni.  */
+static bool need_unisimt_decl;
+
 /* Allocate a new, cleared machine_function structure.  */
 
 static struct machine_function *
@@ -992,16 +1086,55 @@ nvptx_declare_function_name (FILE *file, const char 
*name, const_tree decl)
 
   fprintf (file, "%s", s.str().c_str());
 
-  /* Declare a local var for outgoing varargs.  */
-  if (cfun->machine->has_varadic)
-init_frame (file, STACK_POINTER_REGNUM,
-   UNITS_PER_WORD, crtl->outgoing_args_size);
-
-  /* Declare a local variable for the frame.  */
   HOST_WIDE_INT sz = get_frame_size ();
-  if (sz || cfun->machine->has_chain)
-init_frame (file, FRAME_POINTER_REGNUM,
-   crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
+  bool need_frameptr = sz || cfun->machine->has_chain;
+  int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
+  if (!TARGET_SOFT_STACK)
+{
+  /* Declare a local var for outgoing varargs.  */
+  if (cfun->machine->has_varadic)
+   init_frame (file, STACK_POINTER_REGNUM,
+   UNITS_PER_WORD, crtl->outgoing_args_size);
+
+  /* Declare a local variable for the frame.  */
+  if (need_frameptr)
+   init_frame (file, FRAME_POINTER_REGNUM, alignment, sz);
+}
+  else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca)
+{
+  /* Maintain 64-bit stack alignment.  */
+  int keep_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
+  sz = ROUND_UP (sz, keep_align);
+  int bits = POINTER_SIZE;
+  fprintf (file, "\t.reg.u%d %%frame;\n", bits);
+  fprintf (file, "\t.reg.u32 %%fstmp0;\n");
+  fprintf (file, "\t.reg.u%d %%fstmp1;\n", bits);
+  fprintf (file, "\t.reg.u%d %%fstmp2;\n", bits);
+  fprintf (file, "\tmov.u32 %%fstmp0, %%tid.y;\n");
+  fprintf (file, "\tmul%s.u32 %%fstmp1, %%fstmp0, %d;\n",
+  bits == 64 ? ".wide" : ".lo", bits / 8);
+  fprintf (file, "\tmov.u%d %%fstmp2, __nvptx_stacks;\n", bits);
+  /* fstmp2 = &__nvptx_stacks[tid.y];  */
+  fprintf (file, "\tadd.u%d %%fstmp2, %%fstmp2, %%fstmp1;\n", bits);
+  fprintf (file, "\tld.shared.u%d %%fstmp1, [%%fstmp2];\n", bits);
+  fprintf (file, "\tsub.u%d %%frame, %%fstmp1, "
+  HOST_WIDE_INT_PRINT_DEC ";\n", bits, sz);
+  if (alignment > keep_align)
+   fprintf (file, "\tand.b%d %%frame, %%frame, %d;\n",
+bits, -alignment);
+  fprintf (file, "\t.reg.u%d %%stack;\n", bits);
+  sz = crtl->outgoing_args_size;
+  gcc_assert (sz % 

[PATCH] add support for placing variables in shared memory

2016-04-20 Thread Alexander Monakov
Allow using __attribute__((shared)) to place static variables in '.shared'
memory space.

Previously posted here:

[gomp-nvptx 04/13] nvptx backend: add support for placing variables in shared 
memory
https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01546.html

[gomp-nvptx] doc: document nvptx shared attribute
https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00940.html

2016-04-19  Alexander Monakov  

* doc/extend.texi (Nvidia PTX Variable Attributes): New section.

2016-01-17  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_encode_section_info): Handle "shared"
attribute.
(nvptx_handle_shared_attribute): New.  Use it...
(nvptx_attribute_table): ... here (new entry).


diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -234,9 +224,12 @@ nvptx_encode_section_info (tree decl, rtx rtl, int first)
   if (TREE_CONSTANT (decl))
area = DATA_AREA_CONST;
   else if (TREE_CODE (decl) == VAR_DECL)
-   /* TODO: This would be a good place to check for a .shared or
-  other section name.  */
-   area = TREE_READONLY (decl) ? DATA_AREA_CONST : DATA_AREA_GLOBAL;
+   {
+ if (lookup_attribute ("shared", DECL_ATTRIBUTES (decl)))
+   area = DATA_AREA_SHARED;
+ else
+   area = TREE_READONLY (decl) ? DATA_AREA_CONST : DATA_AREA_GLOBAL;
+   }
 
   SET_SYMBOL_DATA_AREA (XEXP (rtl, 0), area);
 }
@@ -3805,12 +4025,36 @@ nvptx_handle_kernel_attribute (tree *node, tree name, 
tree ARG_UNUSED (args),
   return NULL_TREE;
 }
 
+/* Handle a "shared" attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+nvptx_handle_shared_attribute (tree *node, tree name, tree ARG_UNUSED (args),
+  int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+  tree decl = *node;
+
+  if (TREE_CODE (decl) != VAR_DECL)
+{
+  error ("%qE attribute only applies to variables", name);
+  *no_add_attrs = true;
+}
+  else if (current_function_decl && !TREE_STATIC (decl))
+{
+  error ("%qE attribute only applies to non-stack variables", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
+
 /* Table of valid machine attributes.  */
 static const struct attribute_spec nvptx_attribute_table[] =
 {
   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
affects_type_identity } */
   { "kernel", 0, 0, true, false,  false, nvptx_handle_kernel_attribute, false 
},
+  { "shared", 0, 0, true, false,  false, nvptx_handle_shared_attribute, false 
},
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e11ce4d..5eeb179 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5469,6 +5469,7 @@ attributes.
 * MeP Variable Attributes::
 * Microsoft Windows Variable Attributes::
 * MSP430 Variable Attributes::
+* Nvidia PTX Variable Attributes::
 * PowerPC Variable Attributes::
 * RL78 Variable Attributes::
 * SPU Variable Attributes::
@@ -6099,6 +6100,20 @@ same name (@pxref{MSP430 Function Attributes}).
 These attributes can be applied to both functions and variables.
 @end table
 
+@node Nvidia PTX Variable Attributes
+@subsection Nvidia PTX Variable Attributes
+
+These variable attributes are supported by the Nvidia PTX back end:
+
+@table @code
+@item shared
+@cindex @code{shared} attribute, Nvidia PTX
+Use this attribute to place a variable in the @code{.shared} memory space.
+This memory space is private to each cooperative thread array; only threads
+within one thread block refer to the same instance of the variable.
+The runtime does not initialize variables in this memory space.
+@end table
+
 @node PowerPC Variable Attributes
 @subsection PowerPC Variable Attributes
 



[PATCH] new target hook: TARGET_SIMT_VF

2016-04-20 Thread Alexander Monakov
This patch adds a new target hook and implements it in a straightforward
manner on NVPTX to indicate that the target is running in SIMT fashion with 32
threads in a synchronous group ("warp").  For use in OpenMP transforms.

Previously posted here:

[gomp-nvptx 5/9] new target hook: TARGET_SIMT_VF
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg00122.html
 
2015-12-09  Alexander Monakov  

* config/nvptx/nvptx.c (nvptx_simt_vf): New.
(TARGET_SIMT_VF): Define.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in (TARGET_SIMT_VF): New hook.
* target.def: Define it.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4119,10 +4375,19 @@ nvptx_expand_builtin (tree exp, rtx target, rtx 
ARG_UNUSED (subtarget),
 }
 }
 
+
 /* Define dimension sizes for known hardware.  */
 #define PTX_VECTOR_LENGTH 32
 #define PTX_WORKER_LENGTH 32
 
+/* Implement TARGET_SIMT_VF target hook: number of threads in a warp.  */
+
+static int
+nvptx_simt_vf ()
+{
+  return PTX_VECTOR_LENGTH;
+}
+
 /* Validate compute dimensions of an OpenACC offload or routine, fill
in non-unity defaults.  FN_LEVEL indicates the level at which a
routine might spawn a loop.  It is negative for non-routines.  */
@@ -4874,6 +5139,9 @@ nvptx_goacc_reduction (gcall *call)
 #undef  TARGET_BUILTIN_DECL
 #define TARGET_BUILTIN_DECL nvptx_builtin_decl
 
+#undef TARGET_SIMT_VF
+#define TARGET_SIMT_VF nvptx_simt_vf
+
 #undef TARGET_GOACC_VALIDATE_DIMS
 #define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index aae09bf..01d7887 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5764,6 +5764,10 @@ usable.  In that case, the smaller the number is, the 
more desirable it is
 to use it.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_SIMT_VF (void)
+Return number of threads in SIMT thread group on the target.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int 
*@var{dims}, int @var{fn_level})
 This hook should check the launch dimensions provided for an OpenACC
 compute region, or routine.  Defaulted values are represented as -1
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f31c763..dea70bd 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4263,6 +4263,8 @@ address;  but often a machine-dependent strategy can 
generate better code.
 
 @hook TARGET_SIMD_CLONE_USABLE
 
+@hook TARGET_SIMT_VF
+
 @hook TARGET_GOACC_VALIDATE_DIMS
 
 @hook TARGET_GOACC_DIM_LIMIT
diff --git a/gcc/target.def b/gcc/target.def
index d60319e4..576bf68 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1639,6 +1639,18 @@ int, (struct cgraph_node *), NULL)
 
 HOOK_VECTOR_END (simd_clone)
 
+/* Functions relating to OpenMP SIMT vectorization transform.  */
+#undef HOOK_PREFIX
+#define HOOK_PREFIX "TARGET_SIMT_"
+HOOK_VECTOR (TARGET_SIMT, simt)
+
+DEFHOOK
+(vf,
+"Return number of threads in SIMT thread group on the target.",
+int, (void), NULL)
+
+HOOK_VECTOR_END (simt)
+
 /* Functions relating to openacc.  */
 #undef HOOK_PREFIX
 #define HOOK_PREFIX "TARGET_GOACC_"




[PATCH] new patterns for OpenMP SIMD-via-SIMT

2016-04-20 Thread Alexander Monakov
This patch adds a few insn patterns used for OpenMP SIMD
reduction/lastprivate/ordered lowering for SIMT execution.  OpenMP lowering
produces GOMP_SIMT_... internal functions when lowering SIMD constructs that
can be offloaded to a SIMT device.  After lto stream-in, those internal
functions are trivially folded when compiling for non-SIMT execution;
otherwise they are kept, and expanded into these insns.

Previously posted here:

[gomp-nvptx 01/13] nvptx backend: new patterns for OpenMP SIMD-via-SIMT
https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01550.html
 
2016-01-17  Alexander Monakov  

* config/nvptx/nvptx-protos.h (nvptx_shuffle_kind): Move enum
declaration from nvptx.c.
(nvptx_gen_shuffle): Declare.
* config/nvptx/nvptx.c (nvptx_shuffle_kind): Moved to nvptx-protos.h.
(nvptx_gen_shuffle): No longer static.
* config/nvptx/nvptx.md (UNSPEC_VOTE_BALLOT): New unspec.
(UNSPEC_LANEID): Ditto.
(UNSPECV_NOUNROLL): Ditto.
(nvptx_vote_ballot): New pattern.
(omp_simt_lane): Ditto.
(nvptx_nounroll): Ditto.
(omp_simt_last_lane): Ditto.
(omp_simt_ordered): Ditto.
(omp_simt_vote_any): Ditto.
(omp_simt_xchg_bfly): Ditto.
(omp_simt_xchg_idx): Ditto.
* target-insns.def (omp_simt_lane): New.
(omp_simt_last_lane): New.
(omp_simt_ordered): New.
(omp_simt_vote_any): New.
(omp_simt_xchg_bfly): New.
(omp_simt_xchg_idx): New.

diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index ec4588e..2ea3d54 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -21,6 +21,16 @@
 #ifndef GCC_NVPTX_PROTOS_H
 #define GCC_NVPTX_PROTOS_H
 
+/* The kind of shuffe instruction.  */
+enum nvptx_shuffle_kind
+{
+  SHUFFLE_UP,
+  SHUFFLE_DOWN,
+  SHUFFLE_BFLY,
+  SHUFFLE_IDX,
+  SHUFFLE_MAX
+};
+
 extern void nvptx_declare_function_name (FILE *, const char *, const_tree 
decl);
 extern void nvptx_declare_object_name (FILE *file, const char *name,
   const_tree decl);
@@ -36,6 +46,7 @@ extern void nvptx_register_pragmas (void);
 extern void nvptx_expand_oacc_fork (unsigned);
 extern void nvptx_expand_oacc_join (unsigned);
 extern void nvptx_expand_call (rtx, rtx);
+extern rtx nvptx_gen_shuffle (rtx, rtx, rtx, nvptx_shuffle_kind);
 extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
 extern const char *nvptx_output_mov_insn (rtx, rtx);
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1..e9e4d06 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -70,16 +70,6 @@
 /* This file should be included last.  */
 #include "target-def.h"
 
-/* The kind of shuffe instruction.  */
-enum nvptx_shuffle_kind
-{
-  SHUFFLE_UP,
-  SHUFFLE_DOWN,
-  SHUFFLE_BFLY,
-  SHUFFLE_IDX,
-  SHUFFLE_MAX
-};
-
 /* The various PTX memory areas an object might reside in.  */
 enum nvptx_data_area
 {
@@ -1266,7 +1407,7 @@ nvptx_gen_pack (rtx dst, rtx src0, rtx src1)
 /* Generate an instruction or sequence to broadcast register REG
across the vectors of a single warp.  */
 
-static rtx
+rtx
 nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
 {
   rtx res;
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 33a4862..e5650b6 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -40,6 +40,10 @@ (define_c_enum "unspec" [
 
UNSPEC_BIT_CONV
 
+   UNSPEC_VOTE_BALLOT
+
+   UNSPEC_LANEID
+
UNSPEC_SHUFFLE
UNSPEC_BR_UNIFIED
 ])
@@ -1123,6 +1160,81 @@ (define_insn "nvptx_shuffle"
   ""
   "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
 
+(define_insn "nvptx_vote_ballot"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+   (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
+  UNSPEC_VOTE_BALLOT))]
+  ""
+  "%.\\tvote.ballot.b32\\t%0, %1;")
+
+(define_insn "omp_simt_lane"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "")
+   (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
+  ""
+  "%.\\tmov.u32\\t%0, %%laneid;")
+
+(define_insn "nvptx_nounroll"
+  [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
+  ""
+  "\\t.pragma \\\"nounroll\\\";"
+  [(set_attr "predicable" "false")])
+
+(define_expand "omp_simt_last_lane"
+  [(match_operand:SI 0 "nvptx_register_operand" "=R")
+   (match_operand:SI 1 "nvptx_register_operand" "R")]
+  ""
+{
+  rtx pred = gen_reg_rtx (BImode);
+  rtx tmp = gen_reg_rtx (SImode);
+  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
+  emit_insn (gen_nvptx_vote_ballot (tmp, pred));
+  emit_insn (gen_ctzsi2 (operands[0], tmp));
+  DONE;
+})
+
+(define_expand "omp_simt_ordered"
+  [(match_operand:SI 0 "nvptx_register_operand" "=R")
+   (match_operand:SI 1 "nvptx_register_operand" "R")]
+  ""
+{
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn 

OpenMP offloading to NVPTX: backend patches

2016-04-20 Thread Alexander Monakov
Hello!

In replies to this email, I'll be posting 9 NVPTX-specific patches that are
required for enabling OpenMP offloading.  I intend to post corresponding
libgomp and middle-end changes that make these useful a bit later.

The patches are generated by taking a diff on amonakov/gomp-nvptx git branch,
and cutting it up into pieces that can be reviewed separately.  I've made an
effort to provide links to original patch submissions, which are even more
fine-grained, and provide verbose descriptions.

In a few places there's some "leakage" where a given hunk contains stuff from
separate patches.  I hope it's obvious where it happens, and that it does not
make reviewing harder.

Even though the patches are posted separately, I think committing them
separately wouldn't be useful, i.e. I'd like to commit all at once after
review.

Please review for trunk.

Thanks.
Alexander


Re: [PATCH] [AArch64] support -mfentry feature for arm64

2016-04-20 Thread Szabolcs Nagy
On 20/04/16 02:25, AKASHI Takahiro wrote:
> On Tue, Apr 19, 2016 at 09:39:39AM +0300, Alexander Monakov wrote:
>> On Tue, 19 Apr 2016, AKASHI Takahiro wrote:
> But if Szabolcs' two-instruction 
> sequence in the adjacent subthread is sufficient, this is moot.

 .  It can also be solved by having just one NOP after the function label, 
 and a number of them before, then no thread can be in the nop pad.  That 
 seems to indicate that GCC should not try to be too clever and simply 
 leave the specified number of nops before and after the function label, 
 leaving safety measures to the patching infrastructure.
>>>
>>> I don't get this idea very well.
>>> How can the instructions *before* a function label be executed
>>> after branching into this function?
>>
>> The single nop after the function label is changed to a short backwards 
>> branch
>> to the instructions just before the function label.
>>
>> As a result, the last instruction in the pad would have to become a short
>> forward branch jumping over the backwards branch described above, to the 
>> first
>> real instruction of the function.
> 
> So you mean something like:
> 1:
>   str x30, [sp, #-8]!
>   bl _tracefunc
>   ldr x30, [sp], #8
>   b 2f
> .global 
>   b 1b
> 2:
>   
>   ...
> (We will not have to use x9 or else to preserve x30 here.)
> 
> Interesting.
> Livepatch code in the kernel has an assumption that the address of
> "bl _tracefunc" be equal to , but a recent patch for
> power pc to support livepatch tries to ease this restriction [1],
> and so hopefully it won't be an issue.
> (I will have to dig into the kernel code to be sure that there is
> no other issues though.)
> 

i think ldr x30,[sp],#8 after the _tracefunc is not ok for
livepatching, since _tracefunc will change the return
address to the new function to hijack the call, which will
not restore the stack (this can be solved if the new
function can be instrumented, but fiddly).
and sp has to be 16 byte aligned, so the options are

  str x30,[sp,#-16]!
  bl _tracefunc

or

  mov x9,x30
  bl _tracefunc

where _tracefunc is responsible for restoring x30 and
sp, and this sequence can come before or after the
function symbol.

if it's before then

1:
  
  bl _tracefunc
  b 2f
func:
  b 1b
2:
  

the trace disabled case is better (only one nop), but i
think it would mean more kernel work (the current
code assumes bl _tracefunc is nopped, so whenever
tracing is enabled a different tracefunc target may be
used in the atomic update, i don't know if this is
necessary though).

it is probably only worth inventing something new for
aarch64 in gcc if the kernel can use that consistently
across targets or if that can cover other significant
use cases, but it's not clear if the various flexible nop
padding solutions can be more useful than the simple
two instruction sequence which kernel tools can already
deal with.

so it seems to me that

func:
  mov x9, x30
  bl __fentry__
  

is still the best option with a new -mfentry option for
aarch64 (then we can keep the default -pg behaviour
for backward compatibility and work similarly to x86
with -mfentry) it does not solve the more general
instrumentation problem, but that would require more
analysis.

(on x86, gcc also provides -mrecord-mcount and
-mnop-mcount to record the nopped out mcount call
sites,  but the kernel seems to use its own tool
to do that by looking for the mcount/fentry call
relocs so they are probably not needed).

> Thanks,
> -Takahiro AKASHI
> 
> [1] http://lkml.iu.edu//hypermail/linux/kernel/1604.1/04111.html and
> http://lkml.iu.edu//hypermail/linux/kernel/1604.1/04112.html
> 
>> Alexander
> 



Re: [PATCH PR69489/02]Handle PHI which can be degenerated to two arguments node in tree ifcvt.

2016-04-20 Thread Bin.Cheng
On Tue, Mar 22, 2016 at 10:41 AM, Richard Biener
 wrote:
> On Mon, Mar 21, 2016 at 4:22 PM, Bin Cheng  wrote:
>> Hi,
>> The second issue revealed by PR69489 is tree ifcvt could not convert PHI 
>> nodes with more than 2 arguments.  Among these nodes, there is a special 
>> kind of PHI which can be handled.  Precisely, if the PHI node satisfies 
>> below two conditions:
>>  1) Number of PHI arguments with different values equals to 2 and one 
>> argument has the only occurrence.
>>  2) The edge corresponding to the unique argument isn't critical edge.
>>
>>Such PHI can be degenerated and handled just like PHI node with only two 
>> arguments.  For example:
>>  res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
>>can be transformed into:
>>  res = (predicate of e3) ? A_2 : A_1;
>>
>> This patch fixes the issue.  I know we may be able to further relax the 
>> check and allow handling of general multiple args PHI node, this can be a 
>> starter since the change is kind of trivial.
>> Bootstrap & test on x86_64 & AArch64.  Though the first part patch at 
>> https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00888.html needs to be 
>> revised, this one is quite independent apart from the test case itself.  So 
>> any opinions?
>
> Looks good to me.  Btw, see also PR56541 where jump threading can
Patch re-tested and applied on trunk with below minor change:
+  unsigned int i, i1, i2, n1 = 0, n2 = 0;
  Changed to:
+  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
Because GCC falsely reports uninitialized use of `i2' at O2
optimization level, resulting in bootstrap failure.

> introduce the case but with more than two distinct PHI args.
> IMHO we "simply" want to force *any_mask_load_store to true if
> if_convertible_phi_p runs into this case (so we perform
> versioning to only expose the if-converted code to the vectorizer
> which has a cost model to tell whether the result is profitable).
> There is still the critical edge splitting only performed for
> aggressive-if-conv but I think that's easily sth we can do for all
> loop
> bodies.
Yeah, I have following patch to handle such cases.

Thanks,
bin
>
> Richard.
>
>> Thanks,
>> bin
>>
>> 2016-03-21  Bin Cheng  
>>
>> PR tree-optimization/69489
>> * tree-if-conv.c (phi_convertible_by_degenerating_args): New.
>> (if_convertible_phi_p): Call phi_convertible_by_degenerating_args.
>> Revise dump message.
>> (if_convertible_bb_p): Remove check on edge count of basic block's
>> predecessors.
>>
>> gcc/testsuite/ChangeLog
>> 2016-03-21  Bin Cheng  
>>
>> PR tree-optimization/69489
>> * gcc.dg/tree-ssa/ifc-pr69489-2.c: New test.


Re: [PATCH][CilkPlus] Fix PR69363

2016-04-20 Thread Ilya Verbin
On Wed, Feb 17, 2016 at 15:46:00 +0100, Jakub Jelinek wrote:
> On Wed, Feb 17, 2016 at 05:32:58PM +0300, Ilya Verbin wrote:
> > This patch fixes PR69363.
> > Bootstrap and make check passed.  OK for... stage 1?
> 
> Ok for stage1, with a few nits.

Committed to trunk with fixed line lengths.


gcc/c-family/
PR c++/69363
* c-cilkplus.c (c_finish_cilk_clauses): Remove function.
* c-common.h (c_finish_cilk_clauses): Remove declaration.
gcc/c/
PR c++/69363
* c-parser.c (c_parser_cilk_all_clauses): Use c_finish_omp_clauses
instead of c_finish_cilk_clauses.
* c-tree.h (c_finish_omp_clauses): Add new default argument.
* c-typeck.c (c_finish_omp_clauses): Add new argument.  Allow
floating-point variables in the linear clause for Cilk Plus.
gcc/cp/
PR c++/69363
* cp-tree.h (finish_omp_clauses): Add new default argument.
* parser.c (cp_parser_cilk_simd_all_clauses): Use finish_omp_clauses
instead of c_finish_cilk_clauses.
* semantics.c (finish_omp_clauses): Add new argument.  Allow
floating-point variables in the linear clause for Cilk Plus.
gcc/testsuite/
PR c++/69363
* c-c++-common/cilk-plus/PS/clauses3.c: Adjust dg-error string.
* c-c++-common/cilk-plus/PS/clauses4.c: New test.
* c-c++-common/cilk-plus/PS/pr69363.c: New test.


diff --git a/gcc/c-family/c-cilkplus.c b/gcc/c-family/c-cilkplus.c
index 3e7902fd..9f1f364 100644
--- a/gcc/c-family/c-cilkplus.c
+++ b/gcc/c-family/c-cilkplus.c
@@ -41,56 +41,6 @@ c_check_cilk_loop (location_t loc, tree decl)
   return true;
 }
 
-/* Validate and emit code for <#pragma simd> clauses.  */
-
-tree
-c_finish_cilk_clauses (tree clauses)
-{
-  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-{
-  tree prev = clauses;
-
-  /* If a variable appears in a linear clause it cannot appear in
-any other OMP clause.  */
-  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
-   for (tree c2 = clauses; c2; c2 = OMP_CLAUSE_CHAIN (c2))
- {
-   if (c == c2)
- continue;
-   enum omp_clause_code code = OMP_CLAUSE_CODE (c2);
-
-   switch (code)
- {
- case OMP_CLAUSE_LINEAR:
- case OMP_CLAUSE_PRIVATE:
- case OMP_CLAUSE_FIRSTPRIVATE:
- case OMP_CLAUSE_LASTPRIVATE:
- case OMP_CLAUSE_REDUCTION:
-   break;
-
- case OMP_CLAUSE_SAFELEN:
-   goto next;
-
- default:
-   gcc_unreachable ();
- }
-
-   if (OMP_CLAUSE_DECL (c) == OMP_CLAUSE_DECL (c2))
- {
-   error_at (OMP_CLAUSE_LOCATION (c2),
- "variable appears in more than one clause");
-   inform (OMP_CLAUSE_LOCATION (c),
-   "other clause defined here");
-   // Remove problematic clauses.
-   OMP_CLAUSE_CHAIN (prev) = OMP_CLAUSE_CHAIN (c2);
- }
- next:
-   prev = c2;
- }
-}
-  return clauses;
-}
-
 /* Calculate number of iterations of CILK_FOR.  */
 
 tree
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fa3746c..663e457 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1369,7 +1369,6 @@ extern enum stv_conv scalar_to_vector (location_t loc, 
enum tree_code code,
   tree op0, tree op1, bool);
 
 /* In c-cilkplus.c  */
-extern tree c_finish_cilk_clauses (tree);
 extern tree c_validate_cilk_plus_loop (tree *, int *, void *);
 extern bool c_check_cilk_loop (location_t, tree);
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 1b6bacd..bdd669d 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -17509,7 +17509,7 @@ c_parser_cilk_all_clauses (c_parser *parser)
 
  saw_error:
   c_parser_skip_to_pragma_eol (parser);
-  return c_finish_cilk_clauses (clauses);
+  return c_finish_omp_clauses (clauses, false, false, true);
 }
 
 /* This function helps parse the grainsize pragma for a _Cilk_for statement.
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index d559207..4633182 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -661,7 +661,7 @@ extern tree c_begin_omp_task (void);
 extern tree c_finish_omp_task (location_t, tree, tree);
 extern void c_finish_omp_cancel (location_t, tree);
 extern void c_finish_omp_cancellation_point (location_t, tree);
-extern tree c_finish_omp_clauses (tree, bool, bool = false);
+extern tree c_finish_omp_clauses (tree, bool, bool = false, bool = false);
 extern tree c_build_va_arg (location_t, tree, location_t, tree);
 extern tree c_finish_transaction (location_t, tree, int);
 extern bool c_tree_equal (tree, tree);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 59a3c61..58c2139 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -12496,7 +12496,8 @@ c_find_omp_placeholder_r (tree 

Re: [PING][PATCH] libgcc: Fix typos in comments for ARM FP emulation routines

2016-04-20 Thread Sandra Loosemore

On 04/20/2016 06:38 AM, Martin Galvan wrote:

On Wed, Apr 20, 2016 at 1:44 AM, Sandra Loosemore
 wrote:

Or, do you need someone to check this in for you because you don't have
write access to the repository?


Hi! Yeah, I don't have write access. If you could commit this for me
it would be great. Thanks!


OK, done.  It's r235291.

-Sandra



[PATCHES] Update avx256-unaligned-load-1.c and avx256-unaligned-store-2.c

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 4:19 AM, Uros Bizjak  wrote:

> BTW: There are a couple of regressions in the testsuite [1] when
> configured --with-arch=corei7. Can you please look at the testcases,
> if scan patterns need to be adjusted?

They are caused by -mtune=slm.

> FAIL: gcc.target/i386/avx256-unaligned-load-1.c scan-assembler-not
> (avx_loadups256|vmovups[^\\n\\r]*movv8sf_internal)

It is because avx_loadups256 and sse_loadups have been replaced by
movv8sf_internal and movv4sf_internal.  -mtune=slm disables SSE
stores.

> FAIL: gcc.target/i386/avx256-unaligned-store-2.c scan-assembler
> vmovups.*movv16qi_internal/3

It is because -mtune=slm disables SSE stores which is expected
by avx256-unaligned-store-2.c.

Here are 2 patches for them.  Tested on x86-64.  OK for trunk?

-- 
H.J.
From be7034d5b3f1b261058fbb7359822088efece1e9 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 20 Apr 2016 08:22:00 -0700
Subject: [PATCH 1/2] Update load scan avx256-unaligned-load-1.c

Since avx_loadups256 and sse_loadups have been replaced by movv8sf_internal
and movv4sf_internal, respectively, we need to scan movv8sf_internal and
movv4sf_internal for load.

	* gcc.target/i386/avx256-unaligned-load-1.c: Update load scan.
---
 gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
index 0c476cd..68378a5 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
@@ -14,6 +14,6 @@ avx_test (void)
 c[i] = a[i] * b[i+3];
 }
 
-/* { dg-final { scan-assembler-not "(avx_loadups256|vmovups\[^\n\r]*movv8sf_internal)" } } */
-/* { dg-final { scan-assembler "(sse_loadups|movv4sf_internal)" } } */
+/* { dg-final { scan-assembler-not "vmovups\[^\n\r]*movv8sf_internal/2" } } */
+/* { dg-final { scan-assembler "movv4sf_internal/2" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */
-- 
2.5.5

From 80a55496112abd43ff2076152ec9662e2709bedd Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 20 Apr 2016 08:24:43 -0700
Subject: [PATCH 2/2] Add -mtune-ctrl=sse_typeless_stores to
 avx256-unaligned-store-2.c

Since avx256-unaligned-store-2.c scans typeless SSE stores, add
-mtune-ctrl=sse_typeless_stores to enable typeless SSE stores.

	* gcc.target/i386/avx256-unaligned-store-2.c: Add
	-mtune-ctrl=sse_typeless_stores.
---
 gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
index 817be17..87285c6 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128" } */
+/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128" } */
 
 #define N 1024
 
-- 
2.5.5



Re: C++ PATCH to fix a part of c++/70513 (ICE-on-invalid with enums)

2016-04-20 Thread Jason Merrill

On 04/08/2016 07:51 AM, Marek Polacek wrote:

This is my attempt to fix at least a part of this PR.  I haven't been able to
come up with a fix that fixes the other part involving templates.

We were ICEing on code such as

struct S
{
   enum E : int;
   enum S::E : int { foo } e;
};

Clang rejects this with "extra qualification" error.  When I modified the test
to use structs rather than enums...

struct T
{
   struct U;
   struct T::U {};
};

...I found out that we reject this with "extra qualification not allowed".  So
I think the enum case is missing a similar check that this patch adds.

By the template part of this PR I mean that we ICE on

template <typename T>
class D
{
   enum D::A { foo } c;
};

where clang++ says
error: template specialization or definition requires a template parameter list
corresponding to the nested type 'D<T>'
which I guess means that valid code would have "<T>" after "D".


No, this is misleading; adding the template args wouldn't make the extra 
qualification valid.  We should just give the extra qualification error 
in this case, too.


It might help to move your added check to before we push_scope.


I thought
num_template_headers_for_class and cp_parser_check_template_parameters would
do the job here, but apparently something else needs to be used for this case.
But I'm at my wits' end here.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2016-04-08  Marek Polacek  

PR c++/70513
* parser.c (cp_parser_enum_specifier): Check for extra qualification.

* g++.dg/cpp0x/forw_enum12.C: New test.

diff --git gcc/cp/parser.c gcc/cp/parser.c
index 28e01af..dc0d1c8 100644
--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -17231,6 +17231,15 @@ cp_parser_enum_specifier (cp_parser* parser)
  type, prev_scope, nested_name_specifier);
  type = error_mark_node;
}
+ /* If that scope is the scope where the declaration is being placed
+the program is invalid.  */
+ else if (nested_name_specifier == prev_scope)
+   {
+ permerror (type_start_token->location,
+"extra qualification not allowed");
+ type = error_mark_node;
+ nested_name_specifier = NULL_TREE;
+   }
}

if (scoped_enum_p)
diff --git gcc/testsuite/g++.dg/cpp0x/forw_enum12.C 
gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
index e69de29..906ba68 100644
--- gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
+++ gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
@@ -0,0 +1,29 @@
+// PR c++/70513
+// { dg-do compile { target c++11 } }
+
+struct S1
+{
+  enum E : int;
+  enum S1::E : int { X } e; // { dg-error "extra qualification not allowed" }
+};
+
+struct S2
+{
+  enum class E : int;
+  enum class S2::E : int { X } e; // { dg-error "extra qualification not 
allowed" }
+};
+
+struct S3
+{
+  enum struct E : int;
+  enum struct S3::E : int { X } e; // { dg-error "extra qualification not 
allowed" }
+};
+
+struct S4
+{
+  struct S5
+  {
+enum E : char;
+enum S4::S5::E : char { X } e; // { dg-error "extra qualification not 
allowed" }
+  };
+};

Marek





Re: [PATCH] Don't build 32-bit libgomp with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 7:53 AM, Jakub Jelinek  wrote:
> On Wed, Apr 20, 2016 at 07:43:27AM -0700, H.J. Lu wrote:
>> From 12c6ddcf67593ed7137764ca74043f1a9c2d8fda Mon Sep 17 00:00:00 2001
>> From: "H.J. Lu" 
>> Date: Wed, 30 Mar 2016 05:56:08 -0700
>> Subject: [PATCH 2/3] Don't build 32-bit libgomp with -march=i486 on x86-64
>>
>> Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
>> --with-arch-32= is used.  There is no need for -march=i486 to compile
>> 32-bit libgomp on x86-64.
>>
>>   PR target/70454
>>   * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
>>   32-bit target library on x86-64.
>
> That is wrong.  It could be --with-arch-32=i386 build.
> If you really want to find out the ISA reliably, just run ${CC} ${CFLAGS} -E 
> -dD
> -xc /dev/null and get through all the cases, or try to compile a testcase
> with some __atomic* builtin in it and see if it results in a call or not.
>

That is a better approach.  However it isn't how it is handled for
Linux/x86.  Should we rewrite the whole -march= stuff for 32-bit
Linux/x86 run-time, independent of Linux/x86 or Linux/x86-64?

-- 
H.J.


[PATCH PR70715]Expand simple operations in IV.base and check if it's the control_IV

2016-04-20 Thread Bin Cheng
Hi,
As reported in PR70715, GCC failed to prove no-overflows of IV(&p[n]) for 
simple example like:
int
foo (char *p, unsigned n)
{
  while(n--)
{
  p[n]='A';
}
  return 0;
}
Actually, code has already been added to handle loops of this form when fixing 
PR68529.  Problem with this case is loop niter analyzer records control_IV with 
its base expanded by calling expand_simple_operations.  This patch simply adds 
code expanding BASE before we check its equality against control_IV.base.  In 
the long run, we might want to remove the use of expand_simple_operations.

Bootstrap and test on x86_64.  Is it OK?

Thanks,
bin


2016-04-20  Bin Cheng  

PR tree-optimization/70715
* tree-ssa-loop-niter.c (loop_exits_before_overflow): Check equality
after expanding BASE using expand_simple_operations.
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 81689fc..c61083e 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -4141,7 +4141,11 @@ loop_exits_before_overflow (tree base, tree step,
continue;
 
  /* Done proving if this is a no-overflow control IV.  */
- if (operand_equal_p (base, civ->base, 0))
+ if (operand_equal_p (base, civ->base, 0)
+ /* Control IV is recorded after expanding simple operations,
+Here we compare it against expanded base too.  */
+ || operand_equal_p (expand_simple_operations (base),
+ civ->base, 0))
return true;
 
  /* If this is a before stepping control IV, in other words, we have
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c
new file mode 100644
index 000..b9223c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ldist" } */
+
+int
+foo (char *p, unsigned n)
+{
+  while(n--)
+{
+  p[n]='A';
+}
+  return 0;
+}
+
+/* Loop can be transformed into builtin memset since &p[n] is SCEV.  */
+/* { dg-final { scan-tree-dump "builtin_memset" "ldist" } } */


Re: [PATCH] Don't build 32-bit libatomic with -march=i486 on x86-64

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 07:45:44AM -0700, H.J. Lu wrote:
> On Wed, Apr 20, 2016 at 12:02 AM, Uros Bizjak  wrote:
> >>
> >> That is why I submitted my patches.  Since -m32 passes -march=x86-64
> >> to cc1 on x86-64,  we shouldn't pass -march=i486 to cc1.  It is undesirable
> >> especially when --with-arch= is used.  I noticed the issue when 32-bit
> >> libatomic/libgomp/libitm weren't optimized with -march=haswell when GCC
> >> was configured with --with-arch=haswell
> >
> > OK then. IMO, following comment is more informative:
> >
> > # x86_64 compiler passes -march=x86_64 by default when building 32bit
> > target libraries.
> >
>  +   # Since 64-bit arch > i486, we can use the same -march= to 
>  build
>  +   # both 32-bit and 64-bit target libraries.
> >
> > OK with the above change.
> >
> 
> This is the patch I checked in.  I also updated patches for libgomp:
> 
> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01079.html
> 
> and libitm:
> 
> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01080.html

This is wrong, see my other comment on the libgomp patch.

>   PR target/70454
>   * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
>   32-bit x86 target library on x86-64.
> ---
>  libatomic/configure.tgt | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
> index c5470d7..49233a4 100644
> --- a/libatomic/configure.tgt
> +++ b/libatomic/configure.tgt
> @@ -81,14 +81,8 @@ case "${target_cpu}" in
>   try_ifunc=yes
>   ;;
>x86_64)
> - case " ${CC} ${CFLAGS} " in
> -   *" -m32 "*)
> - XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
> - XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
> - ;;
> -   *)
> - ;;
> - esac
> + # x86_64 compiler passes -march=x86_64 by default when building
> + # 32bit target libraries.
>   ARCH=x86
>   # ??? Detect when -mcx16 is already enabled.
>   try_ifunc=yes


Jakub


Re: [PATCH] Don't build 32-bit libgomp with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 7:53 AM, Jakub Jelinek  wrote:
> On Wed, Apr 20, 2016 at 07:43:27AM -0700, H.J. Lu wrote:
>> From 12c6ddcf67593ed7137764ca74043f1a9c2d8fda Mon Sep 17 00:00:00 2001
>> From: "H.J. Lu" 
>> Date: Wed, 30 Mar 2016 05:56:08 -0700
>> Subject: [PATCH 2/3] Don't build 32-bit libgomp with -march=i486 on x86-64
>>
>> Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
>> --with-arch-32= is used.  There is no need for -march=i486 to compile
>> 32-bit libgomp on x86-64.
>>
>>   PR target/70454
>>   * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
>>   32-bit target library on x86-64.
>
> That is wrong.  It could be --with-arch-32=i386 build.

libgomp/configure.tgt has

   # Note that bare i386 is not included here.  We need cmpxchg.
i[456]86-*-linux*)
config_path="linux/x86 linux posix"
case " ${CC} ${CFLAGS} " in
 *" -m64 "*|*" -mx32 "*)
   ;;
 *)
   if test -z "$with_arch"; then
^^^

--with-arch overrides everything.  I just follow the same practice.

 XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
   fi
esac
;;

-- 
H.J.


Re: [PATCH] Don't build 32-bit libatomic with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 7:54 AM, Jakub Jelinek  wrote:
> On Wed, Apr 20, 2016 at 07:45:44AM -0700, H.J. Lu wrote:
>> On Wed, Apr 20, 2016 at 12:02 AM, Uros Bizjak  wrote:
>> >>
>> >> That is why I submitted my patches.  Since -m32 passes -march=x86-64
>> >> to cc1 on x86-64,  we shouldn't pass -march=i486 to cc1.  It is 
>> >> undesirable
>> >> especially when --with-arch= is used.  I noticed the issue when 32-bit
>> >> libatomic/libgomp/libitm weren't optimized with -march=haswell when GCC
>> >> was configured with --with-arch=haswell
>> >
>> > OK then. IMO, following comment is more informative:
>> >
>> > # x86_64 compiler passes -march=x86_64 by default when building 32bit
>> > target libraries.
>> >
>>  +   # Since 64-bit arch > i486, we can use the same -march= to 
>>  build
>>  +   # both 32-bit and 64-bit target libraries.
>> >
>> > OK with the above change.
>> >
>>
>> This is the patch I checked in.  I also updated patches for libgomp:
>>
>> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01079.html
>>
>> and libitm:
>>
>> https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01080.html
>
> This is wrong, see my other comment on the libgomp patch.
>

See my reply to your reply on the libgomp patch.


-- 
H.J.


Re: [PATCH] Don't build 32-bit libgomp with -march=i486 on x86-64

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 07:43:27AM -0700, H.J. Lu wrote:
> From 12c6ddcf67593ed7137764ca74043f1a9c2d8fda Mon Sep 17 00:00:00 2001
> From: "H.J. Lu" 
> Date: Wed, 30 Mar 2016 05:56:08 -0700
> Subject: [PATCH 2/3] Don't build 32-bit libgomp with -march=i486 on x86-64
> 
> Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
> --with-arch-32= is used.  There is no need for -march=i486 to compile
> 32-bit libgomp on x86-64.
> 
>   PR target/70454
>   * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
>   32-bit target library on x86-64.

That is wrong.  It could be --with-arch-32=i386 build.
If you really want to find out the ISA reliably, just run ${CC} ${CFLAGS} -E -dD
-xc /dev/null and get through all the cases, or try to compile a testcase
with some __atomic* builtin in it and see if it results in a call or not.
> ---
>  libgomp/configure.tgt | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
> index 77e73f0..c290080 100644
> --- a/libgomp/configure.tgt
> +++ b/libgomp/configure.tgt
> @@ -79,16 +79,10 @@ if test x$enable_linux_futex = xyes; then
>   esac
>   ;;
>  
> -# Similar jiggery-pokery for x86_64 multilibs, except here we
> -# can't rely on the --with-arch configure option, since that
> -# applies to the 64-bit side.
>  x86_64-*-linux*)
>   config_path="linux/x86 linux posix"
> - case " ${CC} ${CFLAGS} " in
> -   *" -m32 "*)
> - XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
> - ;;
> - esac
> + # x86_64 compiler passes -march=x86_64 by default when building
> + # 32bit target libraries.
>   ;;
>  
>  # Note that sparcv7 and sparcv8 is not included here.  We need cas.
> -- 
> 2.5.5
> 


Jakub


Re: [PATCH] Don't build 32-bit libatomic with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 12:02 AM, Uros Bizjak  wrote:
>>
>> That is why I submitted my patches.  Since -m32 passes -march=x86-64
>> to cc1 on x86-64,  we shouldn't pass -march=i486 to cc1.  It is undesirable
>> especially when --with-arch= is used.  I noticed the issue when 32-bit
>> libatomic/libgomp/libitm weren't optimized with -march=haswell when GCC
>> was configured with --with-arch=haswell
>
> OK then. IMO, following comment is more informative:
>
> # x86_64 compiler passes -march=x86_64 by default when building 32bit
> target libraries.
>
 +   # Since 64-bit arch > i486, we can use the same -march= to 
 build
 +   # both 32-bit and 64-bit target libraries.
>
> OK with the above change.
>

This is the patch I checked in.  I also updated patches for libgomp:

https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01079.html

and libitm:

https://gcc.gnu.org/ml/gcc-patches/2016-04/msg01080.html

Thanks.

-- 
H.J.
From bafad333cdf4125bf245e05d82df824ffb62c9d5 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 30 Mar 2016 05:51:28 -0700
Subject: [PATCH 1/3] Don't build 32-bit libatomic with -march=i486 on x86-64

Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
--with-arch-32= is used.  There is no need for -march=i486 to compile
32-bit libatomic on x86-64.

	PR target/70454
	* configure.tgt (XCFLAGS): Don't add -march=i486 to compile
	32-bit x86 target library on x86-64.
---
 libatomic/configure.tgt | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index c5470d7..49233a4 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -81,14 +81,8 @@ case "${target_cpu}" in
 	try_ifunc=yes
 	;;
   x86_64)
-	case " ${CC} ${CFLAGS} " in
-	  *" -m32 "*)
-	XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
-	XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
-	;;
-	  *)
-	;;
-	esac
+	# x86_64 compiler passes -march=x86_64 by default when building
+	# 32bit target libraries.
 	ARCH=x86
 	# ??? Detect when -mcx16 is already enabled.
 	try_ifunc=yes
-- 
2.5.5



Re: [PATCH] Don't build 32-bit libitm with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Tue, Apr 19, 2016 at 8:08 AM, H.J. Lu  wrote:
> Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
> --with-arch-32= is used.  There is no need for -march=i486 to compile
> 32-bit libitm on x86-64.
>
> Tested on x86-64.  OK for trunk?
>
>
> H.J.
> ---
> PR target/70454
> * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
> 32-bit target library on x86-64.

Here is the updated patch.  Tested on x86-64.  OK for trunk?

-- 
H.J.
From 9c156365db19ba3326e98dccf1cd8cc19b1af2fb Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 30 Mar 2016 05:57:48 -0700
Subject: [PATCH 3/3] Don't build 32-bit libitm with -march=i486 on x86-64

Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
--with-arch-32= is used.  There is no need for -march=i486 to compile
32-bit libitm on x86-64.

	PR target/70454
	* configure.tgt (XCFLAGS): Don't add -march=i486 to compile
	32-bit target library on x86-64.
---
 libitm/configure.tgt | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/libitm/configure.tgt b/libitm/configure.tgt
index e84382f..2a271ef 100644
--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
@@ -100,12 +100,8 @@ case "${target_cpu}" in
 	;;
 
   x86_64)
-	case " ${CC} ${CFLAGS} " in
-	  *" -m32 "*)
-	XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
-	XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
-	;;
-	esac
+	# x86_64 compiler passes -march=x86_64 by default when building
+	# 32bit target libraries.
 	XCFLAGS="${XCFLAGS} -mrtm"
 	ARCH=x86
 	;;
-- 
2.5.5



Re: [PATCH] Don't build 32-bit libgomp with -march=i486 on x86-64

2016-04-20 Thread H.J. Lu
On Tue, Apr 19, 2016 at 8:08 AM, H.J. Lu  wrote:
> Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
> --with-arch-32= is used.  There is no need for -march=i486 to compile
> 32-bit libgomp on x86-64.
>
> Tested on x86-64.  OK for trunk?
>
> H.J.
> ---
> PR target/70454
> * configure.tgt (XCFLAGS): Don't add -march=i486 to compile
> 32-bit target library on x86-64.

Here is the updated patch.  Tested on x86-64.  OK for trunk?

-- 
H.J.
From 12c6ddcf67593ed7137764ca74043f1a9c2d8fda Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Wed, 30 Mar 2016 05:56:08 -0700
Subject: [PATCH 2/3] Don't build 32-bit libgomp with -march=i486 on x86-64

Gcc uses the same -march= for both -m32 and -m64 on x86-64 unless
--with-arch-32= is used.  There is no need for -march=i486 to compile
32-bit libgomp on x86-64.

	PR target/70454
	* configure.tgt (XCFLAGS): Don't add -march=i486 to compile
	32-bit target library on x86-64.
---
 libgomp/configure.tgt | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index 77e73f0..c290080 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -79,16 +79,10 @@ if test x$enable_linux_futex = xyes; then
 	esac
 	;;
 
-# Similar jiggery-pokery for x86_64 multilibs, except here we
-# can't rely on the --with-arch configure option, since that
-# applies to the 64-bit side.
 x86_64-*-linux*)
 	config_path="linux/x86 linux posix"
-	case " ${CC} ${CFLAGS} " in
-	  *" -m32 "*)
-	XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
-	;;
-	esac
+	# x86_64 compiler passes -march=x86_64 by default when building
+	# 32bit target libraries.
 	;;
 
 # Note that sparcv7 and sparcv8 is not included here.  We need cas.
-- 
2.5.5



[PATCH, i386]: Use explicit mode iterators in LEA splitters

2016-04-20 Thread Uros Bizjak
Substantial cleanups without functional changes.

2016-04-20  Uros Bizjak  

* config/i386/i386.md (*lea<mode>_general_1): Rename from
*lea_general_1.  Use explicit SWI12 mode iterator.
(*lea<mode>_general_2): Rename from *lea_general_2.
Use explicit SWI12 mode iterator.
(*lea<mode>_general_3): Rename from *lea_general_3.
Use explicit SWI12 mode iterator.
(*lea<mode>_general_4): Split from *lea_general_4.
Use explicit SWI12 mode iterator.
(*lea<mode>_general_4): Split from *lea_general_4.
Use explicit SWI48 mode iterator.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 235207)
+++ config/i386/i386.md (working copy)
@@ -6213,144 +6213,119 @@
 ;; The lea patterns for modes less than 32 bits need to be matched by
 ;; several insns converted to real lea by splitters.
 
-(define_insn_and_split "*lea_general_1"
-  [(set (match_operand 0 "register_operand" "=r")
-   (plus (plus (match_operand 1 "index_register_operand" "l")
-   (match_operand 2 "register_operand" "r"))
- (match_operand 3 "immediate_operand" "i")))]
-  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode)
-   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && GET_MODE (operands[0]) == GET_MODE (operands[1])
-   && GET_MODE (operands[0]) == GET_MODE (operands[2])
-   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
-   || GET_MODE (operands[3]) == VOIDmode)"
+(define_insn_and_split "*lea_general_1"
+  [(set (match_operand:SWI12 0 "register_operand" "=r")
+   (plus:SWI12
+ (plus:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (match_operand:SWI12 2 "register_operand" "r"))
+ (match_operand:SWI12 3 "immediate_operand" "i")))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   "&& reload_completed"
-  [(const_int 0)]
+  [(set (match_dup 0)
+   (plus:SI
+ (plus:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))]
 {
-  machine_mode mode = SImode;
-  rtx pat;
-
-  operands[0] = gen_lowpart (mode, operands[0]);
-  operands[1] = gen_lowpart (mode, operands[1]);
-  operands[2] = gen_lowpart (mode, operands[2]);
-  operands[3] = gen_lowpart (mode, operands[3]);
-
-  pat = gen_rtx_PLUS (mode, gen_rtx_PLUS (mode, operands[1], operands[2]),
- operands[3]);
-
-  emit_insn (gen_rtx_SET (operands[0], pat));
-  DONE;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
 }
   [(set_attr "type" "lea")
(set_attr "mode" "SI")])
 
-(define_insn_and_split "*lea_general_2"
-  [(set (match_operand 0 "register_operand" "=r")
-   (plus (mult (match_operand 1 "index_register_operand" "l")
-   (match_operand 2 "const248_operand" "n"))
- (match_operand 3 "nonmemory_operand" "ri")))]
-  "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode)
-   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && GET_MODE (operands[0]) == GET_MODE (operands[1])
-   && (GET_MODE (operands[0]) == GET_MODE (operands[3])
-   || GET_MODE (operands[3]) == VOIDmode)"
+(define_insn_and_split "*lea_general_2"
+  [(set (match_operand:SWI12 0 "register_operand" "=r")
+   (plus:SWI12
+ (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (match_operand:SWI12 2 "const248_operand" "n"))
+ (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "#"
   "&& reload_completed"
-  [(const_int 0)]
+  [(set (match_dup 0)
+   (plus:SI
+ (mult:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))]
 {
-  machine_mode mode = SImode;
-  rtx pat;
-
-  operands[0] = gen_lowpart (mode, operands[0]);
-  operands[1] = gen_lowpart (mode, operands[1]);
-  operands[3] = gen_lowpart (mode, operands[3]);
-
-  pat = gen_rtx_PLUS (mode, gen_rtx_MULT (mode, operands[1], operands[2]),
- operands[3]);
-
-  emit_insn (gen_rtx_SET (operands[0], pat));
-  DONE;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
 }
   [(set_attr "type" "lea")
(set_attr "mode" "SI")])
 
-(define_insn_and_split "*lea_general_3"
-  [(set (match_operand 0 "register_operand" "=r")
-   (plus (plus (mult (match_operand 1 "index_register_operand" "l")
- (match_operand 2 "const248_operand" "n"))
-   (match_operand 3 "register_operand" "r"))
- (match_operand 4 "immediate_operand" "i")))]
-  "(GET_MODE 

Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Wilco Dijkstra
Jakub Jelinek wrote:
> On Wed, Apr 20, 2016 at 11:17:06AM +, Wilco Dijkstra wrote:
>> Can you quantify "don't like"? I benchmarked rawmemchr on a few targets
>> and it's slower than strlen, so it's hard to guess what you don't like about 
>> it.
>
> This is the same stuff as has been discussed for mempcpy, rawmemchr is the
> API meant to use for getting pointer to the terminating '\0', if there are
> deficiencies on the library side, they should be fixed.  

About mempcpy, GLIBC nowadays expands it into memcpy (p, q, n) + n by default
in string.h.

Generally, after a lot of discussion on this last year, the consensus is that
these functions don't provide a useful gain and are often detrimental to
performance, even if optimized assembly implementations happen to be available,
due to I-cache pressure.

Emitting rawmemchr/mempcpy/stpcpy automatically as a result of optimization is
a bad idea for most targets given libraries often have inefficient default 
implementations.
I fixed the GLIBC mempcpy and stpcpy C implementations to use memcpy and strlen
so at least for these performance is no longer absolutely terrible.

Saying that all C libraries should be forced to provide highly optimized 
assembler
versions for these functions is onerous since they are not frequently used in 
code 
(a quick grep of SPEC resulted in one use of mempcpy, 0 uses of rawmemchr,
strchrnul and stpcpy).

> If you hardcode in
> GCC emitting worse sequence at the caller (which s + strlen (s) is), then
> even once the library deficiency is fixed, you still don't get benefit from
> it.  

What benefit exactly? Rawmemchr cannot ever beat strlen. There is a trick that 
can
make a good strlen faster than rawmemchr, but even ignoring that, an integer 
based
rawmemchr needs to do extra operations in its inner loop. A SIMD version could 
use
similar inner loops although rawmemchr still has a higher cost. You could 
special case
searching for '\0' and jump to strlen (I have patches for that), but that also 
adds cost...

>I wonder how you work around the
> define strchr(s, c) \
> ..
> in glibc headers anyway.

That should either be removed or changed to use strlen (I have patches for both
options out for review).

> Another thing is for the cases where strlen is desirable to be expanded
> inline, in that case rawmemchr (x, 0) or strchr (x, 0) is likely useful to be
> expanded inline as well and then this decision should be done at expansion
> time.

I'm not sure I'm following you here - that's an argument to expand into strlen 
early
as strlen is better optimized in GCC...

Wilco



Re: [PATCH] Fix missed DSE opportunity with operator delete.

2016-04-20 Thread Richard Biener
On Tue, Apr 19, 2016 at 10:48 PM, Mikhail Maltsev  wrote:
> On 04/18/2016 12:14 PM, Richard Biener wrote:
>>
>> Enlarging tree_function_decl is bad.
> Probably using 3 bits for malloc_flag, operator_new_flag and free_flag is
> redundant. I packed the state into 2 bits.
>>
>> Passes should get at the info via flags_from_decl_or_type () and a new
>> ECF_FREE.
> Fixed.

Thanks - much better.

@@ -2117,6 +2127,13 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref)
  /* Fallthru to general call handling.  */;
   }

+  if (callee != NULL_TREE
+  && (flags_from_decl_or_type (callee) & ECF_FREE) != 0)
+{

as you have a stmt here please use gimple_call_flags (call) & ECF_FREE.

@@ -2402,6 +2409,16 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)

  default:;
  }
+
+  if (callee != NULL_TREE
+ && (flags_from_decl_or_type (callee) & ECF_FREE) != 0)
+   {

Likewise.

@@ -1728,6 +1729,15 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref)
  /* Fallthru to general call handling.  */;
   }

+  /* free-like functions may not reference their first argument.  */
+  if (callee != NULL_TREE && (flags & ECF_FREE) != 0)
+{
+  tree ptr = gimple_call_arg (call, 0);
+  tree base = ao_ref_base (ref);
+  if (base && TREE_CODE (base) == MEM_REF && TREE_OPERAND (base, 0) == ptr)
+   return false;
+}
+

So this is less aggressive than what we do for BUILT_IN_FREE which simply
returns false as "not reading from (any) memory".  I suspect we might want
to amend the documentation of the "free" attribute to that effect, or find
a better wording ...

Otherwise using DECL_SET_MALLOC sometimes and sometimes
DECL_ALLOC_FN_KIND () = ALLOC_FN_MALLOC looks somewhat
inconsistent.  I'd prefer removing DECL_SET_MALLOC.

You have

+static tree
+handle_free_attribute (tree *node, tree name, tree /*args*/, int /*flags*/,
+  bool *no_add_attrs)
+{
+  tree decl = *node;
+  if (TREE_CODE (decl) == FUNCTION_DECL
+  && type_num_arguments (TREE_TYPE (decl)) != 0
+  && POINTER_TYPE_P (TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (decl)
+DECL_ALLOC_FN_KIND (decl) = ALLOC_FN_FREE;
+  else
+{
+  warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wattributes,
+ "%qE attribute ignored", name);
+  *no_add_attrs = true;
+}

so one can happily apply the attribute to

 void foo (void *, void *);

but then

@@ -2117,6 +2127,13 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref)
  /* Fallthru to general call handling.  */;
   }

+  if (callee != NULL_TREE
+  && (flags_from_decl_or_type (callee) & ECF_FREE) != 0)
+{
+  tree ptr = gimple_call_arg (call, 0);
+  return ptr_deref_may_alias_ref_p_1 (ptr, ref);
+}

will ignore the 2nd argument.  I think it's better to ignore the attribute
if type_num_arguments () != 1.

Richard.

> --
> Regards,
> Mikhail Maltsev


Re: [PATCH, rs6000] Expand vec_ld and vec_st during parsing to improve performance

2016-04-20 Thread Bill Schmidt
On Tue, 2016-04-19 at 08:10 -0500, Bill Schmidt wrote:
> On Tue, 2016-04-19 at 10:09 +0200, Richard Biener wrote:
> > 
> > x86 nowadays has intrinsics implemented as inlines - they come from
> > header files.  It seems for ppc the intrinsics are somehow magically
> > there, w/o a header file?
> 
> Yes, and we really need to start gravitating to the inlines in header
> files model (Clang does this successfully for PowerPC and it is quite a
> bit cleaner, and allows for more optimization).  We have a very
> complicated setup for handling overloaded built-ins that could use a
> rewrite once somebody has time to attack it.  We do have one header file
> for built-ins (altivec.h) but it largely just #defines well-known
> aliases for the internal built-in names.  We have a lot of other things
> we have to do in GCC 7, but I'd like to do something about this in the
> relatively near future.  (Things like "vec_add" that just do a vector
> addition aren't expanded until RTL time??  Gack.)

Looking into this a bit more reminded me why things are the way they
are.  The AltiVec interfaces were designed way back to be overloaded
functions, which isn't valid C99.  Thus they can't be declared in
headers without some magic.  Clang solved this by adding an extension
__attribute__ ((__overloaded__)), which allows nice always-inline
functions that fully express the semantics and integrate well into the
optimizers.  To date, GCC doesn't have such an attribute.  Thus we have
this somewhat nasty code that gets called out of the front end that
allows us to resolve the overloaded built-ins during parsing.

With C11 we could use _Generic, but having two separate interfaces to
maintain based on the language level doesn't seem reasonable.

It looks like there is a way to do this with GCC built-ins, however,
using __builtin_choose_expr and __builtin_types_compatible_p
(https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html).  I need to
play with this and see what kind of code gets generated.  If we end up
with a bunch of run-time type checks that would still not be a good
solution.

I wonder how hard it would be to get support for __attribute__
((__overloaded__)) in GCC...

Thanks again,
Bill



RE: [PATCH] x86 interrupt attribute patch [1/2]

2016-04-20 Thread Koval, Julia
Sorry, here is the right patch.

-Original Message-
From: Koval, Julia 
Sent: Wednesday, April 20, 2016 4:42 PM
To: 'gcc-patches@gcc.gnu.org' 
Cc: Lu, Hongjiu ; 'vaalfr...@gmail.com' 
; 'ubiz...@gmail.com' ; 
'l...@redhat.com' ; Zamyatin, Igor 
Subject: [PATCH] x86 interrupt attribute patch [1/2]

Hi,
Here is the new version of the interrupt attribute patch. Bootstrapped/regtested for 
Linux/x86_64. Ok for trunk?

Update TARGET_FUNCTION_INCOMING_ARG documentation

On x86, interrupt handlers are only called by processors which push
interrupt data onto stack at the address where the normal return address
is.  Since interrupt handlers must access interrupt data via pointers so
that they can update interrupt data, the pointer argument is passed as
"argument pointer - word".

TARGET_FUNCTION_INCOMING_ARG defines how callee sees its argument.
Normally it returns REG, NULL, or CONST_INT.  This patch adds arbitrary
address computation based on hard register, which can be forced into a
register, to the list.

When copying an incoming argument onto stack, assign_parm_setup_stack
has:

if (argument in memory)
  copy argument in memory to stack
else
  move argument to stack

Since an arbitrary address computation may be passed as an argument, we
change it to:

if (argument in memory)
  copy argument in memory to stack
else
  {
if (argument isn't in register)
  force argument into a register
move argument to stack
  }

* function.c (assign_parm_setup_stack): Force source into a
register if needed.
* target.def (function_incoming_arg): Update documentation to
allow arbitrary address computation based on hard register.
* doc/tm.texi: Regenerated.
commit be34f5e4e7417d426d2b35228eac5310525cce82
Author: H.J. Lu 
Date:   Fri Feb 5 04:43:15 2016 -0800

Update TARGET_FUNCTION_INCOMING_ARG documentation

On x86, interrupt handlers are only called by processors which push
interrupt data onto stack at the address where the normal return address
is.  Since interrupt handlers must access interrupt data via pointers so
that they can update interrupt data, the pointer argument is passed as
"argument pointer - word".

TARGET_FUNCTION_INCOMING_ARG defines how callee sees its argument.
Normally it returns REG, NULL, or CONST_INT.  This patch adds arbitrary
address computation based on hard register, which can be forced into a
register, to the list.

When copying an incoming argument onto stack, assign_parm_setup_stack
has:

if (argument in memory)
  copy argument in memory to stack
else
  move argument to stack

Since an arbitrary address computation may be passed as an argument, we
change it to:

if (argument in memory)
  copy argument in memory to stack
else
  {
if (argument isn't in register)
  force argument into a register
move argument to stack
  }

* function.c (assign_parm_setup_stack): Force source into a
register if needed.
* target.def (function_incoming_arg): Update documentation to
allow arbitrary address computation based on hard register.
* doc/tm.texi: Regenerated.

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 745910f..826f8fb 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3949,6 +3949,10 @@ which the caller passes the value, and
 fashion to tell the function being called where the arguments will
 arrive.
 
+@code{TARGET_FUNCTION_INCOMING_ARG} can also return arbitrary address
+computation using hard register, which can be forced into a register,
+so that it can be used to pass special arguments.
+
 If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,
 @code{TARGET_FUNCTION_ARG} serves both purposes.
 @end deftypefn
diff --git a/gcc/function.c b/gcc/function.c
index 1ac8e26..c42e865 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -3466,7 +3466,11 @@ assign_parm_setup_stack (struct assign_parm_data_all 
*all, tree parm,
   BLOCK_OP_NORMAL);
}
   else
-   emit_move_insn (dest, src);
+   {
+ if (!REG_P (src))
+   src = force_reg (GET_MODE (src), src);
+ emit_move_insn (dest, src);
+   }
 }
 
   if (to_conversion)
diff --git a/gcc/target.def b/gcc/target.def
index 20f2b32..d3d9963 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4468,8 +4468,8 @@ a register.",
bool named),
  default_function_arg)
 
-/* Likewise, but for machines with register windows.  Return the
-   location where the argument will appear to the callee.  */
+/* Likewise, but for machines with register windows or special arguments.
+   Return the location where the 

[PATCH] x86 interrupt attribute patch [2/2]

2016-04-20 Thread Koval, Julia
Hi,
Here is the new version of the interrupt attribute patch. Bootstrapped/regtested for 
Linux/x86_64. Ok for trunk?

The interrupt and exception handlers are called by x86 processors.  X86
hardware pushes information onto stack and calls the handler.  The
requirements are

1. Both interrupt and exception handlers must use the 'IRET' instruction,
instead of the 'RET' instruction, to return from the handlers.
2. All registers are callee-saved in interrupt and exception handlers.
3. The difference between interrupt and exception handlers is the
exception handler must pop 'ERROR_CODE' off the stack before the 'IRET'
instruction.

The design goals of interrupt and exception handlers for x86 processors
are:

1. Support both 32-bit and 64-bit modes.
2. Flexible for compilers to optimize.
3. Easy to use by programmers.

To implement interrupt and exception handlers for x86 processors, a
compiler should support:

'interrupt' attribute

Use this attribute to indicate that the specified function with
mandatory arguments is an interrupt or exception handler.  The compiler
generates function entry and exit sequences suitable for use in an
interrupt handler when this attribute is present.  The 'IRET' instruction,
instead of the 'RET' instruction, is used to return from interrupt or
exception handlers.  All registers, except for the EFLAGS register which
is restored by the 'IRET' instruction, are preserved by the compiler.
If the compiler generates MPX, SSE, MMX or x87 instructions in an interrupt
or exception handler, or functions called from an interrupt or exception
handler may contain MPX, SSE, MMX or x87 instructions, the compiler must
save and restore the corresponding state.

Since the direction flag in the FLAGS register in interrupt (exception)
handlers is undetermined, cld instruction must be emitted in function
prologue if rep string instructions are used in interrupt (exception)
handler or interrupt (exception) handler isn't a leaf function.

Any interruptible-without-stack-switch code must be compiled with
-mno-red-zone since interrupt handlers can and will, because of the
hardware design, touch the red zone.

1. interrupt handler must be declared with a mandatory pointer argument:

struct interrupt_frame;

__attribute__ ((interrupt))
void
f (struct interrupt_frame *frame)
{
...
}

and the user must properly define the structure the pointer points to.

2. exception handler:

The exception handler is very similar to the interrupt handler with
a different mandatory function signature:

typedef unsigned int uword_t __attribute__ ((mode (__word__)));

struct interrupt_frame;

__attribute__ ((interrupt))
void
f (struct interrupt_frame *frame, uword_t error_code)
{
...
}

and compiler pops the error code off stack before the 'IRET' instruction.

The exception handler should only be used for exceptions which push an
error code and all other exceptions must use the interrupt handler.
The system will crash if the wrong handler is used.

'no_caller_saved_registers' attribute

Use this attribute to indicate that the specified function has no
caller-saved registers.  That is, all registers are callee-saved.
The compiler generates proper function entry and exit sequences to
save and restore any modified registers, except for the EFLAGS register.
If the compiler generates MPX, SSE, MMX or x87 instructions in a function
with 'no_caller_saved_registers' attribute or functions called from a
function with 'no_caller_saved_registers' attribute may contain MPX,
SSE, MMX or x87 instructions, the compiler must save and restore the
corresponding state.

The user can call functions specified with 'no_caller_saved_registers'
attribute from an interrupt handler without saving and restoring all
call clobbered registers.

On x86, interrupt handlers are only called by processors which push
interrupt data onto stack at the address where the normal return address
is.  Interrupt handlers must access interrupt data via pointers so that
they can update interrupt data.
gcc/

PR target/66960
PR target/67630
PR target/67634
PR target/67841
PR target/68037
PR target/68618
PR target/68661
PR target/69575
PR target/69596
PR target/69734
* config/i386/i386-protos.h (ix86_epilogue_uses): New prototype.
* config/i386/i386.c (ix86_conditional_register_usage): Preserve
all registers, except for function return registers if there are
no caller-saved registers.
(ix86_set_func_type): New function.
(ix86_set_current_function): Call ix86_set_func_type to set
no_caller_saved_registers and func_type.  Call reinit_regs if
caller-saved registers are changed.  Don't allow MPX, SSE, MMX
nor x87 instructions in interrupt handler nor function with
no_caller_saved_registers attribute.
(ix86_function_ok_for_sibcall): Return false if there are no
caller-saved registers.

Re: [PATCH] Simplify ix86_expand_vector_move_misalign

2016-04-20 Thread H.J. Lu
On Wed, Apr 20, 2016 at 4:19 AM, Uros Bizjak  wrote:
> On Wed, Apr 20, 2016 at 1:09 PM, Uros Bizjak  wrote:
>> On Tue, Apr 19, 2016 at 4:48 PM, H.J. Lu  wrote:
>>> Since mov_internal patterns handle both aligned/unaligned load
>>> and store, we can simplify ix86_avx256_split_vector_move_misalign and
>>> ix86_expand_vector_move_misalign.
>>>
>>> Tested on x86-64.  OK for trunk?
>>>
>>> H.J.
>>> ---
>>> * config/i386/i386.c (ix86_avx256_split_vector_move_misalign):
>>> Short-cut unaligned load and store cases.  Handle all integer
>>> vector modes.
>>> (ix86_expand_vector_move_misalign): Short-cut unaligned load
>>> and store cases.  Call ix86_avx256_split_vector_move_misalign
>>> directly without checking mode class.
>>
>> LGTM, but it is hard to review interwoven code movements and deletions...
>>
>> Hopefully OK.
>
> BTW: There are a couple of regressions in the testsuite [1] when
> configured --with-arch=corei7. Can you please look at the testcases,
> if scan patterns need to be adjusted?
>
> FAIL: gcc.target/i386/avx256-unaligned-load-1.c scan-assembler-not
> (avx_loadups256|vmovups[^\\n\\r]*movv8sf_internal)
> FAIL: gcc.target/i386/avx256-unaligned-store-2.c scan-assembler
> vmovups.*movv16qi_internal/3
>
> [1] https://gcc.gnu.org/ml/gcc-testresults/2016-04/msg01932.html

I will submit a patch.


-- 
H.J.


Re: [PATCH][AArch64] Work around PR target/64971

2016-04-20 Thread Richard Biener
On Wed, 20 Apr 2016, Kyrill Tkachov wrote:

> 
> On 15/04/16 17:27, James Greenhalgh wrote:
> > On Fri, Apr 15, 2016 at 03:12:58PM +0100, Kyrill Tkachov wrote:
> > > On 15/04/16 15:10, Kyrill Tkachov wrote:
> > > > Hi all,
> > > > 
> > > > This is a repost of Andrew's fix for PR target/64971 that was originally
> > > > posted at:
> > > > https://gcc.gnu.org/ml/gcc-patches/2015-02/msg00502.html
> > > > 
> > > > The only change is that I substituted DImode for Pmode and added a FIXME
> > > > comment to remind us to revisit this (see the PR in bugzilla for more
> > > > info).
> > > > 
> > > > Bootstrapped and tested on aarch64-none-linux-gnu (LP64, I don't have
> > > > access
> > > > to a full ILP32 system) This patch affects only ILP32 codegen so I've
> > > > run a
> > > > make check on aarch64-none-elf with /-mabi=ilp32 and nothing regressed.
> > > > I think at this stage it's the least risky band-aid.
> > > > 
> > > > Is this ok for trunk at this stage?
> > I hope that we are able to revisit this for GCC 7 with the more complete
> > fixes detailed in the bug report.
> > 
> > I've got no objections to the patch as a band-aid step forward for the
> > ILP32 ABI, and the patch is no risk to the LP64 ABI (the code added is very
> > clearly predicated on TARGET_ILP32).
> > 
> > As Jeff points out, this will need RM approval to go in to GCC 6.
> 
> Sorry for the early ping, but since we're planning for RC2 this week
> can I apply this to the GCC 6 branch?

Sure.  A broken ILP32 aarch64 won't block the release.

Thanks,
Richard.

> Thanks,
> Kyrill
> 
> > Thanks,
> > James
> >   
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH 00/18] towards removing rtx_insn_list and rtx_expr_list

2016-04-20 Thread Andi Kleen
tbsaunde+...@tbsaunde.org writes:

> I have some more patches that almost completely eliminate these, but I haven't
> tested the rest yet, and this is already a long series so it would be nice to
> get some of it out of my tree and reviewed.
>
> patches individually bootstrapped and regtested on x86_64-linux-gnu, ok? I
> expect none of this will more than textually conflict with something that may
> need backported to gcc-6, but its waited a month already I guess it can wait
> longer if people prefer.

A vector can have very different performance than a list, depending how
it is used. Do your patches cause any measurable performance difference for
the compiler?

-Andi


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 11:17:06AM +0000, Wilco Dijkstra wrote:
> Can you quantify "don't like"? I benchmarked rawmemchr on a few targets
> and it's slower than strlen, so it's hard to guess what you don't like about 
> it.

This is the same stuff as has been discussed for mempcpy, rawmemchr is the
API meant to use for getting pointer to the terminating '\0', if there are
deficiencies on the library side, they should be fixed.  If you hardcode in
GCC emitting worse sequence at the caller (which s + strlen (s) is), then
even once the library deficiency is fixed, you still don't get benefit from
it.  I wonder how you work around the
# define strchr(s, c) \
  (__extension__ (__builtin_constant_p (c) && !__builtin_constant_p (s) \
  && (c) == '\0' \
  ? (char *) __rawmemchr (s, c) \
  : __builtin_strchr (s, c)))
in glibc headers anyway.

Another thing is for the cases where strlen is desirable to be expanded
inline, in that case rawmemchr (x, 0) or strchr (x, 0) is likely useful to be
expanded inline as well and then this decision should be done at expansion
time.

Jakub


[committed, PATCH] Replace -skip-rax-setup with -mskip-rax-setup

2016-04-20 Thread H.J. Lu
This fixed a typo.  Checked into trunk.

H.J.
---
Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 235274)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2016-04-20  H.J. Lu  
+
+   * doc/invoke.texi: Replace -skip-rax-setup with -mskip-rax-setup.
+
 2016-04-20  Richard Biener  
 
* gimple-match.h (maybe_build_generic_op): Adjust prototype.
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi (revision 235274)
+++ gcc/doc/invoke.texi (working copy)
@@ -24157,7 +24157,7 @@ useful together with @option{-mrecord-mc
 @itemx -mno-skip-rax-setup
 @opindex mskip-rax-setup
 When generating code for the x86-64 architecture with SSE extensions
-disabled, @option{-skip-rax-setup} can be used to skip setting up RAX
+disabled, @option{-mskip-rax-setup} can be used to skip setting up RAX
 register when there are no variable arguments passed in vector registers.
 
 @strong{Warning:} Since RAX register is used to avoid unnecessarily


Re: [PATCH 18/18] make last_pending_memory_flush a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:22AM -0400, tbsaunde+...@tbsaunde.org wrote:
> --- a/gcc/sched-rgn.c
> +++ b/gcc/sched-rgn.c
> @@ -2620,12 +2620,8 @@ deps_join (struct deps_desc *succ_deps, struct 
> deps_desc *pred_deps)
>concat_mem_list (pred_deps->pending_write_mems,
>  &succ_deps->pending_write_mems);
>  
> -  succ_deps->pending_jump_insns
> -= concat_INSN_LIST (pred_deps->pending_jump_insns,
> -succ_deps->pending_jump_insns);
> -  succ_deps->last_pending_memory_flush
> -= concat_INSN_LIST (pred_deps->last_pending_memory_flush,
> -succ_deps->last_pending_memory_flush);
> +  succ_deps->pending_jump_insns.safe_splice (pred_deps->pending_jump_insns);
> +  succ_deps->last_pending_memory_flush.safe_splice 
> (pred_deps->last_pending_memory_flush);

Line too long.


Segher


Re: [PATCH 17/18] make pending_{read,write}_insns vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:21AM -0400, tbsaunde+...@tbsaunde.org wrote:
>   (add_insn_mem_dependence): Likewise. Likewise.

Likewise!

> @@ -1614,22 +1644,22 @@ remove_from_dependence_list (rtx_insn *insn, 
> rtx_insn_list **listp)
>  /* Same as above, but process two lists at once.  */
>  static int
>  remove_from_both_dependence_lists (rtx_insn *insn,
> -rtx_insn_list **listp,
> +vec *insns,
>  rtx_expr_list **exprp)
>  {
>int removed = 0;
>  
> -  while (*listp)
> +  unsigned int len = insns->length ();
> +  for (unsigned int i = len - 1; i < len; i--)
>  {
> -  if (XEXP (*listp, 0) == insn)
> +  if ((*insns)[i] == insn)
>  {
> -  remove_free_INSN_LIST_node (listp);
> +   insns->ordered_remove (i);
>remove_free_EXPR_LIST_node (exprp);
>removed++;
>continue;
>  }
>  
> -  listp = (rtx_insn_list **) &XEXP (*listp, 1);
>exprp = (rtx_expr_list **) &XEXP (*exprp, 1);
>  }

Tab-ify it all, not just the line you edit?

> @@ -2489,33 +2514,29 @@ sched_analyze_1 (struct deps_desc *deps, rtx x, 
> rtx_insn *insn)
>   }
>else
>   {
> -   rtx_insn_list *pending;
> rtx_expr_list *pending_mem;
>  
> -   pending = deps->pending_read_insns;
> +   unsigned int len = deps->pending_read_insns.length ();
> pending_mem = deps->pending_read_mems;
> -   while (pending)
> +   for (unsigned int i = len - 1; i < len; i--)
>   {
> +   rtx_insn *temp = deps->pending_read_insns[i];
> if (anti_dependence (pending_mem->element (), t)
> -   && ! sched_insns_conditions_mutex_p (insn, pending->insn ()))
> - note_mem_dep (t, pending_mem->element (), pending->insn (),
> -   DEP_ANTI);
> +   && ! sched_insns_conditions_mutex_p (insn, temp))
> + note_mem_dep (t, pending_mem->element (), temp, DEP_ANTI);

No space after "!" (yeah, pre-existing).  More of that later in the file.


Segher


[PATCH] Fix GENERICizing of BIT_FIELD_REF

2016-04-20 Thread Richard Biener

This fixes a latent issue in maybe_build_generic_op.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-04-20  Richard Biener  

* gimple-match.h (maybe_build_generic_op): Adjust prototype.
* gimple-match-head.c (maybe_build_generic_op): Pass all ops
by reference, clear op1 and op2 when GENERICizing BIT_FIELD_REF.
(maybe_push_res_to_seq): Adjust.
* gimple-fold.c (maybe_build_generic_op): Likewise.

Index: gcc/gimple-fold.c
===
--- gcc/gimple-fold.c   (revision 235272)
+++ gcc/gimple-fold.c   (working copy)
@@ -3376,8 +3376,7 @@ replace_stmt_with_simplification (gimple
  || gimple_num_ops (stmt) > get_gimple_rhs_num_ops (rcode))
{
  maybe_build_generic_op (rcode,
- TREE_TYPE (gimple_assign_lhs (stmt)),
- &ops[0], ops[1], ops[2]);
+ TREE_TYPE (gimple_assign_lhs (stmt)), ops);
  gimple_assign_set_rhs_with_ops (gsi, rcode, ops[0], ops[1], ops[2]);
  if (dump_file && (dump_flags & TDF_DETAILS))
{
Index: gcc/gimple-match-head.c
===
--- gcc/gimple-match-head.c (revision 235272)
+++ gcc/gimple-match-head.c (working copy)
@@ -233,18 +233,18 @@ gimple_resimplify3 (gimple_seq *seq,
a GENERIC tree for that expression into *OP0.  */
 
 void
-maybe_build_generic_op (enum tree_code code, tree type,
-   tree *op0, tree op1, tree op2)
+maybe_build_generic_op (enum tree_code code, tree type, tree *ops)
 {
   switch (code)
 {
 case REALPART_EXPR:
 case IMAGPART_EXPR:
 case VIEW_CONVERT_EXPR:
-  *op0 = build1 (code, type, *op0);
+  ops[0] = build1 (code, type, ops[0]);
   break;
 case BIT_FIELD_REF:
-  *op0 = build3 (code, type, *op0, op1, op2);
+  ops[0] = build3 (code, type, ops[0], ops[1], ops[2]);
+  ops[1] = ops[2] = NULL_TREE;
   break;
 default:;
 }
@@ -316,7 +316,7 @@ maybe_push_res_to_seq (code_helper rcode
  else
res = create_tmp_reg (type);
}
-  maybe_build_generic_op (rcode, type, &ops[0], ops[1], ops[2]);
+  maybe_build_generic_op (rcode, type, ops);
   gimple *new_stmt = gimple_build_assign (res, rcode,
 ops[0], ops[1], ops[2]);
   gimple_seq_add_stmt_without_update (seq, new_stmt);
Index: gcc/gimple-match.h
===
--- gcc/gimple-match.h  (revision 235272)
+++ gcc/gimple-match.h  (working copy)
@@ -64,7 +64,7 @@ bool gimple_resimplify3 (gimple_seq *, c
 tree (*)(tree));
 tree maybe_push_res_to_seq (code_helper, tree, tree *,
gimple_seq *, tree res = NULL_TREE);
-void maybe_build_generic_op (enum tree_code, tree, tree *, tree, tree);
+void maybe_build_generic_op (enum tree_code, tree, tree *);
 
 
 #endif  /* GCC_GIMPLE_MATCH_H */


Re: [PATCH 16/18] make forced labels a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:20AM -0400, tbsaunde+...@tbsaunde.org wrote:
>   * function.h (struct expr_status): make x_forced_labels a vector.

Capital M.

> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -115,7 +115,8 @@ can_delete_label_p (const rtx_code_label *label)
>return (!LABEL_PRESERVE_P (label)
> /* User declared labels must be preserved.  */
> && LABEL_NAME (label) == 0
> -   && !in_insn_list_p (forced_labels, label));
> +   && !vec_safe_contains (forced_labels,
> +  const_cast<rtx_code_label *> 
> (label)));
>  }

Line too long.  Rewrite this with a conditional, maybe?


Segher


Re: [PING][PATCH] libgcc: Fix typos in comments for ARM FP emulation routines

2016-04-20 Thread Martin Galvan
On Wed, Apr 20, 2016 at 1:44 AM, Sandra Loosemore
 wrote:
> Or, do you need someone to check this in for you because you don't have
> write access to the repository?

Hi! Yeah, I don't have write access. If you could commit this for me
it would be great. Thanks!


Re: [PATCH 15/18] make nonlocal_goto_handler_labels a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:19AM -0400, tbsaunde+...@tbsaunde.org wrote:
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -157,7 +157,14 @@ delete_insn (rtx uncast_insn)
>   }
>   }
>  
> -  remove_node_from_insn_list (insn, &nonlocal_goto_handler_labels);
> +
> +  unsigned int len = vec_safe_length (nonlocal_goto_handler_labels);
> +  for (unsigned int i = 0; i < len; i++)
> + if ((*nonlocal_goto_handler_labels)[i] == insn)
> +   {
> + nonlocal_goto_handler_labels->ordered_remove (i);
> + break;
> +   }
>  }

Maybe you want a new helper function for this?

> @@ -4255,11 +4259,10 @@ cfg_layout_initialize (unsigned int flags)
>record_effective_endpoints ();
>  
>/* Make sure that the targets of non local gotos are marked.  */
> -  for (x = nonlocal_goto_handler_labels; x; x = x->next ())
> -{
> -  bb = BLOCK_FOR_INSN (x->insn ());
> -  bb->flags |= BB_NON_LOCAL_GOTO_TARGET;
> -}
> +  rtx_insn *temp;
> +  unsigned int i;
> +  FOR_EACH_VEC_SAFE_ELT_REVERSE (nonlocal_goto_handler_labels, i, temp)
> +  BLOCK_FOR_INSN (temp)->flags |= BB_NON_LOCAL_GOTO_TARGET;

Bad indent.

> @@ -3877,9 +3877,10 @@ set_initial_label_offsets (void)
>  if (x->insn ())
>set_label_offsets (x->insn (), NULL, 1);
>  
> -  for (rtx_insn_list *x = nonlocal_goto_handler_labels; x; x = x->next ())
> -if (x->insn ())
> -  set_label_offsets (x->insn (), NULL, 1);
> +  rtx_insn *insn;
> +  unsigned int i;
> +  FOR_EACH_VEC_SAFE_ELT_REVERSE (nonlocal_goto_handler_labels, i, insn)
> +  set_label_offsets (insn, NULL, 1);

Bad indent.


Segher


Re: [PATCH v2] [libatomic] Add RTEMS support

2016-04-20 Thread Sebastian Huber

Hello,

I know that I am pretty late, but is there a chance to get this into the 
GCC 6.1 release?


On 19/04/16 14:56, Sebastian Huber wrote:

v2: Do not use architecture configuration due to broken ARM libatomic
support.

gcc/

* config/rtems.h (LIB_SPEC): Add -latomic.

libatomic/

* configure.tgt (*-*-rtems*): New supported target.
* config/rtems/host-config.h: New file.
* config/rtems/lock.c: Likewise.


--
Sebastian Huber, embedded brains GmbH

Address : Dornierstr. 4, D-82178 Puchheim, Germany
Phone   : +49 89 189 47 41-16
Fax : +49 89 189 47 41-09
E-Mail  : sebastian.hu...@embedded-brains.de
PGP : Public key available on request.

Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.



Re: [PATCH 11/18] add some utility methods to vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:15AM -0400, tbsaunde+...@tbsaunde.org wrote:
> +template<typename T, typename A>
> +inline bool
> +vec_safe_contains (vec<T, A, vl_embed> *v, const T &search)
> +{
> +  return v? v->contains (search) : false;
> +}

Missing space.


Segher


Re: [PATCH 10/18] merge adjust_cost and adjust_cost_2 target hooks

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:14AM -0400, tbsaunde+...@tbsaunde.org wrote:
>   * config/microblaze/microblaze.c (microblaze_adjust_cost):
>   * Likewise.

Stray * (here and elsewhere).

> --- a/gcc/config/alpha/alpha.c
> +++ b/gcc/config/alpha/alpha.c
> @@ -4758,14 +4758,15 @@ alpha_split_atomic_exchange_12 (rtx operands[])
> a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
>  
>  static int
> -alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
> +alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int 
> cost,

Why an int and not enum reg_note?

> +unsigned int)
>  {
>enum attr_type dep_insn_type;
>  
>/* If the dependence is an anti-dependence, there is no cost.  For an
>   output dependence, there is sometimes a cost, but it doesn't seem
>   worth handling those few cases.  */
> -  if (REG_NOTE_KIND (link) != 0)
> +  if (dep_type != 0)
>  return cost;

From reg-notes.def:

/* REG_DEP_TRUE is used in scheduler dependencies lists to represent a
   read-after-write dependency (i.e. a true data dependency).  This is
   here, not grouped with REG_DEP_ANTI and REG_DEP_OUTPUT, because some
   passes use a literal 0 for it.  */
REG_NOTE (DEP_TRUE)

Get rid of the literal 0 while you're at it?  Some places already have
REG_DEP_TRUE.

> @@ -4486,7 +4487,7 @@ c6x_adjust_cost (rtx_insn *insn, rtx link, rtx_insn 
> *dep_insn, int cost)
>if (insn_code_number >= 0)
>  insn_type = get_attr_type (insn);
>  
> -  kind = REG_NOTE_KIND (link);
> +  kind = (reg_note) dep_type;

Maybe it's just me, but it would look a lot less confusing with "enum".

>  static int
> -mips_adjust_cost (rtx_insn *insn ATTRIBUTE_UNUSED, rtx link,
> -   rtx_insn *dep ATTRIBUTE_UNUSED, int cost)
> +mips_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost, unsigned 
> int)
>  {
> -  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
> -  && TUNE_20KC)
> -return cost;
> -  if (REG_NOTE_KIND (link) != 0)
> +  if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT || !TUNE_20KC))
>  return 0;
>return cost;
>  }

The original logic was a lot more readable (test positives, not negatives).

> +as a data-dependence.  If the scheduler using the automaton based pipeline
>  description, the cost of anti-dependence is zero and the cost of
>  output-dependence is maximum of one and the difference of latency
>  times of the first and the second insns.  If these values are not

"is using" (pre-existing, but hey).


So I wonder how much is gained by adding an extra unused argument to so
many places.


Segher


[PATCH] [ARC] Add new ARCv2 instructions.

2016-04-20 Thread Claudiu Zissulescu
This patch adds new instruction variants as introduced by the ARCv2
architecture.

OK to apply?
Claudiu

gcc/
2016-04-20  Claudiu Zissulescu  

* config/arc/arc-protos.h (compact_memory_operand_p): Declare.
* config/arc/arc.c (arc_output_commutative_cond_exec): Consider
bmaskn instruction.
(arc_dwarf_register_span): Remove enum keyword.
(compact_memory_operand_p): New function.
* config/arc/arc.h (reg_class): Add code density register classes.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.
* config/arc/arc.md (*movqi_insn): Add code density instructions.
(*movhi_insn, *movsi_insn, *movsf_insn): Likewise.
(*extendhisi2_i, andsi3_i, cmpsi_cc_insn_mixed): Likewise.
(*cmpsi_cc_c_insn, *movsi_ne): Likewise.
* config/arc/constraints.md (C2p, Uts, Cm1, Cm3, Ucd): New
constraints.
(h, Rcd, Rsd, Rzd): New register constraints.
(T): Use compact_memory_operand_p function.
* config/arc/predicates.md (compact_load_memory_operand): Remove.
---
 gcc/config/arc/arc-protos.h   |   2 +-
 gcc/config/arc/arc.c  | 146 +-
 gcc/config/arc/arc.h  |   9 +++
 gcc/config/arc/arc.md | 142 +++-
 gcc/config/arc/constraints.md |  58 -
 gcc/config/arc/predicates.md  |  89 -
 6 files changed, 295 insertions(+), 151 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index f487291..812b06a 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -44,7 +44,7 @@ extern void emit_shift (enum rtx_code, rtx, rtx, rtx);
 extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 extern void arc_split_compare_and_swap (rtx *);
 extern void arc_expand_compare_and_swap (rtx *);
-
+extern bool compact_memory_operand_p (rtx, machine_mode, bool, bool);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index f4bef3e..6f2136e 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -7071,6 +7071,11 @@ arc_output_commutative_cond_exec (rtx *operands, bool 
output_p)
   case AND:
if (satisfies_constraint_C1p (operands[2]))
  pat = "bmsk%? %0,%1,%Z2";
+   else if (satisfies_constraint_C2p (operands[2]))
+ {
+   operands[2] = GEN_INT ((~INTVAL (operands[2])));
+   pat = "bmskn%? %0,%1,%Z2";
+ }
else if (satisfies_constraint_Ccp (operands[2]))
  pat = "bclr%? %0,%1,%M2";
else if (satisfies_constraint_CnL (operands[2]))
@@ -9481,7 +9486,7 @@ arc_no_speculation_in_delay_slots_p ()
 static rtx
 arc_dwarf_register_span (rtx rtl)
 {
-   enum machine_mode mode = GET_MODE (rtl);
+   machine_mode mode = GET_MODE (rtl);
unsigned regno;
rtx p;
 
@@ -9496,6 +9501,145 @@ arc_dwarf_register_span (rtx rtl)
return p;
 }
 
+/* Return true if OP is an acceptable memory operand for ARCompact
+   16-bit load instructions of MODE.
+
+   SCALED indicates if address can be scaled.
+
+   CODE_DENSITY indicates ARCv2 code density operations are
+   available. */
+
+bool
+compact_memory_operand_p (rtx op, machine_mode mode,
+ bool code_density, bool scaled)
+{
+  rtx addr, plus0, plus1;
+  int size, off;
+
+  /* Eliminate non-memory operations.  */
+  if (GET_CODE (op) != MEM)
+return 0;
+
+  /* .di instructions have no 16-bit form.  */
+  if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET)
+return false;
+
+  if (mode == VOIDmode)
+mode = GET_MODE (op);
+
+  size = GET_MODE_SIZE (mode);
+
+  /* dword operations really put out 2 instructions, so eliminate
+ them.  */
+  if (size > UNITS_PER_WORD)
+return false;
+
+  /* Decode the address now.  */
+  addr = XEXP (op, 0);
+  switch (GET_CODE (addr))
+{
+case REG:
+  return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
+ || COMPACT_GP_REG_P (REGNO (addr))
+ || (SP_REG_P (REGNO (addr)) && (size != 2)));
+  /* Reverting for the moment since ld/st{w,h}_s does not have sp
+as a valid parameter.  */
+case PLUS:
+  plus0 = XEXP (addr, 0);
+  plus1 = XEXP (addr, 1);
+
+  if ((GET_CODE (plus0) == REG)
+  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+  || COMPACT_GP_REG_P (REGNO (plus0)))
+  && ((GET_CODE (plus1) == REG)
+  && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
+  || COMPACT_GP_REG_P (REGNO (plus1)))))
+{
+  return !code_density;
+}
+
+  if ((GET_CODE (plus0) == REG)
+  && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+  || (COMPACT_GP_REG_P (REGNO (plus0)) && !code_density)
+ || (IN_RANGE (REGNO (plus0), 0, 31) && code_density))
+  && (GET_CODE (plus1) == CONST_INT))
+{
+ bool 

Re: [PATCH][AArch64] Work around PR target/64971

2016-04-20 Thread Kyrill Tkachov


On 15/04/16 17:27, James Greenhalgh wrote:

On Fri, Apr 15, 2016 at 03:12:58PM +0100, Kyrill Tkachov wrote:

On 15/04/16 15:10, Kyrill Tkachov wrote:

Hi all,

This is a repost of Andrew's fix for PR target/64971 that was originally posted 
at:
https://gcc.gnu.org/ml/gcc-patches/2015-02/msg00502.html

The only change is that I substituted DImode for Pmode and added a FIXME
comment to remind us to revisit this (see the PR in bugzilla for more info).

Bootstrapped and tested on aarch64-none-linux-gnu (LP64, I don't have access
to a full ILP32 system) This patch affects only ILP32 codegen so I've run a
make check on aarch64-none-elf with /-mabi=ilp32 and nothing regressed.
I think at this stage it's the least risky band-aid.

Is this ok for trunk at this stage?

I hope that we are able to revisit this for GCC 7 with the more complete
fixes detailed in the bug report.

I've got no objections to the patch as a band-aid step forward for the
ILP32 ABI, and the patch is no risk to the LP64 ABI (the code added is very
clearly predicated on TARGET_ILP32).

As Jeff points out, this will need RM approval to go in to GCC 6.


Sorry for the early ping, but since we're planning for RC2 this week
can I apply this to the GCC 6 branch?

Thanks,
Kyrill


Thanks,
James
  




Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 1:56 PM, Wilco Dijkstra  wrote:
> Richard Biener wrote:
>> Better - comments below.  Jakub objections to the usefulness of the transform
>> remain - we do have the strlen pass that uses some global knowledge to decide
>> on profitability.  One could argue that for -Os doing the reverse transform 
>> is
>> profitable?
>
> In what way would it get more info to decide on profitability? The transform 
> is
> profitable unless you messed up your strlen implementation badly.
>
> For -Os one could do the reverse, but I don't think it is going to give a 
> substantial
> codesize gain compared to other simple improvements, so unlikely worth it.
>
>>> +  if (optimize_function_for_size_p (cfun))
>>> +return false;
>>
>> Hmm, I think we'd want a optimize_stmt_for_size_p (stmt) which
>> does the right thing for the case we have a CFG (look at the BB)
>> or when not (look at the function).
>
> Does that use the often incorrect BB probabilities? I used the function 
> variant on
> purpose to avoid it making the wrong decision. A typical example I see is 
> that GCC
> inlines a return sequence into an if marked with __builtin_expect (c, 0) but 
> not in the
> hot code that follows...
>
>> I think you want to build a gimple_assign directly here, otherwise ...
>
>>... this may not reliably end up at the call stmt.
>
> OK, I revisit that once we've agreed how to proceed with this patch - we now 
> have
> 3 variants...

Yeah ;)  I'm currently bootstrapping/testing the patch that makes it possible to
write all this in match.pd.

Richard.

> Wilco
>


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Wilco Dijkstra
Richard Biener wrote:
> Better - comments below.  Jakub objections to the usefulness of the transform
> remain - we do have the strlen pass that uses some global knowledge to decide
> on profitability.  One could argue that for -Os doing the reverse transform is
> profitable?

In what way would it get more info to decide on profitability? The transform is 
profitable unless you messed up your strlen implementation badly.

For -Os one could do the reverse, but I don't think it is going to give a 
substantial
codesize gain compared to other simple improvements, so unlikely worth it.

>> +  if (optimize_function_for_size_p (cfun))
>> +return false;
>
> Hmm, I think we'd want a optimize_stmt_for_size_p (stmt) which
> does the right thing for the case we have a CFG (look at the BB)
> or when not (look at the function).

Does that use the often incorrect BB probabilities? I used the function variant 
on
purpose to avoid it making the wrong decision. A typical example I see is that 
GCC
inlines a return sequence into an if marked with __builtin_expect (c, 0) but 
not in the
hot code that follows...

> I think you want to build a gimple_assign directly here, otherwise ...

>... this may not reliably end up at the call stmt.

OK, I revisit that once we've agreed how to proceed with this patch - we now 
have
3 variants...

Wilco



Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 1:19 PM, Marek Polacek  wrote:
> On Wed, Apr 20, 2016 at 12:54:12PM +0200, Richard Biener wrote:
>> On Wed, Apr 20, 2016 at 12:37 PM, Jakub Jelinek  wrote:
>> > On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
>> >> > --- gcc/tree-if-conv.c
>> >> > +++ gcc/tree-if-conv.c
>> >> > @@ -262,6 +262,16 @@ ifc_temp_var (tree type, tree expr, 
>> >> > gimple_stmt_iterator *gsi)
>> >> >return new_name;
>> >> >  }
>> >> >
>> >> > +/* Return true when COND is a false predicate.  */
>> >> > +
>> >> > +static inline bool
>> >> > +is_false_predicate (tree cond)
>> >> > +{
>> >> > +  return (cond == NULL_TREE
>> >> > + || cond == boolean_false_node
>> >> > + || integer_zerop (cond));
>> >> > +}
>> >> > +
>> >
>> > Is it really a good idea to return true even for cond == NULL_TREE?
>> > I mean it is then very confusing, because both is_true_predicate and
>> > is_false_predicate are true in that case.
>>
>> Ah, indeed.  NULL_TREE is true, not false.
>
> I can fix it up with the following.
>
> Bootstrap/regtest pending on x86_64-linux, ok for trunk and 6 if it passes?

Works for me.

Richard.

> 2016-04-20  Marek Polacek  
>
> * tree-if-conv.c (is_false_predicate): For NULL_TREE return false
> rather than true.
>
> diff --git gcc/tree-if-conv.c gcc/tree-if-conv.c
> index a9fbab9..72e808e 100644
> --- gcc/tree-if-conv.c
> +++ gcc/tree-if-conv.c
> @@ -267,9 +267,9 @@ ifc_temp_var (tree type, tree expr, gimple_stmt_iterator 
> *gsi)
>  static inline bool
>  is_false_predicate (tree cond)
>  {
> -  return (cond == NULL_TREE
> - || cond == boolean_false_node
> - || integer_zerop (cond));
> +  return (cond != NULL_TREE
> + && (cond == boolean_false_node
> + || integer_zerop (cond)));
>  }
>
>  /* Return true when COND is a true predicate.  */
>
> Marek


Re: [PATCH 05/18] make stores rtx_insn_list a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:09AM -0400, tbsaunde+...@tbsaunde.org wrote:
> 2016-04-19  Trevor Saunders  
> 
>   * gcse.c (struct ls_expr): make stores field a vector.

Capital M.

> @@ -3604,7 +3604,7 @@ ldst_entry (rtx x)
>ptr->expr = NULL;
>ptr->pattern  = x;
>ptr->pattern_regs = NULL_RTX;
> -  ptr->stores   = NULL;
> +  ptr->stores  .create (0);

Spaces.

> @@ -3620,7 +3620,7 @@ ldst_entry (rtx x)
>  static void
>  free_ldst_entry (struct ls_expr * ptr)
>  {
> -  free_INSN_LIST_list (& ptr->stores);
> +   ptr->stores.release ();

Wrong indent.


Segher


Re: [PATCH 03/18] make antic_stores a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:07AM -0400, tbsaunde+...@tbsaunde.org wrote:
> @@ -163,7 +162,7 @@ st_expr_entry (rtx x)
>  static void
>  free_st_expr_entry (struct st_expr * ptr)
>  {
> -  free_INSN_LIST_list (& ptr->antic_stores);
> +   ptr->antic_stores.release ();
> ptr->avail_stores.release ();

It looks like indent went wrong, in a previous patch as well.


Segher


libgomp: Make GCC 5 OpenACC offloading executables work (was: Openacc launch API)

2016-04-20 Thread Thomas Schwinge
Hi!

On Mon, 28 Sep 2015 15:38:57 -0400, Nathan Sidwell  wrote:
> On 09/24/15 04:40, Jakub Jelinek wrote:
> > Iff GCC 5 compiled offloaded OpenACC/PTX code will always do host fallback
> > anyway because of the incompatible PTX version

I do agree that it's reasonable to require users to re-compile their code
when switching between major GCC releases, to retain the offloading
feature, or otherwise resort to host fallback execution.  I'll propose
some text along these lines for the GCC 6 release notes.

> > why don't you just
> > do
> >goacc_save_and_set_bind (acc_device_host);
> >fn (hostaddrs);
> >goacc_restore_bind ();
> 
> Committed the  attached.  Thanks for the review.

What we now got, doesn't work, for several reasons.  GCC 5 OpenACC
offloading executables will just run into SIGSEGV.  Here is a patch
(which depends on
).
Unfortunately, we have to jump through some hoops: because GCC 5
compiler-generated OpenACC reductions code emits calls to
acc_get_device_type, and because we'll (have to) always resort to host
fallback execution for GCC 5 executables, we also have to enforce these
acc_get_device_type calls to return acc_device_host; otherwise reductions
will give bogus results.  (I hope I'm correctly implementing/using the
symbol versioning "magic".)  OK for gcc-6-branch and trunk?  Assuming we
want this fixed on gcc-6-branch, should it be part of 6.1 (to avoid 6.1
users running into the SIGSEGV), or delay for 6.2?

We don't have an easy way to add test cases to make sure we don't break
such legacy interfaces, do we?  (So, I just manually checked a few test
cases.)

commit c68c6b8e79176f5dc21684efe2517cbfb83a182e
Author: Thomas Schwinge 
Date:   Wed Apr 20 13:08:57 2016 +0200

libgomp: Make GCC 5 OpenACC offloading executables work

* libgomp.h: Include "openacc.h".
(goacc_get_device_type_201, goacc_get_device_type_20): New
prototypes.
(oacc_20_201_symver, goacc_get_device_type_201): New macros.
* libgomp.map: Add acc_get_device_type with OACC_2.0.1 symbol
version.
* oacc-init.c (acc_get_device_type): Rename to
goacc_get_device_type_201.
(goacc_get_device_type_20): New function.
* oacc-parallel.c (GOACC_parallel): Call goacc_lazy_initialize.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Refuse version
0 offload images.
* target.c (gomp_load_image_to_device): Gracefully handle the case
that a plugin refuses to load offload images.
---
 libgomp/libgomp.h | 10 ++
 libgomp/libgomp.map   | 10 ++
 libgomp/oacc-init.c   | 18 +-
 libgomp/oacc-parallel.c   | 11 +++
 libgomp/plugin/plugin-nvptx.c | 10 +-
 libgomp/target.c  |  6 +-
 6 files changed, 62 insertions(+), 3 deletions(-)

diff --git libgomp/libgomp.h libgomp/libgomp.h
index 6a05bbc..9fa1cb1 100644
--- libgomp/libgomp.h
+++ libgomp/libgomp.h
@@ -1011,6 +1011,8 @@ gomp_work_share_init_done (void)
 /* Now that we're back to default visibility, include the globals.  */
 #include "libgomp_g.h"
 
+#include "openacc.h"
+
 /* Include omp.h by parts.  */
 #include "omp-lock.h"
 #define _LIBGOMP_OMP_LOCK_DEFINED 1
@@ -1047,11 +1049,17 @@ extern void gomp_set_nest_lock_25 (omp_nest_lock_25_t 
*) __GOMP_NOTHROW;
 extern void gomp_unset_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW;
 extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW;
 
+extern acc_device_t goacc_get_device_type_201 (void) __GOACC_NOTHROW;
+extern acc_device_t goacc_get_device_type_20 (void) __GOACC_NOTHROW;
+
 # define strong_alias(fn, al) \
   extern __typeof (fn) al __attribute__ ((alias (#fn)));
 # define omp_lock_symver(fn) \
   __asm (".symver g" #fn "_30, " #fn "@@OMP_3.0"); \
   __asm (".symver g" #fn "_25, " #fn "@OMP_1.0");
+# define oacc_20_201_symver(fn) \
+  __asm (".symver go" #fn "_201, " #fn "@@OACC_2.0.1"); \
+  __asm (".symver go" #fn "_20, " #fn "@OACC_2.0");
 #else
 # define gomp_init_lock_30 omp_init_lock
 # define gomp_destroy_lock_30 omp_destroy_lock
@@ -1063,6 +1071,8 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) 
__GOMP_NOTHROW;
 # define gomp_set_nest_lock_30 omp_set_nest_lock
 # define gomp_unset_nest_lock_30 omp_unset_nest_lock
 # define gomp_test_nest_lock_30 omp_test_nest_lock
+
+# define goacc_get_device_type_201 acc_get_device_type
 #endif
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
diff --git libgomp/libgomp.map libgomp/libgomp.map
index 4d42c42..4803aab 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -304,7 +304,12 @@ OACC_2.0 {
acc_get_num_devices_h_;
acc_set_device_type;
acc_set_device_type_h_;
+#ifdef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT
+   # If the assembler used lacks the .symver directive or the linker
+   # 

Re: [PATCH 02/18] make avail_stores a vec

2016-04-20 Thread Segher Boessenkool
On Wed, Apr 20, 2016 at 02:22:06AM -0400, tbsaunde+...@tbsaunde.org wrote:
> +void
> +print_rtx_insn_vec (FILE *file, const vec<rtx_insn *> &vec)
> +{
> +  fputc('{', file);
> +
> +  unsigned int len = vec.length ();
> +  for (unsigned int i = 0; i < len; i++)
> +{
> +  print_rtl (file, vec[i]);
> +  if (i < (len - 1))

Useless parens.

> + fputs (", ", file);
> +}
> +  fputc ('}', file);
> +}

OCD wants a blank line before that closing brace print, just like the
one after the opening one, heh.

> @@ -697,7 +694,7 @@ compute_store_table (void)
> ptr != NULL;
> ptr = *prev_next_ptr_ptr)
>  {
> -  if (! ptr->avail_stores)
> +  if ( ptr->avail_stores.is_empty ())

Stray space char.


Segher


Re: [PATCH] Simplify ix86_expand_vector_move_misalign

2016-04-20 Thread Uros Bizjak
On Wed, Apr 20, 2016 at 1:09 PM, Uros Bizjak  wrote:
> On Tue, Apr 19, 2016 at 4:48 PM, H.J. Lu  wrote:
>> Since mov_internal patterns handle both aligned/unaligned load
>> and store, we can simplify ix86_avx256_split_vector_move_misalign and
>> ix86_expand_vector_move_misalign.
>>
>> Tested on x86-64.  OK for trunk?
>>
>> H.J.
>> ---
>> * config/i386/i386.c (ix86_avx256_split_vector_move_misalign):
>> Short-cut unaligned load and store cases.  Handle all integer
>> vector modes.
>> (ix86_expand_vector_move_misalign): Short-cut unaligned load
>> and store cases.  Call ix86_avx256_split_vector_move_misalign
>> directly without checking mode class.
>
> LGTM, but it is hard to review interwoven code movements and deletions...
>
> Hopefully OK.

BTW: There are a couple of regressions in the testsuite [1] when
configured --with-arch=corei7. Can you please look at the testcases,
if scan patterns need to be adjusted?

FAIL: gcc.target/i386/avx256-unaligned-load-1.c scan-assembler-not
(avx_loadups256|vmovups[^\\n\\r]*movv8sf_internal)
FAIL: gcc.target/i386/avx256-unaligned-store-2.c scan-assembler
vmovups.*movv16qi_internal/3

[1] https://gcc.gnu.org/ml/gcc-testresults/2016-04/msg01932.html

Uros.


Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Marek Polacek
On Wed, Apr 20, 2016 at 12:54:12PM +0200, Richard Biener wrote:
> On Wed, Apr 20, 2016 at 12:37 PM, Jakub Jelinek  wrote:
> > On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
> >> > --- gcc/tree-if-conv.c
> >> > +++ gcc/tree-if-conv.c
> >> > @@ -262,6 +262,16 @@ ifc_temp_var (tree type, tree expr, 
> >> > gimple_stmt_iterator *gsi)
> >> >return new_name;
> >> >  }
> >> >
> >> > +/* Return true when COND is a false predicate.  */
> >> > +
> >> > +static inline bool
> >> > +is_false_predicate (tree cond)
> >> > +{
> >> > +  return (cond == NULL_TREE
> >> > + || cond == boolean_false_node
> >> > + || integer_zerop (cond));
> >> > +}
> >> > +
> >
> > Is it really a good idea to return true even for cond == NULL_TREE?
> > I mean it is then very confusing, because both is_true_predicate and
> > is_false_predicate are true in that case.
> 
> Ah, indeed.  NULL_TREE is true, not false.

I can fix it up with the following.

Bootstrap/regtest pending on x86_64-linux, ok for trunk and 6 if it passes?

2016-04-20  Marek Polacek  

* tree-if-conv.c (is_false_predicate): For NULL_TREE return false
rather than true.

diff --git gcc/tree-if-conv.c gcc/tree-if-conv.c
index a9fbab9..72e808e 100644
--- gcc/tree-if-conv.c
+++ gcc/tree-if-conv.c
@@ -267,9 +267,9 @@ ifc_temp_var (tree type, tree expr, gimple_stmt_iterator 
*gsi)
 static inline bool
 is_false_predicate (tree cond)
 {
-  return (cond == NULL_TREE
- || cond == boolean_false_node
- || integer_zerop (cond));
+  return (cond != NULL_TREE
+ && (cond == boolean_false_node
+ || integer_zerop (cond)));
 }
 
 /* Return true when COND is a true predicate.  */

Marek


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Wilco Dijkstra
Jakub Jelinek wrote:
> I still don't like this transformation and would very much prefer to see
> using rawmemchr instead on targets that provide it, and also this is
> something that IMHO should be done in the tree-ssa-strlen.c pass together
> with the other optimizations in there.  Similarly to stpcpy, which is also
> non-standard (in POSIX, but not in C), we should just look at headers if
> rawmemchr is defined with compatible prototype.

Can you quantify "don't like"? I benchmarked rawmemchr on a few targets
and it's slower than strlen, so it's hard to guess what you don't like about it.

Several targets don't even have an assembly implementation of rawmemchr, so
looking at the header would not be sufficient to determine rawmemchr is fast, 
let
alone as fast as strlen.

The tree-ssa-strlen pass seems to optimize repeated calls to strlen, or strcpy
after a strlen, so I'm not sure how this is related - this is a local 
transformation
like the foldings in builtin.c/gimple-fold.c.

> Also, strrchr (s, 0) should be folded to strchr (s, 0) or handled the same
> like that one.

GCC converts strrchr (s, 0) to strchr (s, 0) which then gets optimized. I 
checked
this happens as expected with both versions of my patch.

> And, while x = strchr (s, 0) to x = rawmemchr (s, 0) is a reasonable -Os
> transformation, x = s + strlen (s) is not, it makes code usually larger
> (especially because it increases register pressure across the call).

Indeed, that's why my transformation is disabled with -Os.

Wilco



Re: [PATCH] Simplify ix86_expand_vector_move_misalign

2016-04-20 Thread Uros Bizjak
On Tue, Apr 19, 2016 at 4:48 PM, H.J. Lu  wrote:
> Since mov_internal patterns handle both aligned/unaligned load
> and store, we can simplify ix86_avx256_split_vector_move_misalign and
> ix86_expand_vector_move_misalign.
>
> Tested on x86-64.  OK for trunk?
>
> H.J.
> ---
> * config/i386/i386.c (ix86_avx256_split_vector_move_misalign):
> Short-cut unaligned load and store cases.  Handle all integer
> vector modes.
> (ix86_expand_vector_move_misalign): Short-cut unaligned load
> and store cases.  Call ix86_avx256_split_vector_move_misalign
> directly without checking mode class.

LGTM, but it is hard to review interwoven code movements and deletions...

Hopefully OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c | 252 
> -
>  1 file changed, 81 insertions(+), 171 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 4e48572..e056f68 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -18820,7 +18820,39 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx 
> op1)
>rtx (*extract) (rtx, rtx, rtx);
>machine_mode mode;
>
> -  switch (GET_MODE (op0))
> +  if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
> +  || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
> +{
> +  emit_insn (gen_rtx_SET (op0, op1));
> +  return;
> +}
> +
> +  rtx orig_op0 = NULL_RTX;
> +  mode = GET_MODE (op0);
> +  switch (GET_MODE_CLASS (mode))
> +{
> +case MODE_VECTOR_INT:
> +case MODE_INT:
> +  if (mode != V32QImode)
> +   {
> + if (!MEM_P (op0))
> +   {
> + orig_op0 = op0;
> + op0 = gen_reg_rtx (V32QImode);
> +   }
> + else
> +   op0 = gen_lowpart (V32QImode, op0);
> + op1 = gen_lowpart (V32QImode, op1);
> + mode = V32QImode;
> +   }
> +  break;
> +case MODE_VECTOR_FLOAT:
> +  break;
> +default:
> +  gcc_unreachable ();
> +}
> +
> +  switch (mode)
>  {
>  default:
>gcc_unreachable ();
> @@ -18840,34 +18872,25 @@ ix86_avx256_split_vector_move_misalign (rtx op0, 
> rtx op1)
>
>if (MEM_P (op1))
>  {
> -  if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
> - && optimize_insn_for_speed_p ())
> -   {
> - rtx r = gen_reg_rtx (mode);
> - m = adjust_address (op1, mode, 0);
> - emit_move_insn (r, m);
> - m = adjust_address (op1, mode, 16);
> - r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
> - emit_move_insn (op0, r);
> -   }
> -  else
> -   emit_insn (gen_rtx_SET (op0, op1));
> +  rtx r = gen_reg_rtx (mode);
> +  m = adjust_address (op1, mode, 0);
> +  emit_move_insn (r, m);
> +  m = adjust_address (op1, mode, 16);
> +  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
> +  emit_move_insn (op0, r);
>  }
>else if (MEM_P (op0))
>  {
> -  if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
> - && optimize_insn_for_speed_p ())
> -   {
> - m = adjust_address (op0, mode, 0);
> - emit_insn (extract (m, op1, const0_rtx));
> - m = adjust_address (op0, mode, 16);
> - emit_insn (extract (m, op1, const1_rtx));
> -   }
> -  else
> -   emit_insn (gen_rtx_SET (op0, op1));
> +  m = adjust_address (op0, mode, 0);
> +  emit_insn (extract (m, op1, const0_rtx));
> +  m = adjust_address (op0, mode, 16);
> +  emit_insn (extract (m, op1, const1_rtx));
>  }
>else
>  gcc_unreachable ();
> +
> +  if (orig_op0)
> +emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
>  }
>
>  /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
> @@ -18925,118 +18948,50 @@ ix86_avx256_split_vector_move_misalign (rtx op0, 
> rtx op1)
>  void
>  ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
>  {
> -  rtx op0, op1, orig_op0 = NULL_RTX, m;
> +  rtx op0, op1, m;
>
>op0 = operands[0];
>op1 = operands[1];
>
> -  if (GET_MODE_SIZE (mode) == 64)
> +  /* Use unaligned load/store for AVX512 or when optimizing for size.  */
> +  if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
>  {
> -  switch (GET_MODE_CLASS (mode))
> -   {
> -   case MODE_VECTOR_INT:
> -   case MODE_INT:
> - if (GET_MODE (op0) != V16SImode)
> -   {
> - if (!MEM_P (op0))
> -   {
> - orig_op0 = op0;
> - op0 = gen_reg_rtx (V16SImode);
> -   }
> - else
> -   op0 = gen_lowpart (V16SImode, op0);
> -   }
> - op1 = gen_lowpart (V16SImode, op1);
> - /* FALLTHRU */
> -
> -   case MODE_VECTOR_FLOAT:
> -
> - emit_insn (gen_rtx_SET (op0, op1));
> - if (orig_op0)
> -   emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
> - break;
> -
> 

Re: [PATCH, i386] Relax target requirement for vec_unpacks_lo_hi

2016-04-20 Thread Kirill Yukhin
Hi Ilya,
On 19 Apr 19:09, Ilya Enkovich wrote:
> Hi,
> 
> vec_unpacks_lo_[si,hi,di] patterns for scalar masks don't need to extend
> mask elements.  It means a simple register copy is good enough.
> 
> Currently vec_unpacks_lo_hi pattern uses kmovb instruction which requires
> AVX512DQ target.  But 16-bit masks to/from 8-bit masks conversion is typical
> for AVX512F code with a mix of integer (or float, or logical (kind=4) for
> Fortran) and double computations.  This patch implements vec_unpacks_lo_hi
> as kmovw instead to make masks conversion available for AVX512F target.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.  Does it look OK
> for trunk?
Patch is OK for main trunk.

--
Thanks, K

> 
> Thanks,
> Ilya
> --
> gcc/
> 
> 2016-04-19  Ilya Enkovich  
> 
>   * config/i386/sse.md (vec_unpacks_lo_hi): Always
>   use kmovw to support AVX512F target.
> 
> 
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 4d2927e..c213ee1 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -13661,9 +13661,9 @@
>"ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
>  
>  (define_expand "vec_unpacks_lo_hi"
> -  [(set (match_operand:QI 0 "register_operand")
> -(subreg:QI (match_operand:HI 1 "register_operand") 0))]
> -  "TARGET_AVX512DQ")
> +  [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
> +(match_operand:HI 1 "register_operand"))]
> +  "TARGET_AVX512F")
>  
>  (define_expand "vec_unpacks_lo_si"
>[(set (match_operand:HI 0 "register_operand")


[Ada] Spurious discriminant check on type with unknown discriminants

2016-04-20 Thread Arnaud Charlet
This patch removes a spurious discriminant check on a generated assignment
statement in an iterator loop, when the cursor type is a type with unknown
discriminants, when the full view has discriminants with defaults.

Executing:

   gnatmake -f -q -g date_iteration_test.adb
   date_iteration_test

Must yield:

 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

2015-12-31 05:00:00
2016-01-01 05:00:00
2016-01-02 05:00:00
2016-01-03 05:00:00
2016-01-04 05:00:00
2016-01-05 05:00:00
2016-01-06 05:00:00
2016-01-07 05:00:00
2016-01-08 05:00:00
2016-01-09 05:00:00
2016-01-10 05:00:00

---

with Date_Iteration;
with Ada.Calendar.Formatting;
with Ada.Text_IO; use Ada.Text_IO;
procedure Date_Iteration_Test is
   use type Ada.Calendar.Time;
   Day : constant Duration := 86_400.0;
   Number : Natural := 0;
begin
   for D in Date_Iteration.Generator
 (Start_Time => Ada.Calendar.Clock,
  End_Time   => Ada.Calendar.Clock + Day * 10)
   loop
  Number := Number + 1;
  Put_Line (Number'Img);
   end loop;
   New_Line;
   for D of Date_Iteration.Generator
 (Start_Time => Ada.Calendar.Time_Of (2015, 12, 31),
  End_Time   => Ada.Calendar.Time_Of (2015, 12, 31) + Day * 10)
   loop
  Put_Line (Ada.Calendar.Formatting.Image (D));
   end loop;
end Date_Iteration_Test;
---
with Ada.Calendar;
with Ada.Iterator_Interfaces;
package Date_Iteration is

   type Cursor (<>) is private;

   function Has_Element (C : Cursor) return Boolean;
   function Date (C : Cursor) return Ada.Calendar.Time;

   package Iterator_Interfaces
 is new Ada.Iterator_Interfaces (Cursor, Has_Element);

   type Date_Set is new Iterator_Interfaces.Forward_Iterator with private
   with
 Constant_Indexing => Element,
 Default_Iterator  => Iterate,
 Iterator_Element  => Ada.Calendar.Time;

   function Element (Set : Date_Set; C : Cursor) return Ada.Calendar.Time;

   function Iterate (Set : Date_Set)
return Iterator_Interfaces.Forward_Iterator'Class;

   function Generator (Start_Time : Ada.Calendar.Time;
   End_Time   : Ada.Calendar.Time;
   Interval   : Duration := 86_400.0)
  return Date_Set;
private
   type Cursor (Valid : Boolean := True) is record
  case Valid is
 when True =>
Date : Ada.Calendar.Time;
 when False =>
null;
  end case;
   end record;

   function Has_Element (C : Cursor) return Boolean is (C.Valid);

   function Date (C : Cursor) return Ada.Calendar.Time is (C.Date);

   type Date_Set is new Iterator_Interfaces.Forward_Iterator with record
  Start_Time : Ada.Calendar.Time;
  End_Time   : Ada.Calendar.Time;
  Interval   : Duration;
   end record;

   overriding
   function First (Object : Date_Set) return Cursor;
   overriding
   function Next (Object : Date_Set; Position : Cursor) return Cursor;

   function Element (Set : Date_Set; C : Cursor) return Ada.Calendar.Time
 is (C.Date);

   function Iterate (Set : Date_Set)
return Iterator_Interfaces.Forward_Iterator'Class
  is (Set);
end Date_Iteration;
---
package body Date_Iteration is
   function Generator (Start_Time : Ada.Calendar.Time;
   End_Time   : Ada.Calendar.Time;
   Interval   : Duration := 86_400.0)
  return Date_Set is
   begin
  return D : Date_Set do
 D := (Start_Time   => Start_Time,
   End_Time => End_Time,
   Interval => Interval);
  end return;
   end Generator;

   function First (Object : Date_Set) return Cursor is
  use type Ada.Calendar.Time;
   begin
  if Object.End_Time >= Object.Start_Time then
 return (Valid => True, Date => Object.Start_Time);
  else
 return (Valid => False);
  end if;
   end First;

   function Next (Object : Date_Set; Position : Cursor)
 return Cursor is
  use type Ada.Calendar.Time;
  Next : Ada.Calendar.Time := Position.Date + Object.Interval;
   begin
  if Next > Object.End_Time then
 return (Valid => False);
  else
 return (Valid => True, Date => Next);
  end if;
   end Next;
end Date_Iteration;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* exp_ch5.adb (Expand_N_Assignment_Statement): Do not generate
a discriminant check for a type whose partial view has unknown
discriminants when the full view has discriminants with defaults.

Index: exp_ch5.adb
===
--- exp_ch5.adb (revision 235265)
+++ exp_ch5.adb (working copy)
@@ -1946,10 +1946,12 @@
   --  have a full view with discriminants, but those are nameable only
   --  in the underlying type, so convert the Rhs to it before potential
   --  checking. Convert Lhs as well, otherwise the actual subtype might
-  --  not be constructible.
+  --  

Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 12:37 PM, Jakub Jelinek  wrote:
> On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
>> > --- gcc/tree-if-conv.c
>> > +++ gcc/tree-if-conv.c
>> > @@ -262,6 +262,16 @@ ifc_temp_var (tree type, tree expr, 
>> > gimple_stmt_iterator *gsi)
>> >return new_name;
>> >  }
>> >
>> > +/* Return true when COND is a false predicate.  */
>> > +
>> > +static inline bool
>> > +is_false_predicate (tree cond)
>> > +{
>> > +  return (cond == NULL_TREE
>> > + || cond == boolean_false_node
>> > + || integer_zerop (cond));
>> > +}
>> > +
>
> Is it really a good idea to return true even for cond == NULL_TREE?
> I mean it is then very confusing, because both is_true_predicate and
> is_false_predicate are true in that case.

Ah, indeed.  NULL_TREE is true, not false.

> It doesn't make a difference when both are used in ||, but looks really
> weird and makes the occasional reader wonder if NULL_TREE is valid there at
> all and what exactly it means.
>
>> >  /* Return true when COND is a true predicate.  */
>> >
>> >  static inline bool
>> > @@ -1988,7 +1998,7 @@ predicate_mem_writes (loop_p loop)
>> >gimple *stmt;
>> >int index;
>> >
>> > -  if (is_true_predicate (cond))
>> > +  if (is_true_predicate (cond) || is_false_predicate (cond))
>> > continue;
>> >
>> >swap = false;
>> >
>> > Marek
>
> Jakub


[Ada] Illegal use of current instance in attribute reference

2016-04-20 Thread Arnaud Charlet
The current instance of a type in an aspect specification is an object of
the type. If the type is scalar, it cannot be the prefix of an attribute
reference such as 'First, whose prefix must be an array object (even though it
can be a scalar type in other contexts).

Compiling foo.adb must yield:
foo.adb:3:29:
  prefix of "First" attribute cannot be the current instance of a scalar type
foo.adb:6:29:
  prefix of "First" attribute cannot be the current instance of a scalar type

---
procedure Foo is
type T_Data_Sending_Frequency is new Natural
  with Default_Value => T_Data_Sending_Frequency'First;

type Infrequent is new Natural
  with Default_Value => Infrequent'First + 3;

begin
null;
end;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* sem_attr.adb (Check_Type): Reject an attribute reference in
an aspect expression, when the prefix of the reference is the
current instance of the type to which the aspect applies.

Index: sem_attr.adb
===
--- sem_attr.adb(revision 235243)
+++ sem_attr.adb(working copy)
@@ -1408,10 +1408,41 @@
   
 
   procedure Check_Array_Or_Scalar_Type is
+ function In_Aspect_Specification return Boolean;
+ --  A current instance of a type in an aspect specification is an
+ --  object and not a type, and therefore cannot be of a scalar type
+ --  in the prefix of one of the array attributes if the attribute
+ --  reference is part of an aspect expression.
+
+ -
+ -- In_Aspect_Specification --
+ -
+
+ function In_Aspect_Specification return Boolean is
+P : Node_Id;
+
+ begin
+P := Parent (N);
+while Present (P) loop
+   if Nkind (P) = N_Aspect_Specification then
+  return P_Type = Entity (P);
+
+   elsif Nkind (P) in N_Declaration then
+  return False;
+   end if;
+
+   P := Parent (P);
+end loop;
+
+return False;
+ end In_Aspect_Specification;
+
+ --  Local variables
+
+ Dims  : Int;
  Index : Entity_Id;
 
- D : Int;
- --  Dimension number for array attributes
+  --  Start of processing for Check_Array_Or_Scalar_Type
 
   begin
  --  Case of string literal or string literal subtype. These cases
@@ -1431,6 +1462,12 @@
 
 if Present (E1) then
Error_Attr ("invalid argument in % attribute", E1);
+
+elsif In_Aspect_Specification then
+   Error_Attr
+ ("prefix of % attribute cannot be the current instance of a "
+  & "scalar type", P);
+
 else
Set_Etype (N, P_Base_Type);
return;
@@ -1466,9 +1503,9 @@
Set_Etype (N, Base_Type (Etype (Index)));
 
 else
-   D := UI_To_Int (Intval (E1));
+   Dims := UI_To_Int (Intval (E1));
 
-   for J in 1 .. D - 1 loop
+   for J in 1 .. Dims - 1 loop
   Next_Index (Index);
end loop;
 


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 12:33 PM, Jakub Jelinek  wrote:
> On Wed, Apr 20, 2016 at 11:44:08AM +0200, Richard Biener wrote:
>> (simplify
>>  (BUILT_IN_STRCHR @0 integer_zerop)
>>  (pointer_plus @0 (BUILT_IN_STRLEN:size_type_node @0)))
>
> I still don't like this transformation and would very much prefer to see
> using rawmemchr instead on targets that provide it, and also this is
> something that IMHO should be done in the tree-ssa-strlen.c pass together
> with the other optimizations in there.  Similarly to stpcpy, which is also
> non-standard (in POSIX, but not in C), we should just look at headers if
> rawmemchr is defined with compatible prototype.
> Also, strrchr (s, 0) should be folded to strchr (s, 0) or handled the same
> like that one.
> And, while x = strchr (s, 0) to x = rawmemchr (s, 0) is a reasonable -Os
> transformation, x = s + strlen (s) is not, it makes code usually larger
> (especially because it increases register pressure across the call).

Sure - agreed.  So with the patch

(simplify
 (BUILT_IN_STRRCHR @0 integer_zerop@1)
 (BUILT_IN_STRCHR @0 @1))

is possible at least ;)

Richard.

> Jakub


Re: [PATCH GCC]Support BIT_AND_EXPR in scalar evolution

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 12:22 PM, Bin.Cheng  wrote:
> On Wed, Apr 20, 2016 at 9:55 AM, Richard Biener
>  wrote:
>> On Tue, Apr 19, 2016 at 7:00 PM, Bin Cheng  wrote:
>>> Hi,
>>> Type conversion from integer to smaller unsigned type could be transformed 
>>> into BIT_AND_EXPR in compilation.  For example,
>>>   int i;
>>>   for (i = 0; i < n; i++)
>>> {
>>>   unsigned char uc = (unsigned char) i;  // transformed into X = i & 
>>> 255, in which both X and i are of int type.
>>>   b[uc] = 0;
>>> }
>>> X here could be a valid SCEV if we can prove that loop doesn't iterate more 
>>> than 255 times.  In other words, if 'i' is SCEV and its value is in the 
>>> range of representable set of type "unsigned char".  This information could 
>>> be available with -faggressive-loop-optimizations.
>>> This patch adds support for BIT_AND_EXPR in scalar evolution to handle such 
>>> cases, as well as two new tests.
>>>
>>> Bootstrap and test on x86_64 & AArch64.  Is it OK?
>>
>> Don't use cst_and_fits_in_hwi/int_cst_value - those are odd beasts.
>> Use tree_fits_uhwi_p / tree_to_uhwi.
>> Or just use wi::popcount and verify it against wi::clz.
> Thanks for reviewing, here is the updated patch.  Regtest ongoing,
> shouldn't be any surprise though.  Is it OK?

Ok.

Thanks,
Richard.

> Thanks,
> bin
>>
>> Richard.
>>
>>> Thanks,
>>> bin
>>>
>>> 2016-03-24  Bin Cheng  
>>>
>>> * tree-scalar-evolution.c (interpret_rhs_expr): Handle BIT_AND_EXPR.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2016-03-24  Bin Cheng  
>>>
>>> * gcc.dg/tree-ssa/scev-11.c: New test.
>>> * gcc.dg/tree-ssa/scev-12.c: New test.
>>>


[Ada] Leak with function returning String in exception handler

2016-04-20 Thread Arnaud Charlet
This patch modifies the transient scope mechanism to ignore blocks generated
for exception handlers with a choice parameter when propagating secondary stack
information up the scope stack. Such blocks are not physically present in the
tree and can never release the secondary stack on exit.


-- Source --


--  memory_leak.adb

procedure Memory_Leak is
   function My_String return String is
   begin
  return "Foo";
   end My_String;

begin
   for I in 1 .. 100_000 loop
  begin
 raise Program_Error;
  exception
 when E : others =>
if My_String = "Bar" then
   raise;
end if;
  end;
   end loop;
end Memory_Leak;


-- Compilation and output --


$ gnatmake -q memory_leak.adb -largs -lgmem
$ ./memory_leak
$ gnatmem ./memory_leak > output.txt
$ grep "Total number of" output.txt
   Total number of allocations:10
   Total number of deallocations  :10

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Hristian Kirtchev  

* einfo.adb Flag286 is now used as Is_Exception_Handler.
(Is_Exception_Handler): New routine.
(Set_Is_Exception_Handler): New routine.
(Write_Entity_Flags): Output the status of Is_Exception_Handler.
* einfo.ads New attribute Is_Exception_Handler along with
occurrences in entities.
(Is_Exception_Handler): New routine along with pragma Inline.
(Set_Is_Exception_Handler): New routine along with pragma Inline.
* exp_ch7.adb (Make_Transient_Block): Ignore blocks generated
for exception handlers with a choice parameter.
* sem_ch11.adb (Analyze_Exception_Handlers): Mark the scope
generated for a choice parameter as an exception handler.

Index: exp_ch7.adb
===
--- exp_ch7.adb (revision 235258)
+++ exp_ch7.adb (working copy)
@@ -7993,14 +7993,22 @@
elsif Ekind_In (S, E_Entry, E_Loop) then
   exit;
 
-   --  In a procedure or a block, we release on exit of the
-   --  procedure or block. ??? memory leak can be created by
-   --  recursive calls.
+   --  In a procedure or a block, release the sec stack on exit
+   --  from the construct. Note that an exception handler with a
+   --  choice parameter requires a declarative region in the form
+   --  of a block. The block does not physically manifest in the
+   --  tree as it only serves as a scope. Do not consider such a
+   --  block because it will never release the sec stack.
 
-   elsif Ekind_In (S, E_Block, E_Procedure) then
+   --  ??? Memory leak can be created by recursive calls
+
+   elsif Ekind (S) = E_Procedure
+ or else (Ekind (S) = E_Block
+   and then not Is_Exception_Handler (S))
+   then
+  Set_Uses_Sec_Stack (Current_Scope, False);
   Set_Uses_Sec_Stack (S, True);
   Check_Restriction (No_Secondary_Stack, Action);
-  Set_Uses_Sec_Stack (Current_Scope, False);
   exit;
 
else
Index: einfo.adb
===
--- einfo.adb   (revision 235248)
+++ einfo.adb   (working copy)
@@ -597,7 +597,7 @@
--Is_Uplevel_Referenced_EntityFlag283
--Is_UnimplementedFlag284
--Is_Volatile_Full_Access Flag285
-   --(unused)Flag286
+   --Is_Exception_HandlerFlag286
--Rewritten_For_C Flag287
 
--(unused)Flag288
@@ -1976,12 +1976,6 @@
   return Flag146 (Id);
end Is_Abstract_Type;
 
-   function Is_Local_Anonymous_Access (Id : E) return B is
-   begin
-  pragma Assert (Is_Access_Type (Id));
-  return Flag194 (Id);
-   end Is_Local_Anonymous_Access;
-
function Is_Access_Constant (Id : E) return B is
begin
   pragma Assert (Is_Access_Type (Id));
@@ -2137,6 +2131,12 @@
   return Flag52 (Id);
end Is_Entry_Formal;
 
+   function Is_Exception_Handler (Id : E) return B is
+   begin
+  pragma Assert (Ekind (Id) = E_Block);
+  return Flag286 (Id);
+   end Is_Exception_Handler;
+
function Is_Exported (Id : E) return B is
begin
   return Flag99 (Id);
@@ -2307,6 +2307,12 @@
   return Flag25 (Id);
end Is_Limited_Record;
 
+   function Is_Local_Anonymous_Access (Id : E) return B is
+   begin
+  pragma Assert (Is_Access_Type (Id));
+  return Flag194 (Id);
+   end Is_Local_Anonymous_Access;
+
function Is_Machine_Code_Subprogram (Id : E) return B is
begin
   pragma Assert (Is_Subprogram (Id));
@@ -5146,6 +5152,12 @@
   

[Ada] Spurious error on binary operator in instance

2016-04-20 Thread Arnaud Charlet
This patch updates the mechanism which qualifies universal literals that act as
operands of binary or unary operators to avoid the partial qualification of the
subtype_mark when the immediate scope of the corresponding actual parameter is
a generic unit. No simple reproducer possible.

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Hristian Kirtchev  

* sem_ch12.adb (Qualify_Type): Do not perform
partial qualification when the immediate scope is a generic unit.

Index: sem_ch12.adb
===
--- sem_ch12.adb(revision 235256)
+++ sem_ch12.adb(working copy)
@@ -14052,7 +14052,7 @@
  begin
 Result := Make_Identifier (Loc, Chars (Typ));
 
-if Present (Scop) and then Scop /= Standard_Standard then
+if Present (Scop) and then not Is_Generic_Unit (Scop) then
Result :=
  Make_Selected_Component (Loc,
Prefix=> Make_Identifier (Loc, Chars (Scop)),


Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
> > --- gcc/tree-if-conv.c
> > +++ gcc/tree-if-conv.c
> > @@ -262,6 +262,16 @@ ifc_temp_var (tree type, tree expr, 
> > gimple_stmt_iterator *gsi)
> >return new_name;
> >  }
> >
> > +/* Return true when COND is a false predicate.  */
> > +
> > +static inline bool
> > +is_false_predicate (tree cond)
> > +{
> > +  return (cond == NULL_TREE
> > + || cond == boolean_false_node
> > + || integer_zerop (cond));
> > +}
> > +

Is it really a good idea to return true even for cond == NULL_TREE?
I mean it is then very confusing, because both is_true_predicate and
is_false_predicate are true in that case.
It doesn't make a difference when both are used in ||, but looks really
weird and makes the occasional reader wonder if NULL_TREE is valid there at
all and what exactly it means.

> >  /* Return true when COND is a true predicate.  */
> >
> >  static inline bool
> > @@ -1988,7 +1998,7 @@ predicate_mem_writes (loop_p loop)
> >gimple *stmt;
> >int index;
> >
> > -  if (is_true_predicate (cond))
> > +  if (is_true_predicate (cond) || is_false_predicate (cond))
> > continue;
> >
> >swap = false;
> >
> > Marek

Jakub


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 11:44:08AM +0200, Richard Biener wrote:
> (simplify
>  (BUILT_IN_STRCHR @0 integer_zerop)
>  (pointer_plus @0 (BUILT_IN_STRLEN:size_type_node @0)))

I still don't like this transformation and would very much prefer to see
using rawmemchr instead on targets that provide it, and also this is
something that IMHO should be done in the tree-ssa-strlen.c pass together
with the other optimizations in there.  Similarly to stpcpy, which is also
non-standard (in POSIX, but not in C), we should just look at headers if
rawmemchr is defined with compatible prototype.
Also, strrchr (s, 0) should be folded to strchr (s, 0) or handled the same
like that one.
And, while x = strchr (s, 0) to x = rawmemchr (s, 0) is a reasonable -Os
transformation, x = s + strlen (s) is not, it makes code usually larger
(especially because it increases register pressure across the call).

Jakub


[Ada] Missing style warning on overlong line in task subunit

2016-04-20 Thread Arnaud Charlet
This patch restores a warning message on an overlong line in a subunit that
is a task body, when compiling the parent unit.

Compiling:

   gcc -c -gnatyM50 par.adb

must yield:

par-separated_task.adb:6:51: (style) this line is too long

---
package Par is

   task type Separated_Task is
  entry Start;
   end Separated_Task;

end Par;
---
package body Par is

   task body Separated_Task is separate;

end Par;
---
separate (Par)
task body Separated_Task is

   procedure Other is
   begin
  --- Too long
  null;
   end Other;
begin
   accept Start;
   Other;
end Separated_Task;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* sem.adb (Do_Analyze): Save and restore Style_Max_Line_Length
so that the corresponding checks are preserved across compilations
that include System.Constants in their context.

Index: sem.adb
===
--- sem.adb (revision 235192)
+++ sem.adb (working copy)
@@ -53,6 +53,7 @@
 with Sem_Util; use Sem_Util;
 with Sinfo;use Sinfo;
 with Stand;use Stand;
+with Stylesw;  use Stylesw;
 with Uintp;use Uintp;
 with Uname;use Uname;
 
@@ -1316,6 +1317,13 @@
   procedure Do_Analyze is
  Save_Ghost_Mode : constant Ghost_Mode_Type := Ghost_Mode;
 
+ --  Generally style checks are preserved across compilations, with
+ --  one exception: s-oscons.ads, which allows arbitrary long lines
+ --  unconditionally, and has no restore mechanism, because it is
+ --  intended as a lowest-level Pure package.
+
+ Save_Max_Line   : constant Int := Style_Max_Line_Length;
+
  List : Elist_Id;
 
   begin
@@ -1346,6 +1354,7 @@
  Pop_Scope;
  Restore_Scope_Stack (List);
  Ghost_Mode := Save_Ghost_Mode;
+ Style_Max_Line_Length := Save_Max_Line;
   end Do_Analyze;
 
   --  Local variables


[Ada] Constraint_Error on spurious ambiguity in instance

2016-04-20 Thread Arnaud Charlet
This patch updates the instantiation machinery to properly preserve a reference
to a global type in a qualified expression used to convert a universal literal
to a specific type, and propagate it to the instantiated template.


-- Source --


--  types.ads

package Types is
   type Uint is private;
   type Int is range -2**31 .. +2**31 - 1;

   function "+" (Left : Uint; Right : Uint) return Uint;
   function "+" (Left : Int;  Right : Uint) return Uint;
   function "+" (Left : Uint; Right : Int)  return Uint;

   function "*" (Left : Uint; Right : Uint) return Uint;
   function "*" (Left : Int;  Right : Uint) return Uint;
   function "*" (Left : Uint; Right : Int)  return Uint;

private
   Uint_Low_Bound  : constant := 600_000_000;
   Uint_High_Bound : constant := 2_099_999_999;

   type Uint is new Int range Uint_Low_Bound .. Uint_High_Bound;
   No_Uint : constant Uint := Uint (Uint_Low_Bound);
end Types;

--  types.adb

package body Types is
   function "+" (Left : Uint; Right : Uint) return Uint is
   begin return No_Uint; end "+";
   function "+" (Left : Int;  Right : Uint) return Uint is
   begin return No_Uint; end "+";
   function "+" (Left : Uint; Right : Int)  return Uint is
   begin return No_Uint; end "+";

   function "*" (Left : Uint; Right : Uint) return Uint is
   begin return No_Uint; end "+";
   function "*" (Left : Int;  Right : Uint) return Uint is
   begin return No_Uint; end "+";
   function "*" (Left : Uint; Right : Int)  return Uint is
   begin return No_Uint; end "+";
end Types;

--  types_gen.ads

generic
package Types_Gen is
   procedure Compute;
end Types_Gen;

--  types_gen.adb

with Types; use Types;

package body Types_Gen is
   procedure Compute is
  UI_Int_Value : Uint;
   begin
  UI_Int_Value := UI_Int_Value * 10 + 20;
   end Compute;
end Types_Gen;

--  types_inst.ads

with Types_Gen;

package Types_Inst is new Types_Gen;

-
-- Compilation --
-

$ gcc -c -gnatct types_inst.ads

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Hristian Kirtchev  

* sem_ch12.adb (Copy_Generic_Node): Handle the special
qualification installed for universal literals that act as
operands in binary or unary operators.  (Qualify_Operand): Mark
the qualification to signal the instantiation mechanism how to
handle global reference propagation.
* sinfo.adb (Is_Qualified_Universal_Literal): New routine.
(Set_Is_Qualified_Universal_Literal): New routine.
* sinfo.ads New attribute Is_Qualified_Universal_Literal along
with occurrences in nodes.
(Is_Qualified_Universal_Literal):
New routine along with pragma Inline.
(Set_Is_Qualified_Universal_Literal): New routine along with
pragma Inline.

Index: sem_ch12.adb
===
--- sem_ch12.adb(revision 235254)
+++ sem_ch12.adb(working copy)
@@ -7293,6 +7293,20 @@
  Set_Entity (New_N, Entity (Assoc));
  Check_Private_View (N);
 
+  --  The node is a reference to a global type and acts as the
+  --  subtype mark of a qualified expression created in order
+  --  to aid resolution of accidental overloading in instances.
+  --  Since N is a reference to a type, the Associated_Node of
+  --  N denotes an entity rather than another identifier. See
+  --  Qualify_Universal_Operands for details.
+
+  elsif Nkind (N) = N_Identifier
+and then Nkind (Parent (N)) = N_Qualified_Expression
+and then Subtype_Mark (Parent (N)) = N
+and then Is_Qualified_Universal_Literal (Parent (N))
+  then
+ Set_Entity (New_N, Assoc);
+
   --  The name in the call may be a selected component if the
   --  call has not been analyzed yet, as may be the case for
   --  pre/post conditions in a generic unit.
@@ -13982,6 +13996,7 @@
 Loc  : constant Source_Ptr := Sloc (Opnd);
 Typ  : constant Entity_Id  := Etype (Actual);
 Mark : Node_Id;
+Qual : Node_Id;
 
  begin
 --  Qualify the operand when it is of a universal type. Note that
@@ -14007,10 +14022,19 @@
   Mark := Qualify_Type (Loc, Typ);
end if;
 
-   Rewrite (Opnd,
+   Qual :=
  Make_Qualified_Expression (Loc,
Subtype_Mark => Mark,
-   Expression   => Relocate_Node (Opnd)));
+   Expression   => Relocate_Node (Opnd));
+
+   --  Mark the qualification to distinguish it from other source
+   --  constructs and signal the instantiation mechanism that this
+ 

Re: Remove unused openacc call

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 10:46:38AM +0200, Thomas Schwinge wrote:
> On Mon, 9 Nov 2015 16:58:48 -0500, Nathan Sidwell  wrote:
> > I've committed this to trunk.   It nuke the now unused 
> > GOACC_GET_NUM_THREADS and 
> > GOACC_GET_THREAD_NUM  calls.
> 
> > * omp-low.c: [...]
> > (lower_reduction_clauses): Remove BUILT_IN_GOACC_GET_THREAD_NUM call.
> > * omp-builtins.def (BUILT_IN_GOACC_GET_THREAD_NUM,
> > BUILT_IN_GOACC_GET_NUM_THREADS): Delete.
> 
> Given that in GCC 6 we only provide host fallback execution for
> executables compiled with GCC 5 (and thus using the legacy entry points),
> we can further clean this up as follows.  OK for gcc-6-branch and trunk?
> 
> commit a3993b5fae8b430e1dca23179aaa23a6c53ea2fb
> Author: Thomas Schwinge 
> Date:   Tue Nov 10 16:59:46 2015 +0100
> 
> Clean up libgomp GCC 5 legacy support
> 
>   libgomp/
>   * config/nvptx/oacc-parallel.c: Empty file.

Ok for trunk and 6.2, we don't need this for 6.1.

Jakub


[Ada] Spurious discriminant error on aggregate for derived type

2016-04-20 Thread Arnaud Charlet
This patch extends the mechanism used to provide discriminant values for
an aggregate of a derived type that constrains some parent discriminants
and renames others, when the type of the target is unconstrained.

No simple test available.

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* exp_aggr.adb (Init_Stored_Discriminants,
Init_Visible_Discriminants): New procedures, subsidiary of
Build_Record_Aggr_Code, to handle properly the construction
of aggregates for a derived type that constrains some parent
discriminants and renames others.

Index: exp_aggr.adb
===
--- exp_aggr.adb(revision 235253)
+++ exp_aggr.adb(working copy)
@@ -1879,6 +1879,11 @@
   --  Returns the first discriminant association in the constraint
   --  associated with T, if any, otherwise returns Empty.
 
+  function Get_Explicit_Discriminant_Value (D : Entity_Id) return Node_Id;
+  --  If the ancestor part is an unconstrained type and further ancestors
+  --  do not provide discriminants for it, check aggregate components for
+  --  values of the discriminants.
+
   procedure Init_Hidden_Discriminants (Typ : Entity_Id; List : List_Id);
   --  If Typ is derived, and constrains discriminants of the parent type,
   --  these discriminants are not components of the aggregate, and must be
@@ -1886,11 +1891,20 @@
   --  if Typ derives fron an already constrained subtype of a discriminated
   --  parent type.
 
-  function Get_Explicit_Discriminant_Value (D : Entity_Id) return Node_Id;
-  --  If the ancestor part is an unconstrained type and further ancestors
-  --  do not provide discriminants for it, check aggregate components for
-  --  values of the discriminants.
+  procedure Init_Stored_Discriminants;
+  --  If the type is derived and has inherited discriminants, generate
+  --  explicit assignments for each, using the store constraint of the
+  --  type. Note that both visible and stored discriminants must be
+  --  initialized in case the derived type has some renamed and some
+  --  constrained discriminants.
 
+  procedure Init_Visible_Discriminants;
+  --  If type has discriminants, retrieve their values from aggregate,
+  --  and generate explicit assignments for each. This does not include
+  --  discriminants inherited from ancestor, which are handled above.
+  --  The type of the aggregate is a subtype created ealier using the
+  --  given values of the discriminant components of the aggregate.
+
   function Is_Int_Range_Bounds (Bounds : Node_Id) return Boolean;
   --  Check whether Bounds is a range node and its lower and higher bounds
   --  are integers literals.
@@ -2279,6 +2293,70 @@
  end loop;
   end Init_Hidden_Discriminants;
 
+  
+  -- Init_Visible_Discriminants --
+  
+
+  procedure Init_Visible_Discriminants is
+ Discriminant   : Entity_Id;
+ Discriminant_Value : Node_Id;
+
+  begin
+ Discriminant := First_Discriminant (Typ);
+ while Present (Discriminant) loop
+Comp_Expr :=
+  Make_Selected_Component (Loc,
+Prefix=> New_Copy_Tree (Target),
+Selector_Name => New_Occurrence_Of (Discriminant, Loc));
+
+Discriminant_Value :=
+  Get_Discriminant_Value
+(Discriminant, Typ, Discriminant_Constraint (N_Typ));
+
+Instr :=
+  Make_OK_Assignment_Statement (Loc,
+Name   => Comp_Expr,
+Expression => New_Copy_Tree (Discriminant_Value));
+
+Set_No_Ctrl_Actions (Instr);
+Append_To (L, Instr);
+
+Next_Discriminant (Discriminant);
+ end loop;
+  end Init_Visible_Discriminants;
+
+  ---
+  -- Init_Stored_Discriminants --
+  ---
+
+  procedure Init_Stored_Discriminants is
+ Discriminant   : Entity_Id;
+ Discriminant_Value : Node_Id;
+
+  begin
+ Discriminant := First_Stored_Discriminant (Typ);
+ while Present (Discriminant) loop
+Comp_Expr :=
+  Make_Selected_Component (Loc,
+Prefix=> New_Copy_Tree (Target),
+Selector_Name => New_Occurrence_Of (Discriminant, Loc));
+
+Discriminant_Value :=
+  Get_Discriminant_Value
+(Discriminant, N_Typ, Discriminant_Constraint (N_Typ));
+
+Instr :=
+  Make_OK_Assignment_Statement (Loc,
+Name   => Comp_Expr,
+Expression => New_Copy_Tree (Discriminant_Value));
+
+Set_No_Ctrl_Actions (Instr);

Re: Resolve idempotency issue with libgomp's config.h/libgomp.h

2016-04-20 Thread Jakub Jelinek
On Wed, Apr 20, 2016 at 09:55:35AM +0200, Thomas Schwinge wrote:
> diff --git libgomp/config.h.in libgomp/config.h.in
> index 226ac53..1ef51ca 100644
> --- libgomp/config.h.in
> +++ libgomp/config.h.in
> @@ -1,5 +1,11 @@
>  /* config.h.in.  Generated from configure.ac by autoheader.  */
>  
> +
> +  #ifdef LIBGOMP_H
> +  # error Must not #include "config.h" after #include "libgomp.h".
> +  #endif
> +
> +
>  /* Define to 1 if the target assembler supports .symver directive. */
>  #undef HAVE_AS_SYMVER_DIRECTIVE

> --- libgomp/libgomp.h
> +++ libgomp/libgomp.h
> @@ -33,7 +33,12 @@
> that are part of the external ABI, and the lower case prefix "gomp"
> is used group items that are completely private to the library.  */
>  
> -#ifndef LIBGOMP_H 
> +#ifndef LIBGOMP_H
> +/* We #include "config.h" early, before we #define LIBGOMP_H, so that we can
> +   use the latter to check in "config.h" that it's not being included again,
> +   which might conflict with configuration changes done further down in
> +   libgomp.h.  */
> +#include "config.h"
>  #define LIBGOMP_H 1

The above breaks the multiple inclusion guards of libgomp.h, the
preprocessor will need to treat them as normal macros.
So IMNSHO it would be better to just use a different macro for this, keep
config.h included where it is now in libgomp.h and just make sure the macro
is defined after it.
Either use one of the many preexisting macros, like gomp_alloca, ...,
REFCOUNT_INFINITY, ... _LIBGOMP_OMP_LOCK_DEFINED, attribute_hidden, ...,
ialias, ..., or add one specially for this purpose.

Otherwise it is reasonable, but only for trunk and 6.2.

Jakub


Re: [PATCH GCC]Support BIT_AND_EXPR in scalar evolution

2016-04-20 Thread Bin.Cheng
On Wed, Apr 20, 2016 at 9:55 AM, Richard Biener
 wrote:
> On Tue, Apr 19, 2016 at 7:00 PM, Bin Cheng  wrote:
>> Hi,
>> Type conversion from integer to smaller unsigned type could be transformed 
>> into BIT_AND_EXPR in compilation.  For example,
>>   int i;
>>   for (i = 0; i < n; i++)
>> {
>>   unsigned char uc = (unsigned char) i;  // transformed into X = i && 
>> 255, in which both X and i are of int type.
>>   b[uc] = 0;
>> }
>> X here could a valid SCEV if we can prove that loop doesn't iterate more 
>> than 255 times.  In other words, if 'i' is SCEV and its value is in the 
>> range of representable set of type "unsigned char".  This information could 
>> be available with -faggressive-loop-optimizations.
>> This patch adds support for BIT_AND_EXPR in scalar evolution to handle such 
>> cases, as well as two new tests.
>>
>> Bootstrap and test on x86_64 & AArch64.  Is it OK?
>
> Don't use cst_and_fits_in_hwi/int_cst_value - those are odd beasts.
> Use tree_fits_uhwi_p / tree_to_uhwi.
> Or just use wi::popcount and verify it against wi::clz.
Thanks for reviewing, here is the updated patch.  Regtest ongoing,
shouldn't be any surprise though.  Is it OK?

Thanks,
bin
>
> Richard.
>
>> Thanks,
>> bin
>>
>> 2016-03-24  Bin Cheng  
>>
>> * tree-scalar-evolution.c (interpret_rhs_expr): Handle BIT_AND_EXPR.
>>
>> gcc/testsuite/ChangeLog
>> 2016-03-24  Bin Cheng  
>>
>> * gcc.dg/tree-ssa/scev-11.c: New test.
>> * gcc.dg/tree-ssa/scev-12.c: New test.
>>
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 88a0eaa..d6f2a2f 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1937,6 +1937,36 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt,
   res = chrec_convert (type, chrec1, at_stmt);
   break;
 
+case BIT_AND_EXPR:
+  /* Given int variable A, handle A&0xffff as (int)(unsigned short)A.
+If A is SCEV and its value is in the range of representable set
+of type unsigned short, the result expression is a (no-overflow)
+SCEV.  */
+  res = chrec_dont_know;
+  if (tree_fits_uhwi_p (rhs2))
+   {
+ int precision;
+ unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2);
+
+ val ++;
+ /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or
+it's not the maximum value of a smaller type than rhs1.  */
+ if (val != 0
+ && (precision = exact_log2 (val)) > 0
+ && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1)))
+   {
+ tree utype = build_nonstandard_integer_type (precision, 1);
+
+ if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1)))
+   {
+ chrec1 = analyze_scalar_evolution (loop, rhs1);
+ chrec1 = chrec_convert (utype, chrec1, at_stmt);
+ res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt);
+   }
+   }
+   }
+  break;
+
 default:
   res = chrec_dont_know;
   break;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c
new file mode 100644
index 000..a7181b2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int a[128];
+extern int b[];
+
+int bar (int *);
+
+int
+foo (int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  unsigned char uc = (unsigned char)i;
+  a[i] = i;
+  b[uc] = 0;
+}
+
+  bar (a);
+  return 0;
+}
+
+/* Address of array reference to b is scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 2 "ivopts" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c 
b/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c
new file mode 100644
index 000..6915ba8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+int a[128];
+extern int b[];
+
+int bar (int *);
+
+int
+foo (int x, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+{
+  unsigned char uc = (unsigned char)i;
+  if (x)
+   a[i] = i;
+  b[uc] = 0;
+}
+
+  bar (a);
+  return 0;
+}
+
+/* Address of array reference to b is not scev.  */
+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n  address" 1 "ivopts" } } */
+
+
+


[Ada] Constraint_Error on spurious ambiguity in instance

2016-04-20 Thread Arnaud Charlet
This patch modifies the processing of generics to aid overload resolution of
binary and unary operators in instances. This is achieved by installing type
conversions in the form of qualified expressions for each operand that yields
a universal type.


-- Source --


--  px.ads

package PX is
   pragma Pure;
   Min_Integer : constant := -2**31;
   Max_Integer : constant := 2**31 - 1;

   subtype Integer_T is Integer   range Min_Integer .. Max_Integer;
   subtype Natural_T is Integer_T range 0 .. Integer_T'last;
   subtype String_T  is String;
end PX;

--  pg.ads

with PX;

generic
   type Element_T is (<>);
   type Index_T is (<>);
   type String_T is array (Index_T range <>) of Element_T;
   Blank_Element : in Element_T;

package PG is
   function Left_Piece
 (Str   : in String_T;
  Size  : in PX.Natural_T;
  Pad_Character : in Element_T := Blank_Element) return String_T;
end PG;

--  pg.adb

package body PG is
   subtype Null_String_T is String_T (Index_T'Last .. Index_T'First);
   Null_String : constant Null_String_T := (others => Element_T'First);

   function "+" (L : in PX.Integer_T; R : in Index_T ) return Index_T;
   function "+" (L : in Index_T;  R : in PX.Integer_T) return Index_T; 
   function "-" (L : in Index_T;  R : in PX.Integer_T) return Index_T;

   function "+" (L : in PX.Integer_T; R : in Index_T) return Index_T is
   begin
  return Index_T'Val (L + Index_T'Pos(R));
   end "+";

   function "+" (L : in Index_T; R : in PX.Integer_T) return Index_T is
   begin
  return Index_T'Val (Index_T'Pos (L) + R);
   end "+";

   function "-" (L : in Index_T; R : in PX.Integer_T) return Index_T is
   begin
  return Index_T'Val (Index_T'Pos (L) - R);
   end "-";

   function Left_Piece
 (Str   : in String_T;
  Size  : in PX.Natural_T;
  Pad_Character : in Element_T := Blank_Element) return String_T
   is
   begin
  if Size > 0 then
 declare
Result : String_T (Index_T'First .. Index_T'First + Size - 1);

 begin
if Size < Str'Length then
   Result := Str (Str'First .. Str'First + Size - 1);

elsif Size = Str'Length then
   Result := Str;

else
   if Str'Length > 0 then
  Result (Result'First .. Result'First + Str'Length - 1) :=
Str;
   end if;

   Result (Result'First + Str'Length .. Result'Last) :=
 (others => Pad_Character);
end if;

return Result;
 end;

  else
 return Null_String;
  end if;
   end Left_Piece;
end PG;

--  nullstr.adb

with Ada.Text_IO; use Ada.Text_IO;
with PG;

procedure Nullstr is
   package PPG is new PG
 (Element_T => Character,
  Index_T   => Positive,
  String_T  => String,
  Blank_Element => '$');

begin
   Put_Line (PPG.Left_Piece ("abcdef", 6));
   Put_Line (PPG.Left_Piece ("abcde", 6));
   Put_Line (PPG.Left_Piece ("", 6));
end Nullstr;


-- Compilation and output --


$ gnatmake -q nullstr.adb
$ ./nullstr
abcdef
abcde$
$$

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Hristian Kirtchev  

* sem_ch12.adb (Qualify_Universal_Operands): New routine.
(Save_References_In_Operator): Add explicit qualifications in
the generic template for all operands of universal type.
* sem_type.adb (Disambiguate): Update the call to Matches.
(Matches): Reimplemented.
* sem_util.ads, sem_util.adb (Yields_Universal_Type): New routine.

Index: sem_type.adb
===
--- sem_type.adb(revision 235199)
+++ sem_type.adb(working copy)
@@ -1316,13 +1316,13 @@
   --  the generic. Within the instance the actual is represented by a
   --  constructed subprogram renaming.
 
-  function Matches (Actual, Formal : Node_Id) return Boolean;
-  --  Look for exact type match in an instance, to remove spurious
-  --  ambiguities when two formal types have the same actual.
+  function Matches (Op : Node_Id; Func_Id : Entity_Id) return Boolean;
+  --  Determine whether function Func_Id is an exact match for binary or
+  --  unary operator Op.
 
   function Operand_Type return Entity_Id;
-  --  Determine type of operand for an equality operation, to apply
-  --  Ada 2005 rules to equality on anonymous access types.
+  --  Determine type of operand for an equality operation, to apply Ada
+  --  2005 rules to equality on anonymous access types.
 
   function Standard_Operator return Boolean;
   --  Check whether subprogram is predefined operator declared in Standard.
@@ -1412,14 +1412,82 @@
   -- Matches --
   -
 
-  function Matches (Actual, Formal : Node_Id) return 

Re: C++ PATCH to fix a part of c++/70513 (ICE-on-invalid with enums)

2016-04-20 Thread Marek Polacek
Ping.

On Fri, Apr 08, 2016 at 01:51:02PM +0200, Marek Polacek wrote:
> This is my attempt to fix at least a part of this PR.  I haven't been able to
> come up with a fix that fixes the other part involving templates.
> 
> We were ICEing on code such as
> 
> struct S
> {
>   enum E : int;
>   enum S::E : int { foo } e;
> };
> 
> Clang rejects this with "extra qualification" error.  When I modified the test
> to use structs rather than enums...
> 
> struct T
> {
>   struct U;
>   struct T::U {};
> };
> 
> ...I found out that we reject this with "extra qualification not allowed".  So
> I think the enum case is missing a similar check that this patch adds.
> 
> By the template part of this PR I mean that we ICE on
> 
> template 
> class D
> {
>   enum D::A { foo } c;
> };
> 
> where clang++ says
> error: template specialization or definition requires a template parameter 
> list
>corresponding to the nested type 'D'
> which I guess means that a valid code would have "<T>" after "D".  I thought
> num_template_headers_for_class and cp_parser_check_template_parameters would
> do the job here, but apparently something else needs to be used for this case.
> But I'm at my wits' end here.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2016-04-08  Marek Polacek  
> 
>   PR c++/70513
>   * parser.c (cp_parser_enum_specifier): Check for extra qualification.
> 
>   * g++.dg/cpp0x/forw_enum12.C: New test.
> 
> diff --git gcc/cp/parser.c gcc/cp/parser.c
> index 28e01af..dc0d1c8 100644
> --- gcc/cp/parser.c
> +++ gcc/cp/parser.c
> @@ -17231,6 +17231,15 @@ cp_parser_enum_specifier (cp_parser* parser)
> type, prev_scope, nested_name_specifier);
> type = error_mark_node;
>   }
> +   /* If that scope is the scope where the declaration is being placed
> +  the program is invalid.  */
> +   else if (nested_name_specifier == prev_scope)
> + {
> +   permerror (type_start_token->location,
> +  "extra qualification not allowed");
> +   type = error_mark_node;
> +   nested_name_specifier = NULL_TREE;
> + }
>   }
>  
>if (scoped_enum_p)
> diff --git gcc/testsuite/g++.dg/cpp0x/forw_enum12.C 
> gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
> index e69de29..906ba68 100644
> --- gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
> +++ gcc/testsuite/g++.dg/cpp0x/forw_enum12.C
> @@ -0,0 +1,29 @@
> +// PR c++/70513
> +// { dg-do compile { target c++11 } }
> +
> +struct S1
> +{
> +  enum E : int;
> +  enum S1::E : int { X } e; // { dg-error "extra qualification not allowed" }
> +};
> +
> +struct S2
> +{
> +  enum class E : int;
> +  enum class S2::E : int { X } e; // { dg-error "extra qualification not 
> allowed" }
> +};
> +
> +struct S3
> +{
> +  enum struct E : int;
> +  enum struct S3::E : int { X } e; // { dg-error "extra qualification not 
> allowed" }
> +};
> +
> +struct S4
> +{
> +  struct S5
> +  {
> +enum E : char;
> +enum S4::S5::E : char { X } e; // { dg-error "extra qualification not 
> allowed" }
> +  };
> +};
> 
>   Marek

Marek


[Ada] Better error message for illegal aspect

2016-04-20 Thread Arnaud Charlet
This patch improves on the error message for an aspect whose expression freezes
the entity to which it applies.

Compiling BD11002.adb must yield:

bd11002.adb:3:02: aspect specification causes premature freezing of "Size"

---
procedure BD11002 is

 Sizer : constant Natural := Integer'Size
with Size => Sizer;   -- ERROR
 function Foo (P : in Natural) return Natural
with Pre => P in 0 .. 3 or else Foo (P - 4) = 0;  -- weird but ok

 function Foo (P : in Natural) return Natural is
 begin
return P;
 end Foo;

begin
   null;
end BD11002;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* sem_ch13.adb (Rep_Item_Too_Late): Better error message for
an illegal aspect that freezes the entity to which it applies.

Index: sem_ch13.adb
===
--- sem_ch13.adb(revision 235240)
+++ sem_ch13.adb(working copy)
@@ -12286,6 +12286,18 @@
 
 and then Comes_From_Source (T)
   then
+ --  A self-referential aspect is illegal if it forces freezing the
+ --  entity before the corresponding pragma has been analyzed.
+
+ if Nkind_In (N, N_Attribute_Definition_Clause, N_Pragma)
+   and then From_Aspect_Specification (N)
+ then
+Error_Msg_NE
+  ("aspect specification causes premature freezing of&", T, N);
+Set_Has_Delayed_Freeze (T, False);
+return True;
+ end if;
+
  Too_Late;
  S := First_Subtype (T);
 


[Ada] Crash on configuration pragma Check_Policy.

2016-04-20 Thread Arnaud Charlet
There are two different syntaxes for this pragma, and the analysis of the
pragma in the new syntax results in the construction of one or more pragmas
in the older form. If the original pragma appears in a configuration file
the generated ones must be inserted in the same file because Insert_Actions
is not usable in the absence of a scope.

Given the following gnat.adc file:

 pragma Check_Policy (Debug => Disable);

The following must compile quietly:

---
procedure Dummy is
begin
   null;
end;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Ed Schonberg  

* sem_prag.adb (Analyze_Pragma, case Check_Policy):  If this
is a configuration pragma and it uses the ARG syntax, insert
the rewritten pragma after the current one rather than using
Insert_Actions.

Index: sem_prag.adb
===
--- sem_prag.adb(revision 235240)
+++ sem_prag.adb(working copy)
@@ -12504,9 +12504,10 @@
 
 else
declare
-  Arg  : Node_Id;
-  Argx : Node_Id;
-  LocP : Source_Ptr;
+  Arg   : Node_Id;
+  Argx  : Node_Id;
+  LocP  : Source_Ptr;
+  New_P : Node_Id;
 
begin
   Arg := Arg1;
@@ -12526,7 +12527,7 @@
  --  Construct equivalent old form syntax Check_Policy
  --  pragma and insert it to get remaining checks.
 
- Insert_Action (N,
+ New_P :=
Make_Pragma (LocP,
  Chars=> Name_Check_Policy,
  Pragma_Argument_Associations => New_List (
@@ -12534,9 +12535,20 @@
  Expression =>
Make_Identifier (LocP, Chars (Arg))),
Make_Pragma_Argument_Association (Sloc (Argx),
- Expression => Argx))));
+ Expression => Argx)));
 
  Arg := Next (Arg);
+
+ --  For a configuration pragma, insert old form in
+ --  the corresponding file.
+
+ if Is_Configuration_Pragma then
+Insert_After (N, New_P);
+Analyze (New_P);
+
+ else
+Insert_Action (N, New_P);
+ end if;
   end loop;
 
   --  Rewrite original Check_Policy pragma to null, since we


Re: [PATCH] [AArch64] support -mfentry feature for arm64

2016-04-20 Thread Szabolcs Nagy
On 20/04/16 01:36, AKASHI Takahiro wrote:
> On Tue, Apr 19, 2016 at 09:44:37AM +0300, Alexander Monakov wrote:
>> On Tue, 19 Apr 2016, AKASHI Takahiro wrote:
 looking at [2] i don't see why

 func:
   mov x9, x30
   bl _tracefunc
   
>>>
>>> Actually,
>>> mov x9, x30
>>> bl _tracefunc
>>> mov x30, x9
>>> 
>>
>> I think here Szabolcs' point was that the last instruction can be eliminated:
>> _tracefunc can be responsible for restoring x30, and can use x9 to return to
>> its caller. It has a non-standard calling convention and needs to be
>> implemented in assembly anyway.
> 
> OK, but in _tracefunc, x30 has been updated, and so we should
> return as follows:
> mov xTMP, x30
> mov x30, x9
> ret xTMP
> 
> We need one more temp register here...
> 

you have to save/restore x9 and x30 around
the ftrace callback that is written in c anyway,
so i think you don't need more registers, just
restore from the stack differently.

and the instrumentation code sequence should
be optimized, not the trace function.

> Thanks,
> -Takahiro AKASHI
> 
>> Alexander
> 



Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Marek Polacek
On Wed, Apr 20, 2016 at 11:47:07AM +0200, Richard Biener wrote:
> On Wed, Apr 20, 2016 at 11:42 AM, Marek Polacek  wrote:
> > On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
> >> On Tue, Apr 19, 2016 at 8:35 PM, Marek Polacek  wrote:
> >> > While predicate_mem_writes has a check to skip conditions that were 
> >> > evaluated
> >> > to true, it's lacking the same check for false, so we hit an assert 
> >> > later on.
> >> > So I'm adding is_false_predicate.  Maybe it should be added to other 
> >> > spots as
> >> > well, but I'm not sure about that.
> >> >
> >> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> >>
> >> Ok.
> >
> > Thanks, should I backport this to gcc-6-branch now?  Or wait until after 
> > 6.1?
> 
> It's fine to backport now as it's probably a regression.

Yes, it is (gcc5 worked).  Will backport now then.

Marek


Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 11:42 AM, Marek Polacek  wrote:
> On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
>> On Tue, Apr 19, 2016 at 8:35 PM, Marek Polacek  wrote:
>> > While predicate_mem_writes has a check to skip conditions that were 
>> > evaluated
>> > to true, it's lacking the same check for false, so we hit an assert later 
>> > on.
>> > So I'm adding is_false_predicate.  Maybe it should be added to other spots 
>> > as
>> > well, but I'm not sure about that.
>> >
>> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
>>
>> Ok.
>
> Thanks, should I backport this to gcc-6-branch now?  Or wait until after 6.1?

It's fine to backport now as it's probably a regression.

Richard.

> Marek


Re: [PATCH] Optimize strchr (s, 0) to strlen

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 10:45 AM, Richard Biener
 wrote:
> On Tue, Apr 19, 2016 at 6:32 PM, Wilco Dijkstra  
> wrote:
>> Richard Biener wrote:
>>>
>>> This folding should be added to gimple-fold.c:gimple_fold_builtin instead,
>>> the builtins.c foldings are purely for folding to constants nowadays.
>>
>> So is this better? It's a lot more verbose for something so simple...
>> Unfortunately match.pd doesn't support this kind of thing either.
>
> Better - comments below.  Jakub objections to the usefulness of the transform
> remain - we do have the strlen pass that uses some global knowledge to decide
> on profitability.  One could argue that for -Os doing the reverse transform is
> profitable?
>
> Yes, match.pd doesn't support this (yet).  It may be possible to teach it 
> simple
> cases like this - I plan to revisit this at some point.  The issue is one of
> side-effects which are tricky to handle if you consider patterns that do not
> only match a single call (as this one would).  So a baby-step towards getting
> this supported in match.pd is to handle matching toplevel calls specially.

Ok, just gave it a stab in a different way - see attached.  This makes

(simplify
 (BUILT_IN_STRCHR @0 integer_zerop)
 (pointer_plus @0 (BUILT_IN_STRLEN:size_type_node @0)))

work.  Note how the SSA following predicate needs to be aware of
side-effects to guard against bogus applies of complicated patterns
(none of those exist yet).

Richard.

> Richard.
>
>> Wilco
>>
>>
>> ChangeLog:
>> 2016-04-19  Wilco Dijkstra  
>>
>> gcc/
>> * gcc/gimple-fold.c (gimple_fold_builtin_strchr):
>> New function to optimize strchr (s, 0) to strlen.
>> (gimple_fold_builtin): Add BUILT_IN_STRCHR case.
>>
>> testsuite/
>> * gcc/testsuite/gcc.dg/strlenopt-20.c: Update test.
>> * gcc/testsuite/gcc.dg/strlenopt-21.c: Likewise.
>> * gcc/testsuite/gcc.dg/strlenopt-22.c: Likewise.
>> * gcc/testsuite/gcc.dg/strlenopt-26.c: Likewise.
>> * gcc/testsuite/gcc.dg/strlenopt-5.c: Likewise.
>> * gcc/testsuite/gcc.dg/strlenopt-7.c: Likewise.
>> * gcc/testsuite/gcc.dg/strlenopt-9.c: Likewise.
>>
>> --
>>
>> diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
>> index 
>> eb130d048469f0b8196e565fed9a40de74b098bd..11dcf69fc919f066362f4f713db392d14b39764e
>>  100644
>> --- a/gcc/gimple-fold.c
>> +++ b/gcc/gimple-fold.c
>> @@ -1380,6 +1380,59 @@ gimple_fold_builtin_strncpy (gimple_stmt_iterator 
>> *gsi,
>>return true;
>>  }
>>
>> +/* Simplify strchr (str, 0) into str + strlen (str).
>> +   In general strlen is significantly faster than strchr
>> +   due to being a simpler operation.  */
>> +static bool
>> +gimple_fold_builtin_strchr (gimple_stmt_iterator *gsi)
>> +{
>> +  gimple *stmt = gsi_stmt (*gsi);
>> +  tree str = gimple_call_arg (stmt, 0);
>> +  tree c = gimple_call_arg (stmt, 1);
>> +  location_t loc = gimple_location (stmt);
>> +
>> +  if (optimize_function_for_size_p (cfun))
>> +return false;
>
> Hmm, I think we'd want a optimize_stmt_for_size_p (stmt) which
> does the right thing for the case we have a CFG (look at the BB)
> or when not (look at the function).
>
>> +  if (!integer_zerop (c) || !gimple_call_lhs (stmt))
>> +return false;
>> +
>> +  tree newstr;
>> +  tree strlen_fn = builtin_decl_implicit (BUILT_IN_STRLEN);
>> +
>> +  if (!strlen_fn)
>> +return false;
>> +
>> +  /* Create newstr = strlen (str).  */
>> +  gimple_seq stmts = NULL, stmts2;
>> +  gimple *repl = gimple_build_call (strlen_fn, 1, str);
>> +  gimple_set_location (repl, loc);
>> +  if (gimple_in_ssa_p (cfun))
>> +newstr = make_ssa_name (size_type_node);
>> +  else
>> +newstr = create_tmp_reg (size_type_node);
>> +  gimple_call_set_lhs (repl, newstr);
>> +  gimple_seq_add_stmt_without_update (&stmts, repl);
>> +
>> +  /* Create (str p+ strlen (str)).  */
>> +  newstr = fold_build_pointer_plus_loc (loc, str, newstr);
>> +  newstr = force_gimple_operand (newstr, &stmts2, true, NULL_TREE);
>
> I think you want to build a gimple_assign directly here, otherwise ...
>
>> +  gimple_seq_add_seq_without_update (&stmts, stmts2);
>> +
>> +  repl = gimple_build_assign (gimple_call_lhs (stmt), newstr);
>> +  gimple_seq_add_stmt_without_update (&stmts, repl);
>> +  gsi_replace_with_seq_vops (gsi, stmts);
>> +  /* gsi now points at the assignment to the lhs, get a
>> + stmt iterator to the strlen.
>> + ???  We can't use gsi_for_stmt as that doesn't work when the
>> + CFG isn't built yet.  */
>> +  gimple_stmt_iterator gsi2 = *gsi;
>> +  gsi_prev (&gsi2);
>> +  gsi_prev (&gsi2);
>
> ... this may not reliably end up at the call stmt.
>
>> +  fold_stmt (&gsi2);
>> +  return true;
>> +}
>> +
>>  /* Simplify a call to the strcat builtin.  DST and SRC are the arguments
>> to the call.
>>
>> @@ -2821,6 +2874,8 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>  gimple_call_arg (stmt, 1));
>>  case 

Re: [PATCH] Fix ICE in predicate_mem_writes (PR tree-optimization/70725)

2016-04-20 Thread Marek Polacek
On Wed, Apr 20, 2016 at 11:04:08AM +0200, Richard Biener wrote:
> On Tue, Apr 19, 2016 at 8:35 PM, Marek Polacek  wrote:
> > While predicate_mem_writes has a check to skip conditions that were 
> > evaluated
> > to true, it's lacking the same check for false, so we hit an assert later 
> > on.
> > So I'm adding is_false_predicate.  Maybe it should be added to other spots 
> > as
> > well, but I'm not sure about that.
> >
> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> Ok.

Thanks, should I backport this to gcc-6-branch now?  Or wait until after 6.1?

Marek


[Ada] Wrong resolution of intrinsic in postcondition

2016-04-20 Thread Arnaud Charlet
This patch ensures that intrinsic operators that act as generic actuals are
properly resolved and rewritten in the instance when the context is a fully
analyzed and expanded pre/postcondition. Prior to this change the rewriting
guard was too restrictive and led to erroneous resolution.


-- Source --


--  g.ads

generic
   with function "<" (L, R : Integer) return Boolean;

package G is
   function Foo (L, R : Integer) return Boolean is (L < R)
 with Post => Foo'Result = (L < R);
end G;

--  main.adb

with G;

procedure Main is
   package I is new G (">");

   Result : constant Boolean := I.Foo (1, 2);

begin
   null;
end Main;

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Hristian Kirtchev  

* sem_res.adb (Rewrite_Renamed_Operator): Do not rewrite the
renamed operator when the associated node appears within a
pre/postcondition.
* sem_util.ads, sem_util.adb (In_Pre_Post_Condition): New routine.

Index: sem_util.adb
===
--- sem_util.adb(revision 235248)
+++ sem_util.adb(working copy)
@@ -10474,6 +10474,51 @@
   end loop;
end In_Pragma_Expression;
 
+   ---------------------------
+   -- In_Pre_Post_Condition --
+   ---------------------------
+
+   function In_Pre_Post_Condition (N : Node_Id) return Boolean is
+  Par : Node_Id;
+  Prag: Node_Id := Empty;
+  Prag_Id : Pragma_Id;
+
+   begin
+  --  Climb the parent chain looking for an enclosing pragma
+
+  Par := N;
+  while Present (Par) loop
+ if Nkind (Par) = N_Pragma then
+Prag := Par;
+exit;
+
+ --  Prevent the search from going too far
+
+ elsif Is_Body_Or_Package_Declaration (Par) then
+exit;
+ end if;
+
+ Par := Parent (Par);
+  end loop;
+
+  if Present (Prag) then
+ Prag_Id := Get_Pragma_Id (Prag);
+
+ return
+   Prag_Id = Pragma_Post
+ or else Prag_Id = Pragma_Post_Class
+ or else Prag_Id = Pragma_Postcondition
+ or else Prag_Id = Pragma_Pre
+ or else Prag_Id = Pragma_Pre_Class
+ or else Prag_Id = Pragma_Precondition;
+
+  --  Otherwise the node is not enclosed by a pre/postcondition pragma
+
+  else
+ return False;
+  end if;
+   end In_Pre_Post_Condition;
+
-
-- In_Reverse_Storage_Order_Object --
-
Index: sem_util.ads
===
--- sem_util.ads(revision 235199)
+++ sem_util.ads(working copy)
@@ -1152,8 +1152,8 @@
--  Returns true if the Typ_Ent implements interface Iface_Ent
 
function In_Assertion_Expression_Pragma (N : Node_Id) return Boolean;
-   --  Determine whether an arbitrary node appears in a pragma that acts as an
-   --  assertion expression. See Sem_Prag for the list of qualifying pragmas.
+   --  Returns True if node N appears within a pragma that acts as an assertion
+   --  expression. See Sem_Prag for the list of qualifying pragmas.
 
function In_Instance return Boolean;
--  Returns True if the current scope is within a generic instance
@@ -1179,6 +1179,10 @@
function In_Pragma_Expression (N : Node_Id; Nam : Name_Id) return Boolean;
--  Returns true if the expression N occurs within a pragma with name Nam
 
+   function In_Pre_Post_Condition (N : Node_Id) return Boolean;
+   --  Returns True if node N appears within a pre/postcondition pragma. Note
+   --  the pragma Check equivalents are NOT considered.
+
function In_Reverse_Storage_Order_Object (N : Node_Id) return Boolean;
--  Returns True if N denotes a component or subcomponent in a record or
--  array that has Reverse_Storage_Order.
Index: sem_res.adb
===
--- sem_res.adb (revision 235240)
+++ sem_res.adb (working copy)
@@ -11122,8 +11122,10 @@
   --  Do not perform this transformation within a pre/postcondition,
   --  because the expression will be re-analyzed, and the transformation
   --  might affect the visibility of the operator, e.g. in an instance.
+  --  Note that fully analyzed and expanded pre/postconditions appear as
+  --  pragma Check equivalents.
 
-  if In_Assertion_Expr > 0 then
+  if In_Pre_Post_Condition (N) then
  return;
   end if;
 
@@ -11145,7 +11147,7 @@
  Generate_Reference (Op, N);
 
  if Is_Binary then
-Set_Left_Opnd  (Op_Node, Left_Opnd  (N));
+Set_Left_Opnd (Op_Node, Left_Opnd (N));
  end if;
 
  Rewrite (N, Op_Node);
@@ -11154,9 +11156,7 @@
  --  that the operator is applied to the full view. This is done in the
  --  routines that resolve intrinsic operators.
 
- if 

Re: [PATCH 11/18] add some utility methods to vec

2016-04-20 Thread Richard Biener
On Wed, Apr 20, 2016 at 8:22 AM,   wrote:
> From: Trevor Saunders 
>
> Later patches use these functions, and I believe Mikhail has mentioned before
> he'd like to have begin / end () on vec before.

begin() / end () is fine.  But contains ()?  That makes using a O(n) algorithm
too easy I think (we have qsort + bsearch for a more efficient way).

I suppose you are replacing linear list walks with contains () so it
might be ok...

At least stick some comment on contains () mentioning qsort / bsearch.

Ok with that change.

Richard.

> gcc/ChangeLog:
>
> 2016-04-19  Trevor Saunders  
>
> * vec.h (vec_safe_contains): New function.
> (vec::contains): Likewise.
> (vec::begin): Likewise.
> (vec::end): Likewise.
> ---
>  gcc/vec.h | 39 ++-
>  1 file changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/vec.h b/gcc/vec.h
> index ff57528..3c16e83 100644
> --- a/gcc/vec.h
> +++ b/gcc/vec.h
> @@ -454,6 +454,10 @@ public:
>bool is_empty (void) const { return m_vecpfx.m_num == 0; }
>T *address (void) { return m_vecdata; }
>const T *address (void) const { return m_vecdata; }
> +  T *begin () { return address (); }
> +  const T *begin () const { return address (); }
> +  T *end () { return address () + length (); }
> +  const T *end () const { return address () + length (); }
> >const T &operator[] (unsigned) const;
> >T &operator[] (unsigned);
> >T &last (void);
> @@ -473,6 +477,7 @@ public:
>void qsort (int (*) (const void *, const void *));
>T *bsearch (const void *key, int (*compar)(const void *, const void *));
>unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
> > +  bool contains (const T &search) const;
>static size_t embedded_size (unsigned);
>void embedded_init (unsigned, unsigned = 0, unsigned = 0);
>void quick_grow (unsigned len);
> @@ -542,7 +547,6 @@ vec_safe_is_empty (vec *v)
>return v ? v->is_empty () : true;
>  }
>
> -
>  /* If V does not have space for NELEMS elements, call
> V->reserve(NELEMS, EXACT).  */
>  template
> @@ -695,6 +699,12 @@ vec_safe_splice (vec *, const vec A, vl_embed> *src
>  }
>  }
>
> > +template<typename T, typename A>
> > +inline bool
> > +vec_safe_contains (vec<T, A, vl_embed> *v, const T &search)
> +{
> +  return v? v->contains (search) : false;
> +}
>
>  /* Index into vector.  Return the IX'th element.  IX must be in the
> domain of the vector.  */
> @@ -973,6 +983,19 @@ vec::bsearch (const void *key,
>return NULL;
>  }
>
> +/* Return true if the vector contains search.  */
> +
> > +template<typename T, typename A>
> > +inline bool
> > +vec<T, A, vl_embed>::contains (const T &search) const
> +{
> +  unsigned int len = length ();
> +  for (unsigned int i = 0; i < len; i++)
> +if ((*this)[i] == search)
> +  return true;
> +
> +  return false;
> +}
>
>  /* Find and return the first position in which OBJ could be inserted
> without changing the ordering of this vector.  LESSTHAN is a
> @@ -1167,6 +1190,10 @@ public:
>const T *address (void) const
>{ return m_vec ? m_vec->m_vecdata : NULL; }
>
> +  T *begin () { return address (); }
> +  const T *begin () const { return address (); }
> +  T *end () { return begin () + length (); }
> +  const T *end () const { return begin () + length (); }
> >const T &operator[] (unsigned ix) const
>{ return (*m_vec)[ix]; }
>
> @@ -1208,6 +1235,7 @@ public:
>void qsort (int (*) (const void *, const void *));
>T *bsearch (const void *key, int (*compar)(const void *, const void *));
>unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
> > +  bool contains (const T &search) const;
>
>bool using_auto_storage () const;
>
> @@ -1695,6 +1723,15 @@ vec::lower_bound (T obj,
>return m_vec ? m_vec->lower_bound (obj, lessthan) : 0;
>  }
>
> +/* Return true if the vector contains search.  */
> +
> > +template<typename T>
> > +inline bool
> > +vec<T, va_heap, vl_ptr>::contains (const T &search) const
> +{
> +  return m_vec ? m_vec->contains (search) : false;
> +}
> +
>  template
>  inline bool
>  vec::using_auto_storage () const
> --
> 2.7.4
>


[Ada] Do not use secondary stack in some cases

2016-04-20 Thread Arnaud Charlet
The compiler is no longer using the secondary stack in the case of
"for ... of" loops over arrays. No small test case is available.

Tested on x86_64-pc-linux-gnu, committed on trunk

2016-04-20  Bob Duff  

* sem_ch5.adb (Analyze_Iterator_Specification): Do not use secondary
stack when possible.

Index: sem_ch5.adb
===
--- sem_ch5.adb (revision 235240)
+++ sem_ch5.adb (working copy)
@@ -1753,14 +1753,6 @@

 
procedure Analyze_Iterator_Specification (N : Node_Id) is
-  Loc   : constant Source_Ptr := Sloc (N);
-  Def_Id: constant Node_Id:= Defining_Identifier (N);
-  Subt  : constant Node_Id:= Subtype_Indication (N);
-  Iter_Name : constant Node_Id:= Name (N);
-
-  Typ : Entity_Id;
-  Bas : Entity_Id;
-
   procedure Check_Reverse_Iteration (Typ : Entity_Id);
   --  For an iteration over a container, if the loop carries the Reverse
   --  indicator, verify that the container type has an Iterate aspect that
@@ -1822,6 +1814,16 @@
  return Etype (Ent);
   end Get_Cursor_Type;
 
+  --  Local variables
+
+  Def_Id: constant Node_Id:= Defining_Identifier (N);
+  Iter_Name : constant Node_Id:= Name (N);
+  Loc   : constant Source_Ptr := Sloc (N);
+  Subt  : constant Node_Id:= Subtype_Indication (N);
+
+  Bas : Entity_Id;
+  Typ : Entity_Id;
+
--   Start of processing for Analyze_iterator_Specification
 
begin
@@ -1925,7 +1927,7 @@
 
 --  Do not perform this expansion in SPARK mode, since the formal
 --  verification directly deals with the source form of the iterator.
---  Ditto for ASIS and when expansion is disabled,, where the temporary
+--  Ditto for ASIS and when expansion is disabled, where the temporary
 --  may hide the transformation of a selected component into a prefixed
 --  function call, and references need to see the original expression.
 
@@ -2001,7 +2003,7 @@
 --  to it. It has no effect on the generated code if no actions
 --  are added to it (see Wrap_Transient_Declaration).
 
-if Expander_Active then
+if not Is_Array_Type (Typ) and then Expander_Active then
Establish_Transient_Scope (Name (Decl), Sec_Stack => True);
 end if;
 


  1   2   >