Re: [PATCH 12/13] Eliminate FOR_EACH_BB_REVERSE macro.

2013-12-06 Thread Oleg Endo
David,

Could you please also update the use of FOR_EACH_BB_REVERSE in
config/sh/sh_optimize_sett_clrt.cc ?

Thanks,
Oleg

On Fri, 2013-12-06 at 09:51 -0500, David Malcolm wrote:
> gcc/
>   * basic-block.h (FOR_EACH_BB_REVERSE): Eliminate macro.
> 
>   * cfghooks.c (verify_flow_info): Replace uses of FOR_EACH_BB_REVERSE
>   with FOR_EACH_BB_REVERSE_FN, making uses of cfun explicit.
>   * cfgrtl.c (print_rtl_with_bb, rtl_verify_edges,
>   rtl_verify_bb_insns, rtl_verify_bb_pointers,
>   rtl_verify_bb_insn_chain, rtl_verify_fallthru): Likewise.
>   * config/ia64/ia64.c (emit_predicate_relation_info): Likewise.
>   * config/sh/sh.c (sh_md_init_global): Likewise.
>   * dce.c (reset_unmarked_insns_debug_uses, delete_unmarked_insns):
>   Likewise.
>   * dominance.c (calc_dfs_tree): Likewise.
>   * final.c (final): Likewise.
>   * function.c (thread_prologue_and_epilogue_insns): Likewise.
>   * gcse.c (compute_code_hoist_vbeinout): Likewise.
>   * ira.c (update_equiv_regs, build_insn_chain): Likewise.
>   * lcm.c (compute_antinout_edge): Likewise.
>   * mode-switching.c (optimize_mode_switching): Likewise.
>   * postreload.c (reload_combine): Likewise.
>   * recog.c (split_all_insns, peephole2_optimize): Likewise.
>   * tree-ssa-live.c (live_worklist): Likewise.
> ---
>  gcc/basic-block.h  |  2 --
>  gcc/cfghooks.c |  2 +-
>  gcc/cfgrtl.c   | 12 ++--
>  gcc/config/ia64/ia64.c |  4 ++--
>  gcc/config/sh/sh.c |  2 +-
>  gcc/dce.c  |  4 ++--
>  gcc/dominance.c|  4 ++--
>  gcc/final.c|  2 +-
>  gcc/function.c |  2 +-
>  gcc/gcse.c |  2 +-
>  gcc/ira.c  |  4 ++--
>  gcc/lcm.c  |  2 +-
>  gcc/mode-switching.c   |  4 ++--
>  gcc/postreload.c   |  2 +-
>  gcc/recog.c|  4 ++--
>  gcc/tree-ssa-live.c|  2 +-
>  16 files changed, 26 insertions(+), 28 deletions(-)
> 
> diff --git a/gcc/basic-block.h b/gcc/basic-block.h
> index b378a5b..75f16ac 100644
> --- a/gcc/basic-block.h
> +++ b/gcc/basic-block.h
> @@ -336,8 +336,6 @@ struct GTY(()) control_flow_graph {
>  #define FOR_EACH_BB_REVERSE_FN(BB, FN) \
>FOR_BB_BETWEEN (BB, (FN)->cfg->x_exit_block_ptr->prev_bb, 
> (FN)->cfg->x_entry_block_ptr, prev_bb)
>  
> -#define FOR_EACH_BB_REVERSE(BB) FOR_EACH_BB_REVERSE_FN (BB, cfun)
> -
>  /* For iterating over insns in basic block.  */
>  #define FOR_BB_INSNS(BB, INSN)   \
>for ((INSN) = BB_HEAD (BB);\
> diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
> index 2400965..78218b5 100644
> --- a/gcc/cfghooks.c
> +++ b/gcc/cfghooks.c
> @@ -123,7 +123,7 @@ verify_flow_info (void)
>  }
>  
>/* Now check the basic blocks (boundaries etc.) */
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>int n_fallthru = 0;
>edge e;
> diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
> index daadd9b..7734ac1 100644
> --- a/gcc/cfgrtl.c
> +++ b/gcc/cfgrtl.c
> @@ -2153,7 +2153,7 @@ print_rtl_with_bb (FILE *outf, const_rtx rtx_first, int 
> flags)
>  
>if (flags & TDF_BLOCKS)
>   {
> -   FOR_EACH_BB_REVERSE (bb)
> +   FOR_EACH_BB_REVERSE_FN (bb, cfun)
>   {
> rtx x;
>  
> @@ -2408,7 +2408,7 @@ rtl_verify_edges (void)
>int err = 0;
>basic_block bb;
>  
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>int n_fallthru = 0, n_branch = 0, n_abnormal_call = 0, n_sibcall = 0;
>int n_eh = 0, n_abnormal = 0;
> @@ -2586,7 +2586,7 @@ rtl_verify_bb_insns (void)
>int err = 0;
>basic_block bb;
>  
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>/* Now check the header of basic
>block.  It ought to contain optional CODE_LABEL followed
> @@ -2649,7 +2649,7 @@ rtl_verify_bb_pointers (void)
>basic_block bb;
>  
>/* Check the general integrity of the basic blocks.  */
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>rtx insn;
>  
> @@ -2739,7 +2739,7 @@ rtl_verify_bb_insn_chain (void)
>  
>bb_info = XCNEWVEC (basic_block, max_uid);
>  
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>rtx head = BB_HEAD (bb);
>rtx end = BB_END (bb);
> @@ -2821,7 +2821,7 @@ rtl_verify_fallthru (void)
>basic_block bb;
>int err = 0;
>  
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>edge e;
>  
> diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
> index a837974..99bc094 100644
> --- a/gcc/config/ia64/ia64.c
> +++ b/gcc/config/ia64/ia64.c
> @@ -9613,7 +9613,7 @@ emit_predicate_relation_info (void)
>  {
>basic_block bb;
>  
> -  FOR_EACH_BB_REVERSE (bb)
> +  FOR_EACH_BB_REVERSE_FN (bb, cfun)
>  {
>int r;
>rtx head = BB_HEAD (bb);
> @@ -9641,7 +9641,7 @@ emit_predicate_relation_info (voi

[patch] microblaze-rtems Add TARGET_BIG_ENDIAN_DEFAULT

2013-12-06 Thread Ralf Corsepius

Hi,

I intend to commit the patch below to gcc-trunk and the 4.8-branch:

It's a partial sync of the microblaze-rtems* section in gcc/config.gcc 
with microblaze*-*-elf's:


Add TARGET_BIG_ENDIAN_DEFAULT-switch for microblaze*-*-rtems*.

Ralf
2013-12-07  Ralf Corsépius  

	* config.gcc (microblaze*-*-rtems*): Add TARGET_BIG_ENDIAN_DEFAULT.

Index: config.gcc
===
--- config.gcc	(revision 205770)
+++ config.gcc	(working copy)
@@ -1887,6 +1887,14 @@
 	tmake_file="${tmake_file} microblaze/t-microblaze-linux"
 	;;
 microblaze*-*-rtems*)
+	case $target in
+		microblazeel-*)
+			tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=0"
+			;;
+		microblaze-*)
+			tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=4321"
+			;;
+	esac
 	tm_file="${tm_file} dbxelf.h"
 	tm_file="${tm_file} microblaze/rtems.h rtems.h newlib-stdint.h"
 	c_target_objs="${c_target_objs} microblaze-c.o"


Re: [PATCH PR41488]Recognize more induction variables by simplifying PEELED chrec in scalar evolution

2013-12-06 Thread Bin.Cheng
On Sat, Dec 7, 2013 at 3:20 AM, Jeff Law  wrote:
> On 12/06/13 03:29, bin.cheng wrote:
>>
>> Hi,
>> Entry pr41488 shows a case in which induction variable can't be recognized
>> or coalesced.  As noted in the bug entry, one possible fix is to teach PRE
>> not to do some specific transformation.  However, it's in nature a scalar
>> evolution issue.  Considering below snippet:
>>
>> # i_17 = PHI 
>> # _20 = PHI <_5(5), start_4(D)(3)>
>> ...
>> i_13 = i_17 + 1;
>> _5 = start_4(D) + i_13;
>>
>> Variable _20 appears in the form of PEELED chrec like (start_4, _5)_LOOP,
>> meaning it has value "start_4" in the 1st iteration, then changes to _5 in
>> following iterations.  PEELED chrec is not implemented by GCC right now,
>> it
>> can be simplified into either POLYNOMIAL or PERIOD one.
>
> Right.  PEELED_CHREC was removed from the LNO branch back in 2004,
> presumably before the LNO branch was merged into the mainline.  No reason
> was given.

Maybe we don't have any user for such CHRECs in GCC now, but it's just guessing.

>
> But what you're really discovering here is that _20 is just another standard
> looking IV that appears in the form of a peeled chrec, right?

Exactly, IVOPT handles only polynomial ones, and _20 is one such chrec
only appearing in peeled form.

>
>
>
>  The POLYNOMIAL
>>
>> chrec is processed by GCC after being recognized, as in the examle, _20 is
>> actually {start_4, 1}_LOOP.
>
> Right.  Based on reading the archives, it looks like this stuff is/was
> generated by PRE.  I also suspect jump threading can create them.  There was
> talk of throttling PRE to leave things in a form that the IV analysis could
> more easily digest, but I'm not sure if that was ever completed.

It could also be simply because the code is written that way, as in the case I
added in the patch.  I found real examples from ggc-page.c in GCC.
But it's always good to cleanup input of an optimization, I presume
that's why Richard tried to move IVOPT later before.

>
> [ snip ]
>
>
>>
>> One point needs to be clarified that I use tree_to_aff_combination_expand
>> in
>> the patch.  Rational is the number of the specific PEELED_CHRECs should be
>> moderate, I also check the equality literally firstly and resort to affine
>> facility if that fails.  By measuring bootstrap of gcc and spec2kint, I
>> collected the number of opportunities caught by this patch like below:
>>  literal comparison/affine comparison
>> GCC  ~1200/~250
>> Spec2Kint  93/34
>>
>> I could back trace the ssa chain instead of using affine functions, but
>> that
>> would miss some cases.
>
> I assume tree_to_aff_combination_expand is relatively expensive, thus the
> two approaches, one which avoids tree_to_aff_combination_expand.
Considering the dump of case in the patch:

  :
  _16 = start_4(D) + 1000;
  if (end_6(D) < _16)
goto ;
  else
goto ;

  :
  pretmp_22 = sp_7(D)->data;

  :
  # i_17 = PHI 
  # _20 = PHI <_5(5), _16(3)>
  _9 = (unsigned int) _20;
  _10 = _9 * 4;
  _11 = pretmp_22 + _10;
  *_11 = 0;
  i_13 = i_17 + -1;
  _5 = start_4(D) + i_13;
  if (_5 > end_6(D))
goto ;
  else
goto ;

  :
  goto ;

I have to prove (_16 + -1) equals to (start_4 + 999) for _20, so
either using affine function to back trace the definition of _16, or I
have to back trace ssa_chain manually.  Here I use affine function
because the number of such cases should be moderate and there are more
complicated case in which simple back trace will lose.

Another question: is it acceptable to add a parameter to
tree_to_aff_combination_expand to limit the number of recursive calls
it makes?  Then we wouldn't need to expand to the leaf nodes every time.

>
>
> In add_old_iv_candidates, is it the case that the only time
> SSA_NAME_DEF_STMT (def) is another PHI node is when it's one of these affine

I suppose so.  Actually IVOPT makes an assertion about IP_ORIGINAL candidates in
the function rewrite_use_nonlinear_expr, like:

  /* An important special case -- if we are asked to express value of
 the original iv by itself, just exit; there is no need to
 introduce a new computation (that might also need casting the
 variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
  && cand->incremented_at == use->stmt)
{
  enum tree_code stmt_code;

  gcc_assert (is_gimple_assign (use->stmt));
  ..

The only case I can think about involving other kind of phi node is
for merging phi in conditional code like:

LOOP:
x_1 = phi 

if (cond)
   x_3 = x_1 + 1;
else
   x_4 = x_1 + 1;
x_2 = phi 

Though SCEV knows x_3/x_4 are ssa_names for an iv in different
branches, IVOPT doesn't handle them this way (they are not treated as an
original iv).  This is one defect of the current implementation of IVOPT, I think.

> ivs appearing in the form of a PEELED_CHREC?  And in that case, we do not
> want to record the possibility of leaving the original IV untouched?  --

IVOPT adds origi

Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread David Edelsohn
On Fri, Dec 6, 2013 at 7:53 PM, Alan Modra  wrote:
> On Fri, Dec 06, 2013 at 05:23:28PM -0500, Vladimir Makarov wrote:
>> On 12/6/2013, 2:40 PM, David Edelsohn wrote:
>> >On Fri, Dec 6, 2013 at 2:02 PM, Vladimir Makarov  
>> >wrote:
>> >> * config/rs6000/rs600.md (*bswapdi2_64bit): Remove ?? from the
>> >> constraint.
>> >
>> >Okay, let's just remove the "??" modifier from the constraint.
>> >
>> >Thanks for your patience, explanations, and work on this, Vlad.
>> >
>>
>> Thanks, David.
>>
>> Committed as rev. 205765.
>
> /* -m32 -O2 -S -mlra */
> long long swap64 (long long x)
> {
>   return __builtin_bswap64 (x);
> }
>
> Here too, I think.  OK to apply David?
>
> * config/rs6000/rs600.md (bswapdi2_32bit): Remove ?? from the
> constraint.

Okay.  We might as well change it in this location as well.

Thanks, David


Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Alan Modra
On Fri, Dec 06, 2013 at 05:23:28PM -0500, Vladimir Makarov wrote:
> On 12/6/2013, 2:40 PM, David Edelsohn wrote:
> >On Fri, Dec 6, 2013 at 2:02 PM, Vladimir Makarov  wrote:
> >> * config/rs6000/rs600.md (*bswapdi2_64bit): Remove ?? from the
> >> constraint.
> >
> >Okay, let's just remove the "??" modifier from the constraint.
> >
> >Thanks for your patience, explanations, and work on this, Vlad.
> >
> 
> Thanks, David.
> 
> Committed as rev. 205765.

/* -m32 -O2 -S -mlra */
long long swap64 (long long x)
{
  return __builtin_bswap64 (x);
}

Here too, I think.  OK to apply David?

* config/rs6000/rs600.md (bswapdi2_32bit): Remove ?? from the
constraint.

Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 205767)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -2544,7 +2544,7 @@
 }")
 
 (define_insn "bswapdi2_32bit"
-  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,&r")
(bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
(clobber (match_scratch:SI 2 "=&b,&b,X"))]
   "!TARGET_POWERPC64 && (REG_P (operands[0]) || REG_P (operands[1]))"

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] Enhancing the widen-mult pattern in vectorization.

2013-12-06 Thread Cong Hou
After further reviewing this patch, I found I don't have to change the
code in tree-vect-stmts.c to allow further type conversion after
widen-mult operation. Instead, I detect the following pattern in
vect_recog_widen_mult_pattern():

T1 a, b;
ai = (T2) a;
bi = (T2) b;
c = ai * bi;

where T2 is more than double the size of T1 (e.g. T1 is char and T2 is int).

In this case I just create a new type T3 whose size is double of the
size of T1, then get an intermediate result of type T3 from
widen-mult. Then I add a new statement to STMT_VINFO_PATTERN_DEF_SEQ
converting the result into type T2.

This strategy makes the patch more clean.

Bootstrapped and tested on an x86-64 machine.


thanks,
Cong


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f298c0b..12990b2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2013-12-02  Cong Hou  
+
+ * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Enhance
+ the widen-mult pattern by handling two operands with different
+ sizes, and operands whose size is smaller than half of the result
+ type.
+
 2013-11-22  Jakub Jelinek  

  PR sanitizer/59061
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 12d2c90..611ae1c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-12-02  Cong Hou  
+
+ * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: New test.
+ * gcc.dg/vect/vect-widen-mult-u8-u32.c: New test.
+
 2013-11-22  Jakub Jelinek  

  * c-c++-common/asan/no-redundant-instrumentation-7.c: Fix
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
new file mode 100644
index 000..9f9081b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target vect_int } */
+
+#include 
+#include "tree-vect.h"
+
+#define N 64
+
+unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int result[N];
+
+/* unsigned char * short -> int widening-mult.  */
+__attribute__ ((noinline)) int
+foo1(int len) {
+  int i;
+
+  for (i=0; i
+#include "tree-vect.h"
+
+#define N 64
+
+unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+unsigned int result[N];
+
+/* unsigned char-> unsigned int widening-mult.  */
+__attribute__ ((noinline)) int
+foo1(int len) {
+  int i;
+
+  for (i=0; i
+   If the result of WIDEN_MULT needs to be converted to a larger type, the
+   returned stmt will be this type conversion stmt.
 */

 static gimple
@@ -606,8 +610,8 @@ vect_recog_widen_mult_pattern (vec *stmts,
   gimple def_stmt0, def_stmt1;
   tree oprnd0, oprnd1;
   tree type, half_type0, half_type1;
-  gimple pattern_stmt;
-  tree vectype, vectype_out = NULL_TREE;
+  gimple new_stmt = NULL, pattern_stmt = NULL;
+  tree vectype, vecitype;
   tree var;
   enum tree_code dummy_code;
   int dummy_int;
@@ -661,6 +665,33 @@ vect_recog_widen_mult_pattern (vec *stmts,
 return NULL;
 }

+  /* If the two arguments have different sizes, convert the one with
+ the smaller type into the larger type.  */
+  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
+{
+  tree* oprnd = NULL;
+  gimple def_stmt = NULL;
+
+  if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
+ {
+  def_stmt = def_stmt0;
+  half_type0 = half_type1;
+  oprnd = &oprnd0;
+ }
+  else
+ {
+  def_stmt = def_stmt1;
+  half_type1 = half_type0;
+  oprnd = &oprnd1;
+ }
+
+tree old_oprnd = gimple_assign_rhs1 (def_stmt);
+tree new_oprnd = make_ssa_name (half_type0, NULL);
+new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+ old_oprnd, NULL_TREE);
+*oprnd = new_oprnd;
+}
+
   /* Handle unsigned case.  Look for
  S6  u_prod_T = (unsigned TYPE) prod_T;
  Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
@@ -692,6 +723,15 @@ vect_recog_widen_mult_pattern (vec *stmts,
   if (!types_compatible_p (half_type0, half_type1))
 return NULL;

+  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
+ to get an intermediate result of type ITYPE.  In this case we need
+ to build a statement to convert this intermediate result to type TYPE.  */
+  tree itype = type;
+  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
+itype = build_nonstandard_integer_type
+  (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
+   TYPE_UNSIGNED (type));
+
   /* Pattern detected.  */
   if (dump_enabled_p ())
 dump_printf_loc (MSG_NOTE, vect_location,
@@ -699,23 +739,56 @@ vect_recog_widen_mult_pattern (vec *stmts,

   /* Check target support  */
   vectype = get_vectype_for_scalar_type (half_type0);
-  vectype_out = get_vectype_for_scalar_type (type);
+  vecitype = get_vectype_for_scalar_type

Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C

2013-12-06 Thread Aldy Hernandez

[Jakub, see below]


+  if (!c_parser_elem_fn_vectorlength (parser)) +
{ +  c_parser_skip_until_found (parser,
CPP_CLOSE_PAREN, NULL); +  /* NO reason to keep
any of these tokens if the + vectorlength is
messed up.  */ +  vec_free (parser->elem_fn_tokens); +
return;


It may be cleaner to make the caller free the vector.


Well, the caller doesn't know if an error has occurred. I suppose I
could do something like check for seen_error (), but this sounds a
bit clearer (to me at least)


Sorry, what I meant to say is that it may be cleaner if 
c_parser_elem_fn_vectorlength (or whatever it's called now) frees the 
vector upon error before returning.



First of all, it's a really bad idea to scan all the functions
twice. You should have adapted expand_simd_clones() to do the work
for both.



OK.  I included this in the first loop itself so we won't have to
scan the functions twice.


But even so, I don't think you need to do this at all.  Aren't Cilk
Plus elementals supposed to be tagged as "omp declare simd" as
well?  In which case expand_simd_clones() will DTRT.  It
should...and then simd_clone_clauses_extract() already has the
smarts to differentiate between the both variants.



Yes, the thing is, there is a big do-while loop in the function and
that needs to be replaced if we have to check for SIMD-enabled
function and #pragma omp declare simd. If we pass it as a type, then
it just needs to check for the type string.


But aren't both OpenMP and Cilk Plus simd clones marked as "omp declare 
simd"?  In which case you shouldn't have to do anything?  Are the Cilk 
Plus clones not being marked as "omp declare simd" in the front-ends?



I narrowed this to 5 with the help of Jakub a while back. But now, I
have replaced it with 3, "-O3 -g"," -O3 -g -std=c99"


I would prefer to get rid of -O3, since inlining may do interesting 
things to tests, and you'll have to use __attribute__((noinline)) to 
test some things.


Jakub, would you be ok with "-O0 -g" and "-O0 -std=c99"?  For that 
matter, I'd say pass no arguments at all (""), and let the test itself 
test something special if required.



For that matter, we should probably get rid of all the variant
testing in the rest of Cilk Plus.


I will send this out as a different patch later for all the others.


Thank you.  Do so after Jakub responds as to what he prefers.


Renamed "EF" to "SE" (Simd Enabled function)


If you must, but I would still prefer something more meaningful (i.e., 
not an abbreviation).  I know there is precedence with the 
array-notation feature, but I dislike that too :).  Feel free to ignore 
me on this one.



+/* Parses the vector attribute of SIMD enabled functions in Cilk Plus.



+   The VEC_TOKEN is the "vector" token that is replaced with "simd" and
+   pushed into the token list.



+   Syntax:
+   vector
+   vector ().  */


Also, s/The VEC_TOKEN/VEC_TOKEN/


+static void
+c_parser_cilk_simd_fn_expr_list (c_parser *parser, c_token vec_token)


This is a parsing routine for the vector attribute, let's call this 
"c_parser_cilk_simd_fn_vector" or "c_parser_cilk_simd_fn_vector_attrs". 
 The expr_list is confusing.



+  /* NO reason to keep any of these tokens if the
+ vectorlength is messed up.  */


Lower case "NO".


+ vec_free (parser->cilk_simd_fn_tokens);
+  // parser->cilk_simd_fn_tokens->release ();
+  return;


What's this commented out code?  If unnecessary, remove it.


+  return;
+}


Empty return at end of function.  Remove it.


+  /* c_parser_attributes is called in several places, and so if these EOF


s/and so/so/


+  /* Two EOF_token is added as a safety-net since the normal C front-end has
+ two token look-ahead.  */


Shouldn't that be, "Two CPP_EOF tokens" ??


+  error ("%<#pragma omp declare simd%> cannot be used in the same "
+"function marked as a SIMD-enabled function");


Perhaps we should say "...as a Cilk Plus SIMD-enabled...", to make it 
absolutely clear that it is OpenMP and Cilk Plus that can't coexist.



 /* Cilk Plus:
-   vectorlength ( constant-expression ) */
+   This function is shared by SIMD-enabled functions and #pragma simd.
+   If IS_SIMD_FN is true then it is parsing a SIMD-enabled function and
+   CLAUSES is unused.
+   Syntax:
+   vectorlength ( constant-expression )  */

-static tree
-c_parser_cilk_clause_vectorlength (c_parser *parser, tree clauses)
+static bool
+c_parser_cilk_clause_vectorlength (c_parser *parser, tree *clauses,
+  bool is_simd_fn)


Can you document what this function does?  Also, document the fact that 
when IS_SIMD_FN is true, we are merely caching the tokens, otherwise we 
are building the OMP_CLAUSE_SAFELEN.



  /* if expr is error_mark_node, then the returning function would have
 flagged the error.  No need to flag them twice.  */


[PATCH] PR libstdc++/59392: Fix ARM EABI uncaught throw from unexpected exception handler

2013-12-06 Thread Roland McGrath
[This patch is on the git-only branch roland/pr59392.]

As described in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59392, this
bug looks to have been present since 4.2 originally introduced support
for ARM EABI-based C++ exception handling.  I'd like to put this fix on
trunk and 4.8, and don't personally care about older versions but the
same fix should apply to all versions still being maintained.

The nature of the bug is quite straightforward: it's an unconditional
null pointer dereference in the code path for an unexpected throw done
inside a user-supplied handler for unexpected exceptions.  I'm not
really sure if there are other ways to make it manifest.

Mark Seaborn is responsible for identifying the fix, which mimics the
similar code for the non-EABI implementation (and copies its comment).
I filled it out with a regression test.  (We're both covered by Google's
blanket copyright assignment.)

No regressions in 'make check-c++' on arm-linux-gnueabihf.

Ok for trunk and 4.8?


Thanks,
Roland


libstdc++-v3/
2013-12-06  Roland McGrath  
Mark Seaborn  

PR libstdc++/59392
* libsupc++/eh_call.cc (__cxa_call_unexpected): Call __do_catch with
the address of a null pointer, not with a null pointer to pointer.
Copy comment for this case from eh_personality.cc:__cxa_call_unexpected.
* testsuite/18_support/bad_exception/59392.cc: New file.

--- a/libstdc++-v3/libsupc++/eh_call.cc
+++ b/libstdc++-v3/libsupc++/eh_call.cc
@@ -140,7 +140,11 @@ __cxa_call_unexpected(void* exc_obj_in)
   &new_ptr) != ctm_failed)
__throw_exception_again;

- if (catch_type->__do_catch(&bad_exc, 0, 1))
+ // If the exception spec allows std::bad_exception, throw that.
+ // We don't have a thrown object to compare against, but since
+ // bad_exception doesn't have virtual bases, that's OK; just pass 
NULL.
+ void* obj = NULL;
+ if (catch_type->__do_catch(&bad_exc, &obj, 1))
bad_exception_allowed = true;
}

--- /dev/null
+++ b/libstdc++-v3/testsuite/18_support/bad_exception/59392.cc
@@ -0,0 +1,51 @@
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+#include 
+#include 
+
+class expected {};
+class unexpected {};
+class from_handler {};
+
+static void func_with_exception_spec() throw(expected)
+{
+  throw unexpected();
+}
+
+static void unexpected_handler()
+{
+  throw from_handler();
+}
+
+static void terminate_handler()
+{
+  exit(0);
+}
+
+// libstdc++/59392
+int main()
+{
+  std::set_unexpected(unexpected_handler);
+  std::set_terminate(terminate_handler);
+  try {
+func_with_exception_spec();
+  } catch (expected&) {
+abort();
+  }
+  abort();
+}


Re: [wide-int] small cleanup in wide-int.*

2013-12-06 Thread Kenneth Zadeck

On 12/06/2013 01:32 PM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

On 12/03/2013 11:52 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

Index: tree-vrp.c
===
--- tree-vrp.c  (revision 205597)
+++ tree-vrp.c  (working copy)
@@ -2611,22 +2611,28 @@ extract_range_from_binary_expr_1 (value_
   
 signop sign = TYPE_SIGN (expr_type);

 unsigned int prec = TYPE_PRECISION (expr_type);
-  unsigned int prec2 = (prec * 2) + (sign == UNSIGNED ? 2 : 0);
   
 if (range_int_cst_p (&vr0)

  && range_int_cst_p (&vr1)
  && TYPE_OVERFLOW_WRAPS (expr_type))
{
- wide_int sizem1 = wi::mask (prec, false, prec2);
- wide_int size = sizem1 + 1;
+ /* vrp_int is twice as wide as anything that the target
+supports so it can support a full width multiply.  No
+need to add any more padding for an extra sign bit
+because that comes with the way that WIDE_INT_MAX_ELTS is
+defined.  */
+ typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2)
+   vrp_int;
+ vrp_int sizem1 = wi::mask  (prec, false);
+ vrp_int size = sizem1 + 1;
   
   	  /* Extend the values using the sign of the result to PREC2.

 From here on out, everthing is just signed math no matter
 what the input types were.  */
- wide_int min0 = wide_int::from (vr0.min, prec2, sign);
- wide_int max0 = wide_int::from (vr0.max, prec2, sign);
- wide_int min1 = wide_int::from (vr1.min, prec2, sign);
- wide_int max1 = wide_int::from (vr1.max, prec2, sign);
+ vrp_int min0 = wi::to_vrp (vr0.min);
+ vrp_int max0 = wi::to_vrp (vr0.max);
+ vrp_int min1 = wi::to_vrp (vr1.min);
+ vrp_int max1 = wi::to_vrp (vr1.max);

I think we should avoid putting to_vrp in tree.h if vrp_int is only
local to this block.  Instead you could have:

  typedef generic_wide_int
> vrp_int_cst;
...
vrp_int_cst min0 = vr0.min;
vrp_int_cst max0 = vr0.max;
vrp_int_cst min1 = vr1.min;
vrp_int_cst max1 = vr1.max;


i did this in a different way because i had trouble doing it as you
suggested.the short answer is that all of the vrp_int code is now
local to tree-vrp.c which i think was your primary goal

Ah, so we later assign to these variables:

  /* Canonicalize the intervals.  */
  if (sign == UNSIGNED)
{
  if (wi::ltu_p (size, min0 + max0))
{
  min0 -= size;
  max0 -= size;
}

  if (wi::ltu_p (size, min1 + max1))
{
  min1 -= size;
  max1 -= size;
}
}

OK, in that case I suppose a temporary is needed.  But I'd prefer
not to put local stuff in the wi:: namespace.  You could just have:

  typedef generic_wide_int
 > vrp_int_cst;

   vrp_int min0 = vrp_int_cst (vr0.min);
   vrp_int max0 = vrp_int_cst (vr0.max);
   vrp_int min1 = vrp_int_cst (vr1.min);
   vrp_int max1 = vrp_int_cst (vr1.max);

which removes the need for:

+/* vrp_int is twice as wide as anything that the target supports so it
+   can support a full width multiply.  No need to add any more padding
+   for an extra sign bit because that comes with the way that
+   WIDE_INT_MAX_ELTS is defined.  */
+typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2) vrp_int;
+namespace wi
+{
+  generic_wide_int  > to_vrp 
(const_tree);
+}
+
+inline generic_wide_int  >
+wi::to_vrp (const_tree t)
+{
+  return t;
+}
+


   #define WIDE_INT_MAX_ELTS \
-  ((4 * MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1) \
-   / HOST_BITS_PER_WIDE_INT)
+  (((MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1)\
+/ HOST_BITS_PER_WIDE_INT) + 1)

I think this should be:

(MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT + 1)

We only need an extra HWI if MAX_BITSIZE_MODE_ANY_INT is an exact multiple
of HOST_BITS_PER_WIDE_INT.

we will do this later when some other issues that Eric B raised are settled.

I think you're talking about defining MAX_BITSIZE_MODE_ANY_INT as
MAX_SIZE_MODE_ANY_INT * BITS_PER_UNIT, but that's orthogonal to the
change above.  IMO it doesn't make sense to both round up the division
and also add 1 to the result.  So I think we should make this change
regardless of whatever follows.

Looks good to me otherwise, thanks.

Richard

so this one works the way you want.  While it is true that the problems
are disjoint, the solutions will likely end up changing the same lines of
source in two different ways.


ok to commit.

kenny
Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c	(revision 205726)
+++ gcc/tree-vrp.c	(working copy)
@@ -2620,23 +2620,24 @@ extract_

Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Vladimir Makarov

On 12/6/2013, 2:40 PM, David Edelsohn wrote:

On Fri, Dec 6, 2013 at 2:02 PM, Vladimir Makarov  wrote:

Here is the patch.

Tested and bootstrapped on gcc110.fsffrance.org.


Ok to commit?

2013-12-05  Vladimir Makarov  

 * config/rs6000/rs600.md (*bswapdi2_64bit): Remove ?? from the
 constraint.


Okay, let's just remove the "??" modifier from the constraint.

Thanks for your patience, explanations, and work on this, Vlad.



Thanks, David.

Committed as rev. 205765.

I'd like to work more on LRA for power.  If you find any issue with LRA 
generated code performance, it could help me a lot.


I know you are busy with power8 stuff but if you see some LRA generated 
code examples can be improved, please let me know.


I'd like to work on spilling general regs into VSX regs too as it is 
done for intel x86-64 (spilling into SSE).  I guess it would help power8 
performance.  I am going to start this work when I have access to 
power8.  Although I should say the LRA code for this is not good right 
now - there are few opportunities for spilling into SSE on x86-64.  So 
probably, I rewrite this code in LRA.






Re: [REPOST] Invalid Code when reading from unaligned zero-sized array

2013-12-06 Thread Jeff Law

On 12/06/13 02:11, Eric Botcazou wrote:

Here's the Correct Fix(tm).  We may or may not decide to go for it because
of concerns about ABI changes; in the latter case, any kludge that we'll
put in place instead must be restricted to the cases caught by this patch.


* stor-layout.c (compute_record_mode): Return BLKmode for a trailing
array with size 0 or 1.


Revised version without the off-by-one error...
I'd certainly be concerned.  Ports have (for better or worse) keyed on 
BLKmode rather than looking at the underlying types.  So if something 
which was previously SImode or DImode is now BLKmode, there's a nonzero 
chance we're going to change how it gets passed.


jeff


Re: [REPOST] Invalid Code when reading from unaligned zero-sized array

2013-12-06 Thread Jeff Law

On 12/06/13 01:51, Bernd Edlinger wrote:

As for the patch itself. In a few places within expand_expr_real_1 you

changed calls to expand_expr to instead call expand_expr_real. ISTM
you could have gotten the same effect by just adding your extra argument
to the existing code?



Yes, but one goal is to keep the patch-file as small as possible,
and expand_expr is used everywhere.

Actually expand_expr is just a wrapper for

expand_expr_real (exp, target, mode, modifier, NULL, false)
Sorry, I glossed over that -- mentally I saw the change to expand_expr 
and assumed you passed the new flag through.  But that's not how the 
change is implemented.  My bad.


So clearly for the cases where you want the flag to be true, you can't 
go through the wrapper.  Again, my bad.



jeff



Re: [REPOST] Invalid Code when reading from unaligned zero-sized array

2013-12-06 Thread Jeff Law

On 12/06/13 03:06, Richard Biener wrote:


The issue is that we handle expansion of misaligned moves in the code
we recurse to - but that misaligned move handling can only work at
the "outermost" level of the recursion as it is supposed to see the
"real" access (alignment and mode of the memory access, not of
some base of it).

So we need a recursion that skips this part (and others - which already
works), just processing as-if in "expand the base of some memory access"
mode.
So it's really not a case of someone outside expand_expr needing 
different behaviour, but a case of stopping the recursion within 
expand_expr.  That's a bit less concerning.




That we recurse at all when expanding memory accesses makes this
expansion path quite a twisted maze - which is why I suggested to
re-factor the whole thing to not require recursion (but that will be
a very big change, not suitable for this stage).

No doubt.  In general expand_expr is a mess and has been, well, forever.




The easiest is to add a flag to indicate the "we're-expanding-the-base",
doing another expand modifier doesn't work as they are not combinable
and the passed modifier is already modified for the recursion - and that
dependent on stuff.
We could always make the modifiers a bitmask, but probably not something 
we really need to do during stage3.




"Fixing" the mode of the base object isn't really fixing the fact that
the recursion shouldn't even try to generate a movmisalign mem,
it just papers over this issue by making it (hopefully) never trigger
(at least for valid code) for bases.
Ok, that answers the other question I had after looking at other parts 
of this thread.  Though one could argue that the modes are in fact wrong.


Jeff



Re: wide-int, rtl

2013-12-06 Thread Kenneth Zadeck

On 11/27/2013 11:24 AM, Eric Botcazou wrote:

Richi has asked that we break the wide-int patch so that the individual port
and front end maintainers can review their parts without having to go through
the entire patch.  This patch covers the first half of the rtl code.

--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -2336,15 +2336,23 @@ hash_rtx_cb (const_rtx x, enum machine_mode mode,
 + (unsigned int) INTVAL (x));
return hash;
  
+case CONST_WIDE_INT:

+  {
+   int i;
+   for (i = 0; i < CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;

You can write "for (int i = 0; ..." now and remove the parentheses.


done

--- a/gcc/cselib.c
+++ b/gcc/cselib.c
@@ -1121,15 +1120,23 @@ cselib_hash_rtx (rtx x, int create, enum machine_mode
memmode)
hash += ((unsigned) CONST_INT << 7) + INTVAL (x);
return hash ? hash : (unsigned int) CONST_INT;
  
+case CONST_WIDE_INT:

+  {
+   int i;
+   for (i = 0; i < CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;
+

Likewise.

done

--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -3298,23 +3268,13 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n,
int precision,
pow = n + lgup;
pow2 = n + lgup - precision;
  
-  /* We could handle this with some effort, but this case is much

- better handled directly with a scc insn, so rely on caller using
- that.  */
-  gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);

Why removing it?

the code no longer has restrictions on the size/mode of the variables.

--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -722,64 +722,33 @@ convert_modes (enum machine_mode mode, enum machine_mode
oldmode, rtx x, int uns
if (mode == oldmode)
  return x;
  
-  /* There is one case that we must handle specially: If we are converting

- a CONST_INT into a mode whose size is twice HOST_BITS_PER_WIDE_INT and
- we are to interpret the constant as unsigned, gen_lowpart will do
- the wrong if the constant appears negative.  What we want to do is
- make the high-order word of the constant zero, not all ones.  */
-
-  if (unsignedp && GET_MODE_CLASS (mode) == MODE_INT
-  && GET_MODE_BITSIZE (mode) == HOST_BITS_PER_DOUBLE_INT
-  && CONST_INT_P (x) && INTVAL (x) < 0)
+  if (CONST_SCALAR_INT_P (x)
+  && GET_MODE_CLASS (mode) == MODE_INT)

On a single line.

done


  {
-  double_int val = double_int::from_uhwi (INTVAL (x));
-
-  /* We need to zero extend VAL.  */
-  if (oldmode != VOIDmode)
-   val = val.zext (GET_MODE_BITSIZE (oldmode));
-
-  return immed_double_int_const (val, mode);
+  /* If the caller did not tell us the old mode, then there is
+not much to do with respect to canonization.  We have to assume
+that all the bits are significant.  */
+  if (GET_MODE_CLASS (oldmode) != MODE_INT)
+   oldmode = MAX_MODE_INT;
+  wide_int w = wide_int::from (std::make_pair (x, oldmode),
+  GET_MODE_PRECISION (mode),
+  unsignedp ? UNSIGNED : SIGNED);
+  return immed_wide_int_const (w, mode);

canonicalization?

done


@@ -5301,10 +5271,10 @@ store_expr (tree exp, rtx target, int call_param_p,
bool nontemporal)
   &alt_rtl);
  }
  
-  /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not

- the same as that of TARGET, adjust the constant.  This is needed, for
- example, in case it is a CONST_DOUBLE and we want only a word-sized
- value.  */
+  /* If TEMP is a VOIDmode constant and the mode of the type of EXP is
+ not the same as that of TARGET, adjust the constant.  This is
+ needed, for example, in case it is a CONST_DOUBLE or
+ CONST_WIDE_INT and we want only a word-sized value.  */
if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode
&& TREE_CODE (exp) != ERROR_MARK
&& GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp)))

Why reformatting the whole comment?

it is what emacs did.  I have formatted it back.

@@ -9481,11 +9459,19 @@ expand_expr_real_1 (tree exp, rtx target, enum
machine_mode tmode,
return decl_rtl;
  
  case INTEGER_CST:

-  temp = immed_double_const (TREE_INT_CST_LOW (exp),
-TREE_INT_CST_HIGH (exp), mode);
-
-  return temp;
-
+  {
+   tree type = TREE_TYPE (exp);

Redundant, see the beginning of the function.

fixed

+   /* One could argue that GET_MODE_PRECISION (TYPE_MODE (type))
+  should always be the same as TYPE_PRECISION (type).
+  However, it is not.  Since we are converting from tree to
+  rtl, we have to expose this ugly truth here.  */
+   temp = immed_wide_int_const (wide_int::from
+  (exp,
+   GET_MODE_PRECISION (TYPE_MODE (type)),
+   

Re: [PATCH] Fix up reassoc range test optimization (PR tree-optimization/59388)

2013-12-06 Thread Jeff Law

On 12/06/13 13:33, Jakub Jelinek wrote:

Hi!

range->exp can in some cases be equal to op, as in the following testcase:
_2;
   int _3;
   _Bool _4;
   int a.0_5;
   _Bool _7;

   :
   _2 = b.f;
   _3 = (int) _2;
   _4 = _3 > 0;
   _7 = _2 | _4;
   a.0_5 = (int) _7;
   a = a.0_5;
   return a.0_5;

op here is _2 (and other range's op is _4), the two range tests have been
successfully merged and the code chose to use the first one to be the one
that is kept.  Normally the range test isn't a SSA_NAME itself, but say
comparison etc. and thus emitting the statements before the statement
is desirable, but in this case we can't emit the new statements before it,
as they use _2.  Fixed thusly, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk and 4.8 (for the latter the fix is tiny bit
different, therefore attached)?
I was going to ask how this was possible, but then I looked at the 
testcase...  Sigh.





2013-12-06  Jakub Jelinek  

PR tree-optimization/59388
* tree-ssa-reassoc.c (update_range_test): If op == range->exp,
gimplify tem after stmt rather than before it.

* gcc.c-torture/execute/pr59388.c: New test.

OK for the trunk.  Branch maintainers have final say for the branches.

jeff



Re: [PATCH] Fix small vtable verification bugs.

2013-12-06 Thread Jeff Law

On 12/06/13 12:30, Caroline Tice wrote:

The attached  patch fixes two small problems with the current vtable
verification code:  it makes the libvtv function decls globally
visible, and it  updates all uses of the verified vtable pointer with
the verification results, rather than just the first use.

I have bootstrapped the compiler with this patch, both with and
without vtable verification enabled, and have run the regression
testsuites, both with and without vtable verification enabled, with no
regressions.  I have only tested this on Linux.

Is this patch OK to commit?

-- Caroline Tice
cmt...@google.com

gcc ChangeLog:

2013-12-06  Caroline Tice  

 Submitting patch from Stephen Checkoway, s...@cs.jhu.edu
 * vtable-class-hierarchy.c (init_functions): Make the libvtv
function decls externally visible.


gcc/cp ChangeLog:

2013-12-06  Caroline Tice  

 Submitting patch from Stephen Checkoway, s...@cs.jhu.edu
 * vtable-class-hierarchy.c (init_functions): Make the libvtv
 function decls externally visible.

OK.
jeff



Re: [PATCH] Masked load/store vectorization (take 6)

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 02:27:25PM +0100, Jakub Jelinek wrote:
> > You can install that as cleanup now if you split it out (so hopefully
> > no users creep back that make removing it impossible).
> 
> Ok, will do.

Here is that part, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

2013-12-06  Jakub Jelinek  

* tree-data-ref.c (struct data_ref_loc_d): Replace pos field with ref.
(get_references_in_stmt): Don't record operand addresses, but
operands themselves.
(find_data_references_in_stmt, graphite_find_data_references_in_stmt):
Adjust for the pos -> ref change.

--- gcc/tree-data-ref.c.jj  2013-11-28 16:01:05.674837413 +0100
+++ gcc/tree-data-ref.c 2013-12-06 18:31:35.496730632 +0100
@@ -4320,8 +4320,8 @@ compute_all_dependences (vecsafe_push (ref);
}
@@ -4386,16 +4386,16 @@ get_references_in_stmt (gimple stmt, vec
 {
   unsigned i, n;
 
-  op0 = gimple_call_lhs_ptr (stmt);
+  op0 = gimple_call_lhs (stmt);
   n = gimple_call_num_args (stmt);
   for (i = 0; i < n; i++)
{
- op1 = gimple_call_arg_ptr (stmt, i);
+ op1 = gimple_call_arg (stmt, i);
 
- if (DECL_P (*op1)
- || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
+ if (DECL_P (op1)
+ || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
{
- ref.pos = op1;
+ ref.ref = op1;
  ref.is_read = true;
  references->safe_push (ref);
}
@@ -4404,11 +4404,11 @@ get_references_in_stmt (gimple stmt, vec
   else
 return clobbers_memory;
 
-  if (*op0
-  && (DECL_P (*op0)
- || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0
+  if (op0
+  && (DECL_P (op0)
+ || (REFERENCE_CLASS_P (op0) && get_base_address (op0
 {
-  ref.pos = op0;
+  ref.ref = op0;
   ref.is_read = false;
   references->safe_push (ref);
 }
@@ -4435,7 +4435,7 @@ find_data_references_in_stmt (struct loo
   FOR_EACH_VEC_ELT (references, i, ref)
 {
   dr = create_data_ref (nest, loop_containing_stmt (stmt),
-   *ref->pos, stmt, ref->is_read);
+   ref->ref, stmt, ref->is_read);
   gcc_assert (dr != NULL);
   datarefs->safe_push (dr);
 }
@@ -4464,7 +4464,7 @@ graphite_find_data_references_in_stmt (l
 
   FOR_EACH_VEC_ELT (references, i, ref)
 {
-  dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
+  dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
   gcc_assert (dr != NULL);
   datarefs->safe_push (dr);
 }

Jakub


[PATCH] Fix up reassoc range test optimization (PR tree-optimization/59388)

2013-12-06 Thread Jakub Jelinek
Hi!

range->exp can in some cases be equal to op, as in the following testcase:
   _2;
  int _3;
  _Bool _4;
  int a.0_5;
  _Bool _7;

  :
  _2 = b.f;
  _3 = (int) _2;
  _4 = _3 > 0;
  _7 = _2 | _4;
  a.0_5 = (int) _7;
  a = a.0_5;
  return a.0_5;

op here is _2 (and other range's op is _4), the two range tests have been
successfully merged and the code chose to use the first one to be the one
that is kept.  Normally the range test isn't a SSA_NAME itself, but say
comparison etc. and thus emitting the statements before the statement
is desirable, but in this case we can't emit the new statements before it,
as they use _2.  Fixed thusly, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk and 4.8 (for the latter the fix is tiny bit
different, therefore attached)?

2013-12-06  Jakub Jelinek  

PR tree-optimization/59388
* tree-ssa-reassoc.c (update_range_test): If op == range->exp,
gimplify tem after stmt rather than before it.

* gcc.c-torture/execute/pr59388.c: New test.

--- gcc/tree-ssa-reassoc.c.jj   2013-11-23 15:21:23.0 +0100
+++ gcc/tree-ssa-reassoc.c  2013-12-06 17:27:01.908908702 +0100
@@ -2072,9 +2072,19 @@ update_range_test (struct range_entry *r
 
   tem = fold_convert_loc (loc, optype, tem);
   gsi = gsi_for_stmt (stmt);
-  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
- GSI_SAME_STMT);
-  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
+  /* In rare cases range->exp can be equal to lhs of stmt.
+ In that case we have to insert after the stmt rather then before
+ it.  */
+  if (op == range->exp)
+tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
+   GSI_CONTINUE_LINKING);
+  else
+{
+  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
+ GSI_SAME_STMT);
+  gsi_prev (&gsi);
+}
+  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
 if (gimple_uid (gsi_stmt (gsi)))
   break;
 else
--- gcc/testsuite/gcc.c-torture/execute/pr59388.c.jj2013-12-06 
17:41:55.811244282 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr59388.c   2013-12-06 
17:32:28.0 +0100
@@ -0,0 +1,11 @@
+/* PR tree-optimization/59388 */
+
+int a;
+struct S { unsigned int f:1; } b;
+
+int
+main ()
+{
+  a = (0 < b.f) | b.f;
+  return a;
+}

Jakub
2013-12-06  Jakub Jelinek  

PR tree-optimization/59388
* tree-ssa-reassoc.c (update_range_test): If op == range->exp,
gimplify tem after stmt rather than before it.

* gcc.c-torture/execute/pr59388.c: New test.

--- gcc/tree-ssa-reassoc.c.jj   2013-09-09 19:10:19.971488442 +0200
+++ gcc/tree-ssa-reassoc.c  2013-12-06 17:56:01.288842668 +0100
@@ -1980,8 +1980,15 @@ update_range_test (struct range_entry *r
 
   tem = fold_convert_loc (loc, optype, tem);
   gsi = gsi_for_stmt (stmt);
-  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
- GSI_SAME_STMT);
+  /* In rare cases range->exp can be equal to lhs of stmt.
+ In that case we have to insert after the stmt rather then before
+ it.  */
+  if (op == range->exp)
+tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
+   GSI_SAME_STMT);
+  else
+tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
+   GSI_SAME_STMT);
 
   /* If doing inter-bb range test optimization, update the
  stmts immediately.  Start with changing the first range test
--- gcc/testsuite/gcc.c-torture/execute/pr59388.c.jj2013-12-06 
17:41:55.811244282 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr59388.c   2013-12-06 
17:32:28.0 +0100
@@ -0,0 +1,11 @@
+/* PR tree-optimization/59388 */
+
+int a;
+struct S { unsigned int f:1; } b;
+
+int
+main ()
+{
+  a = (0 < b.f) | b.f;
+  return a;
+}


RE: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C

2013-12-06 Thread Iyer, Balaji V
Hi Aldy  & Jakub,
I have made the fixes you have mentioned and have answered your 
questions below. Attached is the fixed patch and here are the ChangeLog entries.

Gcc/ChangeLog
2013-12-06  Balaji V. Iyer  

* omp-low.c (expand_simd_clones): Added a new parameter called "type."
(ipa_omp_simd_clone): Added a call to expand_simd_clones when Cilk Plus
is enabled.
(simd_clone_clauses_extract): Replaced the string "cilk simd elemental"
with "cilk simd function."

Gcc/c-family/Changelog
2013-12-06  Balaji V. Iyer  

* c-common.c (c_common_attribute_table): Added "cilk simd function"
attribute.

Gcc/c/ChangeLog
2013-12-06  Balaji V. Iyer  

* c-parser.c (struct c_parser::cilk_simd_fn_tokens): Added new field.
(c_parser_declaration_or_fndef): Added a check if cilk_simd_fn_tokens
field in parser is not empty.  If not-empty, call the function
c_parser_finish_omp_declare_simd.
(c_parser_cilk_clause_vectorlength): Modified function to be shared
between SIMD-enabled functions and #pragma simd.  Changed return-type
to bool and added new parameter.
(c_parser_cilk_all_clauses): Modified the usage of the function
c_parser_cilk_clause_vectorlength as mentioned above.
(c_parser_cilk_simd_fn_expr_list): Likewise.
(c_finish_cilk_simd_fn_tokens): Likewise.
(c_parser_attributes): Added a cilk_simd_fn_tokens parameter.  Added a
check for vector attribute and if so call the function
c_parser_cilk_simd_fn_expr_list.  Also, when Cilk plus is enabled,
called the function c_finish_cilk_simd_fn_tokens.
(c_finish_omp_declare_simd): Added a check if cilk_simd_fn_tokens in
parser field is non-empty.  If so, parse them as you would parse
the omp declare simd pragma.

Gcc/testsuite/ChangeLog
2013-12-06  Balaji V. Iyer  

* c-c++-common/cilk-plus/SE/ef_test.c: New test.
* c-c++-common/cilk-plus/SE/ef_test2.c: Likewise.
* c-c++-common/cilk-plus/SE/vlength_errors.c: Likewise.
* c-c++-common/cilk-plus/SE/ef_error.c: Likewise.
* c-c++-common/cilk-plus/SE/ef_error2.c: Likewise.
* gcc.dg/cilk-plus/cilk-plus.exp: Added calls for the above tests.

Jakub, Is it Ok for branch?

Thanks,

Balaji V. Iyer.



> -Original Message-
> From: Aldy Hernandez [mailto:al...@redhat.com]
> Sent: Thursday, December 5, 2013 3:20 PM
> To: Iyer, Balaji V
> Cc: 'Jakub Jelinek'; 'gcc-patches@gcc.gnu.org'
> Subject: Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly
> Elemental functions) for C
> 
> On 11/30/13 20:38, Iyer, Balaji V wrote:
> > Hello Aldy,
> > Some of the middle end changes I made in the previous patch was
> not flying for the C++. Here is a fixed patch where the middle-end changes
> will work for both C and C++.
> > With this email, I am attaching the patch for C along with the middle
> end changes. Is this Ok for the branch?
> 
> Jakub and company ultimately have to approve your patch, but here are a
> few things.
> 
> > +
> > +  /* Cilk Plus specific parser/lexer information.  */
> > +
> > +  /* Buffer to hold all the tokens from parsing the vector attribute for 
> > the
> > + SIMD Enabled functions (formerly known as elemental functions).
> > + */  vec  *elem_fn_tokens;
> 
> If the name "elementals" is being phased out, then perhaps you need to
> come up with another name here.  Perhaps "cilk_simd_clone_tokens" or
> something that looks similar to the OpenMP variant
> "cilk_declare_simd_tokens" (akin to omp_declare_simd_clauses) in the rest
> of the patch.
> 

Fixed. I called it "cilk_simd_fn" instead of "elem_fn"

> Also, "Enabled" should not be capitalized.
> 

Fixed.


> > +/* Parses the vectorlength vector attribute for the SIMD Enabled
> functions
> > +   in Cilk Plus.
> > +   Syntax:
> > +   vectorlength ()  */
> > +
> > +static bool
> > +c_parser_elem_fn_vectorlength (c_parser *parser)
> 
> Similarly here.  Let's get rid of *elem* nomenclature throughout.
> Perhaps c_parser_cilk_declare_simd_vectorlength and similarly throughout
> the other parsing routines in the patch.  This will make it clearer that
> *cilk_declare_simd* is related to OpenMP's declare simd.
> 

Fixed.

> > +  if (TREE_CODE (value) != INTEGER_CST)
> > +{
> > +  error_at (token->location, "vectorlength must be a constant 
> > integer");
> > +  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
> > +  return false;
> > +}
> 
> I thought integer constant expressions were allowed here.  Shouldn't things
> like "sizeof (int)" be allowed?  See what is done in
> c_parser_cilk_clause_vectorlength() which handles constant expressions.
>   Also, you will need a corresponding test.
> 

Yes it is allowed. Fixed and added a test.

> For that matter... can't we combine the above function with
> c_parser_cilk_clause_vectorlength().  It doesn't make much sense having
> two functions 

Re: [PATCH 10/13] Eliminate last_basic_block macro.

2013-12-06 Thread Richard Biener
Oleg Endo  wrote:
>On Fri, 2013-12-06 at 16:57 +0100, Steven Bosscher wrote:
>> On Fri, Dec 6, 2013 at 3:51 PM, David Malcolm wrote:
>> > * asan.c (transform_statements): Eliminate use of
>last_basic_block
>> > in favor of last_basic_block_for_fn, in order to make use
>of cfun
>> > explicit.
>> 
>> Can we please make all this _for_fn go away?
>> 
>
>Sorry if this has been discussed before... but why not adding member
>functions to 'function' instead of freestanding macros/functions that
>take a function* as a first argument?  This would also make it easier
>to
>eliminate the "_for_fn" (freestanding function/macro name clashes etc)
>I
>think.

Both can be done, but these patches make cfun uses explicit which was the goal 
while following existing practice.

Richard.

>Cheers,
>Oleg




Re: [PATCH] pr59043

2013-12-06 Thread Mike Stump
On Dec 6, 2013, at 12:02 PM, Dominique Dhumieres  wrote:
>> Be sure to attach patches to be applied instead of cut-n-paste if you use 
>> Mail, it destroys the text cut and pasted.
> 
> I did not use Mail. Did you have troubles with Tabs? Apparently the post on 
> the list has them right.

Curious indeed.  Usually I can cut-n-paste those just fine…  I wonder why yours 
I could not.  I checked the raw version, and indeed I see the spaces.  Thanks 
for letting me know.

Re: [PATCH] pr59043

2013-12-06 Thread Dominique Dhumieres
> :-)  We stopped using cvs a long time ago.

I meant SVN, just showing my age!-)

> Be sure to attach patches to be applied instead of cut-n-paste if you use 
> Mail, it destroys the text cut and pasted.

I did not use Mail. Did you have troubles with Tabs? Apparently the post on the 
list has them right.

Thanks for the commit,

Dominique



Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread David Edelsohn
On Fri, Dec 6, 2013 at 2:02 PM, Vladimir Makarov  wrote:
> On 12/6/2013, 12:30 PM, Vladimir Makarov wrote:
>>
>> On 12/6/2013, 11:28 AM, Michael Meissner wrote:
>>>
>>> On Thu, Dec 05, 2013 at 12:40:17PM -0500, Vladimir Makarov wrote:

 The following patch fixes two GCC testsuite failures for LRA.  The
 patch makes swap through registers instead of memory for the test
 cases when LRA is used.

 There are differences in reload and LRA constraint matching
 algorithm which results in different alternative choices when the
 original pattern is used.

 Actually my first proposed solution variant used one pattern which
 is now for LRA in this patch.  But some doubt arose that it may
 affect reload pass in some bad way.

 Ok to commit?
>>>
>>>
>>> I must admit to not remembering why I used ??&r.  I know I wanted it
>>> to prefer
>>> doing the memory patterns.  I would think we should try just the pattern
>>> without the ??.
>>>
>>
>>I tried it about 2 months ago.  I did not see any problems of such
>> change for reload and LRA.  There were no regressions on GCC testsuite.
>>
>>So, Mike, if you don't see any compelling reason to keep ??, probably
>> we should remove them.
>>
>> If you don't mind, I'll make the patch and test again and after that
>> submit it for approval.
>>
>
> Here is the patch.
>
> Tested and bootstrapped on gcc110.fsffrance.org.
>
>
> Ok to commit?
>
> 2013-12-05  Vladimir Makarov  
>
> * config/rs6000/rs600.md (*bswapdi2_64bit): Remove ?? from the
> constraint.

Okay, let's just remove the "??" modifier from the constraint.

Thanks for your patience, explanations, and work on this, Vlad.

Thanks, David


Re: [SH] Fix PR 58314 - Rework *movqi / *movhi patterns

2013-12-06 Thread Oleg Endo
On Tue, 2013-11-26 at 12:18 +0100, Oleg Endo wrote:
> Hello,
> 
> The attached patch is the same as posted in the PR as attachment 31283.
> In addition to the testing done by Kaz and Christian, I've also tested
> it with
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> on rev 205313 with no new failures.
> OK for trunk?
> 
> The additional test case from PR 58314 will follow.
> I'll also post a version of the patch for 4.8 after testing.

I've just committed the attached patch to the 4.8 branch as rev 205759.
Tested with
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

Cheers,
Oleg

gcc/ChangeLog:
Backport from mainline
2013-11-26  Oleg Endo  

PR target/58314
PR target/50751
* config/sh/sh.c (max_mov_insn_displacement, disp_addr_displacement):
Prefix function names with 'sh_'.  Make them non-static.
* config/sh/sh-protos.h (sh_disp_addr_displacement,
sh_max_mov_insn_displacement): Add declarations.
* config/sh/constraints.md (Q): Reject QImode.
(Sdd): Use match_code "mem".
(Snd): Fix erroneous matching of non-memory operands.
* config/sh/predicates.md (short_displacement_mem_operand): New
predicate.
(general_movsrc_operand): Disallow PC relative QImode loads.
* config/sh/sh.md (*mov_reg_reg): Remove it.
(*movqi, *movhi): Merge both insns into...
(*mov): ... this new insn.  Replace generic 'm' constraints with
'Snd' and 'Sdd' constraints.  Calculate insn length dynamically based
on the operand types.

Index: gcc/config/sh/constraints.md
===
--- gcc/config/sh/constraints.md	(revision 205733)
+++ gcc/config/sh/constraints.md	(working copy)
@@ -221,6 +221,7 @@
 (define_constraint "Q"
   "A pc relative load operand."
   (and (match_code "mem")
+   (match_test "GET_MODE (op) != QImode")
(match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))")))
 
 (define_constraint "Bsc"
@@ -295,13 +296,15 @@
 
 (define_memory_constraint "Sdd"
   "A memory reference that uses displacement addressing."
-  (and (match_test "MEM_P (op) && GET_CODE (XEXP (op, 0)) == PLUS")
+  (and (match_code "mem")
+   (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
(match_test "REG_P (XEXP (XEXP (op, 0), 0))")
(match_test "CONST_INT_P (XEXP (XEXP (op, 0), 1))")))
 
 (define_memory_constraint "Snd"
   "A memory reference that excludes displacement addressing."
-  (match_test "! satisfies_constraint_Sdd (op)"))
+  (and (match_code "mem")
+   (match_test "! satisfies_constraint_Sdd (op)")))
 
 (define_memory_constraint "Sbv"
   "A memory reference, as used in SH2A bclr.b, bset.b, etc."
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 205734)
+++ gcc/config/sh/sh.md	(working copy)
@@ -6831,34 +6831,9 @@
   prepare_move_operands (operands, QImode);
 })
 
-;; If movqi_reg_reg is specified as an alternative of movqi, movqi will be
-;; selected to copy QImode regs.  If one of them happens to be allocated
-;; on the stack, reload will stick to movqi insn and generate wrong
-;; displacement addressing because of the generic m alternatives.
-;; With the movqi_reg_reg being specified before movqi it will be initially
-;; picked to load/store regs.  If the regs regs are on the stack reload
-;; try other insns and not stick to movqi_reg_reg, unless there were spilled
-;; pseudos in which case 'm' constraints pertain.
-;; The same applies to the movhi variants.
-;;
-;; Notice, that T bit is not allowed as a mov src operand here.  This is to
-;; avoid things like (set (reg:QI) (subreg:QI (reg:SI T_REG) 0)), which
-;; introduces zero extensions after T bit stores and redundant reg copies.
-;;
-;; FIXME: We can't use 'arith_reg_operand' (which disallows T_REG) as a
-;; predicate for the mov src operand because reload will have trouble
-;; reloading MAC subregs otherwise.  For that probably special patterns
-;; would be required.
-(define_insn "*mov_reg_reg"
-  [(set (match_operand:QIHI 0 "arith_reg_dest" "=r,m,*z")
-	(match_operand:QIHI 1 "register_operand" "r,*z,m"))]
-  "TARGET_SH1 && !t_reg_operand (operands[1], VOIDmode)"
-  "@
-mov		%1,%0
-mov.	%1,%0
-mov.	%1,%0"
-  [(set_attr "type" "move,store,load")])
-
+;; Specifying the displacement addressing load / store patterns separately
+;; before the generic movqi / movhi pattern allows controlling the order
+;; in which load / store insns are selected in a more fine grained way.
 ;; FIXME: The non-SH2A and SH2A variants should be combined by adding
 ;; "enabled" attribute as it is done in other targets.
 (define_insn "*mov_store_mem_disp04"
@@ -6908,38 +6883,44 @@
   [(set_attr "type" "load")
(set_attr "length" "2,2,4")])
 
-;; The m co

Re: [PATCH] Fix PR 59390

2013-12-06 Thread Sriraman Tallam
Patch updated with two more tests to check if the vfmadd insn is being
produced when possible.

Thanks
Sri

On Fri, Dec 6, 2013 at 11:12 AM, Sriraman Tallam  wrote:
> Hi,
>
> I have attached a patch to fix
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59390. Please review.
>
> Here is the problem. GCC adds target-specific builtins on demand. The
> FMA target-specific builtin __builtin_ia32_vfmaddpd gets added via
> this declaration:
>
> void fun() __attribute__((target("fma")));
>
> Specifically, the builtin __builtin_ia32_vfmaddpd gets added when
> ix86_add_new_builtins is called from ix86_valid_target_attribute_tree
> when processing this target attribute.
>
> Now, when the vectorizer is processing the builtin "__builtin_fma" in
> function other_fn(), it checks to see if this function is vectorizable
> and calls ix86_builtin_vectorized_function in i386.c. That returns the
> builtin stored here:
>
>
> case BUILT_IN_FMA:
> if (out_mode == DFmode && in_mode == DFmode)
> {
>  if (out_n == 2 && in_n == 2)
>return ix86_builtins[IX86_BUILTIN_VFMADDPD];
>   
>
> ix86_builtins[IX86_BUILTIN_VFMADDPD] would have contained NULL_TREE
> had the builtin not been added by the previous target attribute. That
> is why the code works if we remove the previous declaration.
>
> The fix is to not just return the builtin but to also check if the
> current function's isa allows the use of the builtin. For instance,
> this patch would solve the problem:
>
> @@ -33977,7 +33977,13 @@ ix86_builtin_vectorized_function (tree fndecl, tre
>if (out_mode == DFmode && in_mode == DFmode)
>   {
>if (out_n == 2 && in_n == 2)
> -return ix86_builtins[IX86_BUILTIN_VFMADDPD];
> +{
> +  if (ix86_builtins_isa[IX86_BUILTIN_VFMADDPD].isa
> +  & global_options.x_ix86_isa_flags)
> +return ix86_builtins[IX86_BUILTIN_VFMADDPD];
> +  else
> + return NULL_TREE;
> +}
>
>
> but there are many instances of this usage in
> ix86_builtin_vectorized_function. This patch covers all the cases.
>
>
> Thanks,
> Sri
PR target/59390
* gcc.target/i386/pr59390.c: New test.
* gcc.target/i386/pr59390_1.c: New test.
* gcc.target/i386/pr59390_2.c: New test.
* config/i386/i386.c (get_builtin): New function.
(ix86_builtin_vectorized_function): Replace all instances of
ix86_builtins[...] with get_builtin(...).
(ix86_builtin_reciprocal): Ditto.


Index: testsuite/gcc.target/i386/pr59390.c
===
--- testsuite/gcc.target/i386/pr59390.c (revision 0)
+++ testsuite/gcc.target/i386/pr59390.c (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+int i;
+for (i = 0; i < n; i++) {
+out[i] = fma(a[i], b[i], c[i]);
+}   
+}
+
+/* { dg-final { scan-assembler-not "vfmadd" } } */
Index: testsuite/gcc.target/i386/pr59390_1.c
===
--- testsuite/gcc.target/i386/pr59390_1.c   (revision 0)
+++ testsuite/gcc.target/i386/pr59390_1.c   (revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+__attribute__((target("fma")))
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+int i;
+for (i = 0; i < n; i++) {
+out[i] = fma(a[i], b[i], c[i]);
+}   
+}
+
+/* { dg-final { scan-assembler "vfmadd" } } */
Index: testsuite/gcc.target/i386/pr59390_2.c
===
--- testsuite/gcc.target/i386/pr59390_2.c   (revision 0)
+++ testsuite/gcc.target/i386/pr59390_2.c   (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3 -mfma" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+int i;
+for (i = 0; i < n; i++) {
+out[i] = fma(a[i], b[i], c[i]);
+}   
+}
+
+/* { dg-final { scan-assembler "vfmadd" } } */
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 205616)
+++ config/i386/i386.c  (working copy)
@@ -33649,6 +33649,30 @@ addcarryx:
   gcc_unreachable ();
 }
 
+/* This returns the target-specific builtin with code CODE if
+   current_function_decl has visibility on this builtin, which is checked
+   using isa flags.  Returns NULL_TREE otherwise.  */
+
+static tree get_builtin (enum ix86_builtins code)
+{
+  struct cl_target_option *opts;
+  tree target_tree = NULL_TREE;
+
+  /* Determine the isa flags of current_function_de

[PATCH] Fix small vtable verification bugs.

2013-12-06 Thread Caroline Tice
The attached  patch fixes two small problems with the current vtable
verification code:  it makes the libvtv function decls globally
visible, and it  updates all uses of the verified vtable pointer with
the verification results, rather than just the first use.

I have bootstrapped the compiler with this patch, both with and
without vtable verification enabled, and have run the regression
testsuites, both with and without vtable verification enabled, with no
regressions.  I have only tested this on Linux.

Is this patch OK to commit?

-- Caroline Tice
cmt...@google.com

gcc ChangeLog:

2013-12-06  Caroline Tice  

Submitting patch from Stephen Checkoway, s...@cs.jhu.edu
* vtable-class-hierarchy.c (init_functions): Make the libvtv
function decls externally visible.


gcc/cp ChangeLog:

2013-12-06  Caroline Tice  

Submitting patch from Stephen Checkoway, s...@cs.jhu.edu
* vtable-class-hierarchy.c (init_functions): Make the libvtv
function decls externally visible.
Index: gcc/cp/vtable-class-hierarchy.c
===
--- gcc/cp/vtable-class-hierarchy.c	(revision 205756)
+++ gcc/cp/vtable-class-hierarchy.c	(working copy)
@@ -258,6 +258,7 @@ init_functions (void)
   DECL_ATTRIBUTES (vlt_register_set_fndecl) =
 tree_cons (get_identifier ("leaf"), NULL,
DECL_ATTRIBUTES (vlt_register_set_fndecl));
+  DECL_EXTERNAL(vlt_register_set_fndecl) = 1;
   TREE_PUBLIC (vlt_register_set_fndecl) = 1;
   DECL_PRESERVE_P (vlt_register_set_fndecl) = 1;
   SET_DECL_LANGUAGE (vlt_register_set_fndecl, lang_cplusplus);
@@ -301,6 +302,7 @@ init_functions (void)
   DECL_ATTRIBUTES (vlt_register_pairs_fndecl) =
 tree_cons (get_identifier ("leaf"), NULL,
DECL_ATTRIBUTES (vlt_register_pairs_fndecl));
+  DECL_EXTERNAL(vlt_register_pairs_fndecl) = 1;
   TREE_PUBLIC (vlt_register_pairs_fndecl) = 1;
   DECL_PRESERVE_P (vlt_register_pairs_fndecl) = 1;
   SET_DECL_LANGUAGE (vlt_register_pairs_fndecl, lang_cplusplus);
Index: gcc/vtable-verify.c
===
--- gcc/vtable-verify.c	(revision 205756)
+++ gcc/vtable-verify.c	(working copy)
@@ -646,9 +646,6 @@ verify_bb_vtables (basic_block bb)
 
   if (vtable_map_node && vtable_map_node->vtbl_map_decl)
 {
-  use_operand_p use_p;
-  ssa_op_iter iter;
-
   vtable_map_node->is_used = true;
   vtbl_var_decl = vtable_map_node->vtbl_map_decl;
 
@@ -695,35 +692,27 @@ verify_bb_vtables (basic_block bb)
   gimple_call_set_lhs (call_stmt, tmp0);
   update_stmt (call_stmt);
 
-  /* Find the next stmt, after the vptr assignment
- statememt, which should use the result of the
- vptr assignment statement value. */
-  gsi_next (&gsi_vtbl_assign);
-  gimple next_stmt = gsi_stmt (gsi_vtbl_assign);
-
-  if (!next_stmt)
-return;
-
-  /* Find any/all uses of 'lhs' in next_stmt, and
- replace them with 'tmp0'.  */
+  /* Replace all uses of lhs with tmp0. */
   found = false;
-  FOR_EACH_PHI_OR_STMT_USE (use_p, next_stmt, iter,
-SSA_OP_ALL_USES)
+  imm_use_iterator iterator;
+  gimple use_stmt;
+  FOR_EACH_IMM_USE_STMT (use_stmt, iterator, lhs)
 {
-  tree op = USE_FROM_PTR (use_p);
-  if (op == lhs)
-{
-  SET_USE (use_p, tmp0);
-  found = true;
-}
+  use_operand_p use_p;
+  if (use_stmt == call_stmt)
+continue;
+  FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
+SET_USE (use_p, tmp0);
+  update_stmt (use_stmt);
+  found = true;
 }
-  update_stmt (next_stmt);
+
   gcc_assert (found);
 
   /* Insert the new verification call just after the
  statement that gets the vtable pointer out of the
  object.  */
-  gsi_vtbl_assign = gsi_for_stmt (stmt);
+  gcc_assert (gsi_stmt (gsi_vtbl_assign) == stmt);
   gsi_insert_after (&gsi_vtbl_assign, call_stmt,
 GSI_NEW_STMT);
 


Re: [PATCH] pr59043

2013-12-06 Thread Mike Stump
On Dec 6, 2013, at 11:03 AM, Dominique Dhumieres  wrote:
>> I think I blew by it the first time with the OK?  at the end of the line?
>> maybe I thought you thought it was trivial and checked it in
> 
> The patch is indeed trivial, but I don't have write access to CVS,

:-)  We stopped using CVS a long time ago.


Re: [PATCH] pr59043

2013-12-06 Thread Mike Stump
On Nov 18, 2013, at 9:48 AM, Dominique Dhumieres  wrote:
> Adjust the following tests to take into account the change of
> "Length of Public Names Info" to "Pub Info Length"
> in revision 203936. Tested on x86_64-apple-darwin13.

Applied:

Committed revision 205758.

Be sure to attach patches to be applied instead of cut-n-paste if you use Mail, 
it destroys the text cut and pasted.

Re: Oleg Endo appointed co-maintainer of SH port

2013-12-06 Thread Oleg Endo
On Fri, 2013-12-06 at 09:05 -0500, David Edelsohn wrote:
>   I am pleased to announce that the GCC Steering Committee has
> appointed Oleg Endo as co-maintainer of the SH port.
> 
>   Please join me in congratulating Oleg on his new role.
> Oleg, please update your listing in the MAINTAINERS file.

Thank you.

I've just committed the following.

Index: MAINTAINERS
===
--- MAINTAINERS (revision 205756)
+++ MAINTAINERS (working copy)
@@ -102,6 +102,7 @@
 score port Chen Liqin  liqin@gmail.com
 sh portAlexandre Oliva aol...@redhat.com
 sh portKaz Kojima  kkoj...@gcc.gnu.org
+sh portOleg Endo   olege...@gcc.gnu.org
 sparc port Richard Henderson   r...@redhat.com
 sparc port David S. Miller da...@redhat.com
 sparc port Eric Botcazou   ebotca...@libertysurf.fr
@@ -364,7 +365,6 @@
 Bernd Edlinger bernd.edlin...@hotmail.de
 Phil Edwards   p...@gcc.gnu.org
 Mohan Embargnust...@thisiscool.com
-Oleg Endo  olege...@gcc.gnu.org
 Revital Eres   e...@il.ibm.com
 Marc Espie es...@cvs.openbsd.org
Rafael Ávila de Espíndola  espind...@google.com



Re: [PATCH PR41488]Recognize more induction variables by simplifying PEELED chrec in scalar evolution

2013-12-06 Thread Jeff Law

On 12/06/13 03:29, bin.cheng wrote:

Hi,
Entry pr41488 shows a case in which induction variable can't be recognized
or coalesced.  As noted in the bug entry, one possible fix is to teach PRE
not to do some specific transformation.  However, it's in nature a scalar
evolution issue.  Considering below snippet:

# i_17 = PHI 
# _20 = PHI <_5(5), start_4(D)(3)>
...
i_13 = i_17 + 1;
_5 = start_4(D) + i_13;

Variable _20 appears in the form of PEELED chrec like (start_4, _5)_LOOP,
meaning it has value "start_4" in the 1st iteration, then changes to _5 in
following iterations.  PEELED chrec is not implemented by GCC right now, it
can be simplified into either POLYNOMIAL or PERIOD one.
Right.  PEELED_CHREC was removed from the LNO branch back in 2004, 
presumably before the LNO branch was merged into the mainline.  No 
reason was given.


But what you're really discovering here is that _20 is just another 
standard looking IV that appears in the form of a peeled chrec, right?



 The POLYNOMIAL

chrec is processed by GCC after being recognized, as in the examle, _20 is
actually {start_4, 1}_LOOP.
Right.  Based on reading the archives, it looks like this stuff is/was 
generated by PRE.  I also suspect jump threading can create them.  There 
was talk of throttling PRE to leave things in a form that the IV 
analysis could more easily digest, but I'm not sure if that was ever 
completed.


[ snip ]



One point needs to be clarified that I use tree_to_aff_combination_expand in
the patch.  Rational is the number of the specific PEELED_CHRECs should be
moderate, I also check the equality literally firstly and resort to affine
facility if that fails.  By measuring bootstrap of gcc and spec2kint, I
collected the number of opportunities caught by this patch like below:
 literal comparison/affine comparison
GCC  ~1200/~250
Spec2Kint  93/34

I could back trace the ssa chain instead of using affine functions, but that
would miss some cases.
I assume tree_to_aff_combination_expand is relatively expensive, thus 
the two approaches, one which avoids tree_to_aff_combination_expand.



In add_old_iv_candidates, is it the case that the only time 
SSA_NAME_DEF_STMT (def) is another PHI node is when it's one of these 
affine ivs appearing in the form of a PEELED_CHREC?  And in that case, 
we do not want to record the possibility of leaving the original IV 
untouched?  -- Just trying to make sure I understand the code before 
giving a final approval.


jeff


[PATCH] Fix PR 59390

2013-12-06 Thread Sriraman Tallam
Hi,

I have attached a patch to fix
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59390. Please review.

Here is the problem. GCC adds target-specific builtins on demand. The
FMA target-specific builtin __builtin_ia32_vfmaddpd gets added via
this declaration:

void fun() __attribute__((target("fma")));

Specifically, the builtin __builtin_ia32_vfmaddpd gets added when
ix86_add_new_builtins is called from ix86_valid_target_attribute_tree
when processing this target attribute.

Now, when the vectorizer is processing the builtin "__builtin_fma" in
function other_fn(), it checks to see if this function is vectorizable
and calls ix86_builtin_vectorized_function in i386.c. That returns the
builtin stored here:


case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
 if (out_n == 2 && in_n == 2)
   return ix86_builtins[IX86_BUILTIN_VFMADDPD];
  

ix86_builtins[IX86_BUILTIN_VFMADDPD] would have contained NULL_TREE
had the builtin not been added by the previous target attribute. That
is why the code works if we remove the previous declaration.

The fix is to not just return the builtin but to also check if the
current function's isa allows the use of the builtin. For instance,
this patch would solve the problem:

@@ -33977,7 +33977,13 @@ ix86_builtin_vectorized_function (tree fndecl, tre
   if (out_mode == DFmode && in_mode == DFmode)
  {
   if (out_n == 2 && in_n == 2)
-return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+{
+  if (ix86_builtins_isa[IX86_BUILTIN_VFMADDPD].isa
+  & global_options.x_ix86_isa_flags)
+return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+  else
+ return NULL_TREE;
+}


but there are many instances of this usage in
ix86_builtin_vectorized_function. This patch covers all the cases.


Thanks,
Sri
PR target/59390
* gcc.target/i386/pr59390.c: New test.
* config/i386/i386.c (get_builtin): New function.
(ix86_builtin_vectorized_function): Replace all instances of
ix86_builtins[...] with get_builtin(...).
(ix86_builtin_reciprocal): Ditto.


Index: testsuite/gcc.target/i386/pr59390.c
===
--- testsuite/gcc.target/i386/pr59390.c (revision 0)
+++ testsuite/gcc.target/i386/pr59390.c (revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+int i;
+for (i = 0; i < n; i++) {
+out[i] = fma(a[i], b[i], c[i]);
+}   
+}
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 205616)
+++ config/i386/i386.c  (working copy)
@@ -33649,6 +33649,30 @@ addcarryx:
   gcc_unreachable ();
 }
 
+/* This returns the target-specific builtin with code CODE if
+   current_function_decl has visibility on this builtin, which is checked
+   using isa flags.  Returns NULL_TREE otherwise.  */
+
+static tree get_builtin (enum ix86_builtins code)
+{
+  struct cl_target_option *opts;
+  tree target_tree = NULL_TREE;
+
+  /* Determine the isa flags of current_function_decl.  */
+
+  if (current_function_decl)
+target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
+  if (target_tree)
+opts = TREE_TARGET_OPTION (target_tree);
+  else
+opts = TREE_TARGET_OPTION (target_option_default_node);
+
+  if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
+return ix86_builtin_decl (code, true);
+  else
+return NULL_TREE;
+}
+
 /* Returns a function decl for a vectorized version of the builtin function
with builtin function code FN and the result vector type TYPE, or NULL_TREE
if it is not available.  */
@@ -33677,9 +33701,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
   if (out_mode == DFmode && in_mode == DFmode)
{
  if (out_n == 2 && in_n == 2)
-   return ix86_builtins[IX86_BUILTIN_SQRTPD];
+   get_builtin (IX86_BUILTIN_SQRTPD);
  else if (out_n == 4 && in_n == 4)
-   return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+   get_builtin (IX86_BUILTIN_SQRTPD256);
}
   break;
 
@@ -33687,9 +33711,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
   if (out_mode == SFmode && in_mode == SFmode)
{
  if (out_n == 4 && in_n == 4)
-   return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+   get_builtin (IX86_BUILTIN_SQRTPS_NR);
  else if (out_n == 8 && in_n == 8)
-   return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+   get_builtin (IX86_BUILTIN_SQRTPS_NR256);
}
   break;
 
@@ -33703,9 +33727,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
   if (out_mode == SImode && in_mode == DFmode)
{
  if (out_n == 4 && in_n == 2)
-   return ix86_builtins[IX86_BUILTIN_FLOO

Re: [PATCH 10/13] Eliminate last_basic_block macro.

2013-12-06 Thread Oleg Endo
On Fri, 2013-12-06 at 16:57 +0100, Steven Bosscher wrote:
> On Fri, Dec 6, 2013 at 3:51 PM, David Malcolm wrote:
> > * asan.c (transform_statements): Eliminate use of last_basic_block
> > in favor of last_basic_block_for_fn, in order to make use of cfun
> > explicit.
> 
> Can we please make all this _for_fn go away?
> 

Sorry if this has been discussed before... but why not adding member
functions to 'function' instead of freestanding macros/functions that
take a function* as a first argument?  This would also make it easier to
eliminate the "_for_fn" (freestanding function/macro name clashes etc) I
think.

Cheers,
Oleg



Re: [PATCH] pr59043

2013-12-06 Thread Dominique Dhumieres
> I think I blew by it the first time with the OK?  at the end of the line?
> maybe I thought you thought it was trivial and checked it in

The patch is indeed trivial, but I don't have write access to CVS, so
someone else has to commit it.

Dominique


Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Vladimir Makarov

On 12/6/2013, 12:30 PM, Vladimir Makarov wrote:

On 12/6/2013, 11:28 AM, Michael Meissner wrote:

On Thu, Dec 05, 2013 at 12:40:17PM -0500, Vladimir Makarov wrote:

The following patch fixes two GCC testsuite failures for LRA.  The
patch makes swap through registers instead of memory for the test
cases when LRA is used.

There are differences in reload and LRA constraint matching
algorithm which results in different alternative choices when the
original pattern is used.

Actually my first proposed solution variant used one pattern which
is now for LRA in this patch.  But some doubt arose that it may
affect reload pass in some bad way.

Ok to commit?


I must admit to not remembering why I used ??&r.  I know I wanted it
to prefer
doing the memory patterns.  I would think we should try just the pattern
without the ??.



   I tried it about 2 months ago.  I did not see any problems of such
change for reload and LRA.  There were no regressions on GCC testsuite.

   So, Mike, if you don't see any compelling reason to keep ??, probably
we should remove them.

If you don't mind, I'll make the patch and test again and after that
submit it for approval.



Here is the patch.

Tested and bootstrapped on gcc110.fsffrance.org.

Ok to commit?

2013-12-05  Vladimir Makarov  

* config/rs6000/rs6000.md (*bswapdi2_64bit): Remove ?? from the
constraint.

Index: config/rs6000/rs6000.md
===
--- config/rs6000/rs6000.md (revision 205753)
+++ config/rs6000/rs6000.md (working copy)
@@ -2379,7 +2379,7 @@

 ;; Non-power7/cell, fall back to use lwbrx/stwbrx
 (define_insn "*bswapdi2_64bit"
-  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,&r")
(bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
(clobber (match_scratch:DI 2 "=&b,&b,&r"))
(clobber (match_scratch:DI 3 "=&r,&r,&r"))



Re: PING: Fwd: Re: [patch] implement Cilk Plus simd loops on trunk

2013-12-06 Thread Thomas Schwinge
Hi!

On Fri, 15 Nov 2013 14:44:45 -0700, Aldy Hernandez  wrote:
> I fixed a few nits Jason pointed out off-line, and both him and Jakub 
> have approved the patch for trunk.
> 
> In running the final round of tests I noticed a few problems with my 
> choice of bit numbers for the GF_OMP_* masks.  I fixed them, and re-ran 
> tests on x86-64 Linux.
> 
> Attached is the final version of the patch I have committed to trunk.

(As r204863, that is.)


> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -10177,12 +10210,33 @@ diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
>  error ("invalid entry to OpenMP structured block");
>  #endif
>  
> +  bool cilkplus_block = false;
> +  if (flag_enable_cilkplus)
> +{
> +  if ((branch_ctx
> +&& gimple_code (branch_ctx) == GIMPLE_OMP_FOR
> +&& gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
> +   || (gimple_code (label_ctx) == GIMPLE_OMP_FOR
> +   && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
> + cilkplus_block = true;
> +}

There is one issue here: consider the following code:

void baz()
{
  bad1:
  #pragma omp parallel
goto bad1;
}

Then, if both -fcilkplus and -fopenmp are specified, that will run into a
SIGSEGV/ICE because of label_ctx == NULL.  The fix is simple enough; OK
for trunk and gomp-4_0-branch (after full testing)?  The testcase is
basically a concatenation of gcc.dg/cilk-plus/jump.c and
gcc.dg/gomp/block-1.c -- should this be done differently/better?

commit eee16f8aad4527b705d327476b00bf9f5ba6dcce
Author: Thomas Schwinge 
Date:   Fri Dec 6 18:55:41 2013 +0100

Fix possible ICE (null pointer dereference) introduced in r204863.

gcc/
* omp-low.c (diagnose_sb_0): Make sure label_ctx is valid to
dereference.
gcc/testsuite/
* gcc.dg/cilk-plus/jump-openmp.c: New file.

diff --git gcc/omp-low.c gcc/omp-low.c
index e0f7d1d..91221c0 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -10865,7 +10865,8 @@ diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
   if ((branch_ctx
   && gimple_code (branch_ctx) == GIMPLE_OMP_FOR
   && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
- || (gimple_code (label_ctx) == GIMPLE_OMP_FOR
+ || (label_ctx
+ && gimple_code (label_ctx) == GIMPLE_OMP_FOR
  && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
cilkplus_block = true;
 }
diff --git gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c 
gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c
new file mode 100644
index 000..95e6b2d
--- /dev/null
+++ gcc/testsuite/gcc.dg/cilk-plus/jump-openmp.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-fcilkplus -fopenmp" } */
+/* { dg-require-effective-target fopenmp } */
+
+int *a, *b, c;
+
+void foo()
+{
+#pragma simd
+  for (int i=0; i < 1000; ++i)
+{
+  a[i] = b[i];
+  if (c == 5)
+   return; /* { dg-error "invalid branch to/from a Cilk Plus structured 
block" } */
+}
+}
+
+void bar()
+{
+#pragma simd
+  for (int i=0; i < 1000; ++i)
+{
+lab:
+  a[i] = b[i];
+}
+  if (c == 6)
+goto lab; /* { dg-error "invalid entry to Cilk Plus structured block" } */
+}
+
+void baz()
+{
+  bad1:
+  #pragma omp parallel
+goto bad1; /* { dg-error "invalid branch to/from an OpenMP structured 
block" } */
+
+  goto bad2; /* { dg-error "invalid entry to OpenMP structured block" } */
+  #pragma omp parallel
+{
+  bad2: ;
+}
+
+  #pragma omp parallel
+{
+  int i;
+  goto ok1;
+  for (i = 0; i < 10; ++i)
+   { ok1: break; }
+}
+}


>/* If it's obvious we have an invalid entry, be specific about the error.  
> */
>if (branch_ctx == NULL)
> -error ("invalid entry to OpenMP structured block");
> +{
> +  if (cilkplus_block)
> + error ("invalid entry to Cilk Plus structured block");
> +  else
> + error ("invalid entry to OpenMP structured block");
> +}
>else
> -/* Otherwise, be vague and lazy, but efficient.  */
> -error ("invalid branch to/from an OpenMP structured block");
> +{
> +  /* Otherwise, be vague and lazy, but efficient.  */
> +  if (cilkplus_block)
> + error ("invalid branch to/from a Cilk Plus structured block");
> +  else
> + error ("invalid branch to/from an OpenMP structured block");
> +}

In fact, and keeping in mind that we're currently adding OpenACC support,
I'd suggest to do this differently; OK for trunk and gomp-4_0-branch?

commit 367dabfcc94a3e96d63b48c38d0dd94ca9f517f8
Author: Thomas Schwinge 
Date:   Fri Dec 6 19:23:47 2013 +0100

Generalize diagnose_omp_blocks' structured block logic.

gcc/
* omp-low.c (diagnose_sb_0): Generalize detection which kind of
structured block we're in.
gcc/testsuite/
* g++.dg/gomp/block-1.C: Adjust to changed error message and/or
be tighten matching rules.
* g++.dg

Re: [wide-int] small cleanup in wide-int.*

2013-12-06 Thread Richard Sandiford
Kenneth Zadeck  writes:
> On 12/03/2013 11:52 AM, Richard Sandiford wrote:
>> Kenneth Zadeck  writes:
>>> Index: tree-vrp.c
>>> ===
>>> --- tree-vrp.c  (revision 205597)
>>> +++ tree-vrp.c  (working copy)
>>> @@ -2611,22 +2611,28 @@ extract_range_from_binary_expr_1 (value_
>>>   
>>> signop sign = TYPE_SIGN (expr_type);
>>> unsigned int prec = TYPE_PRECISION (expr_type);
>>> -  unsigned int prec2 = (prec * 2) + (sign == UNSIGNED ? 2 : 0);
>>>   
>>> if (range_int_cst_p (&vr0)
>>>   && range_int_cst_p (&vr1)
>>>   && TYPE_OVERFLOW_WRAPS (expr_type))
>>> {
>>> - wide_int sizem1 = wi::mask (prec, false, prec2);
>>> - wide_int size = sizem1 + 1;
>>> + /* vrp_int is twice as wide as anything that the target
>>> +supports so it can support a full width multiply.  No
>>> +need to add any more padding for an extra sign bit
>>> +because that comes with the way that WIDE_INT_MAX_ELTS is
>>> +defined.  */
>>> + typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2)
>>> +   vrp_int;
>>> + vrp_int sizem1 = wi::mask  (prec, false);
>>> + vrp_int size = sizem1 + 1;
>>>   
>>>   /* Extend the values using the sign of the result to PREC2.
>>>  From here on out, everthing is just signed math no matter
>>>  what the input types were.  */
>>> - wide_int min0 = wide_int::from (vr0.min, prec2, sign);
>>> - wide_int max0 = wide_int::from (vr0.max, prec2, sign);
>>> - wide_int min1 = wide_int::from (vr1.min, prec2, sign);
>>> - wide_int max1 = wide_int::from (vr1.max, prec2, sign);
>>> + vrp_int min0 = wi::to_vrp (vr0.min);
>>> + vrp_int max0 = wi::to_vrp (vr0.max);
>>> + vrp_int min1 = wi::to_vrp (vr1.min);
>>> + vrp_int max1 = wi::to_vrp (vr1.max);
>> I think we should avoid putting to_vrp in tree.h if vrp_int is only
>> local to this block.  Instead you could have:
>>
>>typedef generic_wide_int
>>> vrp_int_cst;
>>...
>>vrp_int_cst min0 = vr0.min;
>>vrp_int_cst max0 = vr0.max;
>>vrp_int_cst min1 = vr1.min;
>>vrp_int_cst max1 = vr1.max;
>>
> I did this in a different way because I had trouble doing it as you 
> suggested.  The short answer is that all of the vrp_int code is now 
> local to tree-vrp.c, which I think was your primary goal

Ah, so we later assign to these variables:

  /* Canonicalize the intervals.  */
  if (sign == UNSIGNED)
{
  if (wi::ltu_p (size, min0 + max0))
{
  min0 -= size;
  max0 -= size;
}

  if (wi::ltu_p (size, min1 + max1))
{
  min1 -= size;
  max1 -= size;
}
}

OK, in that case I suppose a temporary is needed.  But I'd prefer
not to put local stuff in the wi:: namespace.  You could just have:

  typedef generic_wide_int
 > vrp_int_cst;

  vrp_int min0 = vrp_int_cst (vr0.min);
  vrp_int max0 = vrp_int_cst (vr0.max);
  vrp_int min1 = vrp_int_cst (vr1.min);
  vrp_int max1 = vrp_int_cst (vr1.max);

which removes the need for:

+/* vrp_int is twice as wide as anything that the target supports so it
+   can support a full width multiply.  No need to add any more padding
+   for an extra sign bit because that comes with the way that
+   WIDE_INT_MAX_ELTS is defined.  */ 
+typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2) vrp_int;
+namespace wi
+{
+  generic_wide_int  > to_vrp 
(const_tree);
+}
+
+inline generic_wide_int  >
+wi::to_vrp (const_tree t)
+{
+  return t;
+}
+

>>>   #define WIDE_INT_MAX_ELTS \
>>> -  ((4 * MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1) \
>>> -   / HOST_BITS_PER_WIDE_INT)
>>> +  (((MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1)\
>>> +/ HOST_BITS_PER_WIDE_INT) + 1)
>> I think this should be:
>>
>>(MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT + 1)
>>
>> We only need an extra HWI if MAX_BITSIZE_MODE_ANY_INT is an exact multiple
>> of HOST_BITS_PER_WIDE_INT.
> we will do this later when some other issues that Eric B raised are settled.

I think you're talking about defining MAX_BITSIZE_MODE_ANY_INT as
MAX_SIZE_MODE_ANY_INT * BITS_PER_UNIT, but that's orthogonal to the
change above.  IMO it doesn't make sense to both round up the division
and also add 1 to the result.  So I think we should make this change
regardless of whatever follows.

Looks good to me otherwise, thanks.

Richard



libgo patch committed: Non-futex system fix

2013-12-06 Thread Ian Lance Taylor
This patch to libgo fixes the scheduler in the current libgo for systems
that do not use futexes, such as Solaris.  I fixed the futex code a
while back, but I managed to forget to fix the non-futex code.  This is
PR 59408.  Committed to mainline.

Ian

diff -r cc2d51f8c87a libgo/runtime/lock_sema.c
--- a/libgo/runtime/lock_sema.c	Tue Dec 03 17:34:17 2013 -0800
+++ b/libgo/runtime/lock_sema.c	Fri Dec 06 10:23:12 2013 -0800
@@ -152,8 +152,12 @@
 
 	m = runtime_m();
 
+  /* For gccgo it's OK to sleep in non-g0, and it happens in
+ stoptheworld because we have not implemented preemption.
+
 	if(runtime_g() != m->g0)
 		runtime_throw("notesleep not on g0");
+  */
 
 	if(m->waitsema == 0)
 		m->waitsema = runtime_semacreate();


Re: [PATCH] Improve scan pattern in gcc.dg/tree-ssa/loop-31.c

2013-12-06 Thread Jeff Law

On 12/06/13 10:26, Kyrill Tkachov wrote:

Hi all,

The testcase gcc.dg/tree-ssa/loop-31.c started failing on arm with
r202165. The scan dump pattern looks for "+ 2" appearing exactly once.
With r202165 the loop header is modified from:

   :
   ivtmp.5_10 = (unsigned int) &a[4294967295];
   _16 = (unsigned int) len_4(D);
   _17 = _16 * 2;
   _18 = (unsigned int) &a;
   _19 = _18 + 4294967294;
   _20 = _19 + _17;


to:

   :
   ivtmp.5_10 = (unsigned int) &a[4294967295];
   _16 = (sizetype) len_4(D);
   _17 = _16 + 2147483647;    "+ 2" will match here.
   _18 = _17 * 2;
   _19 = &a + _18;
   _20 = (unsigned int) _19;


Since the strength reduction in the loop itself that this testcase is
testing is unaffected, this patch just updates the pattern to be "+ 2;"
to match the induction variable increment: ivtmp.5_11 = ivtmp.5_1 + 2;

Now the testcase passes on arm.

Ok for trunk?

Thanks,
Kyrill

2013-12-06  Kyrylo Tkachov  

 * gcc.dg/tree-ssa/loop-31.c: Update scan pattern.

OK.

Thanks,
Jeff



Re: [wide-int] small cleanup in wide-int.*

2013-12-06 Thread Kenneth Zadeck
Richard asked to see the patch where I did the changes the way that he 
asked in his email. Doing it the way he wants potentially has advantages 
over the way that I did it, but his technique fails for non-obvious 
reasons. The failure in building is:


g++ -c -g -DIN_GCC -fno-exceptions -fno-rtti 
-fasynchronous-unwind-tables -W -Wall -Wwrite-strings -Wcast-qual 
-Wno-format -Wmissing-format-attribute -pedantic -Wno-long-long 
-Wno-variadic-macros -Wno-overlength-strings -fno-common -DHAVE_CONFIG_H 
-I. -I. -I../../gccBadMulVrp/gcc -I../../gccBadMulVrp/gcc/. 
-I../../gccBadMulVrp/gcc/../include 
-I../../gccBadMulVrp/gcc/../libcpp/include 
-I../../gccBadMulVrp/gcc/../libdecnumber 
-I../../gccBadMulVrp/gcc/../libdecnumber/bid -I../libdecnumber 
-I../../gccBadMulVrp/gcc/../libbacktrace -o tree-vrp.o -MT tree-vrp.o 
-MMD -MP -MF ./.deps/tree-vrp.TPo ../../gccBadMulVrp/gcc/tree-vrp.c

In file included from ../../gccBadMulVrp/gcc/double-int.h:23:0,
from ../../gccBadMulVrp/gcc/tree-core.h:28,
from ../../gccBadMulVrp/gcc/tree.h:23,
from ../../gccBadMulVrp/gcc/tree-vrp.c:26:
../../gccBadMulVrp/gcc/wide-int.h: In member function 
‘generic_wide_int& generic_wide_int::operator=(const T&) 
[with T = generic_wide_int >, storage = 
wi::extended_tree<1152>]’:
../../gccBadMulVrp/gcc/wide-int.h:701:3: instantiated from 
‘generic_wide_int& generic_wide_int::operator-=(const T&) [with T 
= generic_wide_int >, storage = 
wi::extended_tree<1152>, generic_wide_int = 
generic_wide_int >]’

../../gccBadMulVrp/gcc/tree-vrp.c:2646:13: instantiated from here
../../gccBadMulVrp/gcc/wide-int.h:860:3: error: no matching function for 
call to ‘generic_wide_int >::operator=(const 
generic_wide_int >&)’

../../gccBadMulVrp/gcc/wide-int.h:860:3: note: candidate is:
../../gccBadMulVrp/gcc/tree.h:4529:9: note: wi::extended_tree<1152>& 
wi::extended_tree<1152>::operator=(const wi::extended_tree<1152>&)
../../gccBadMulVrp/gcc/tree.h:4529:9: note: no known conversion for 
argument 1 from ‘const generic_wide_int >’ 
to ‘const wi::extended_tree<1152>&’

make[3]: *** [tree-vrp.o] Error 1
make[3]: Leaving directory `/home/zadeck/gcc/gbbBadMulVrp/gcc'
make[2]: *** [all-stage1-gcc] Error 2
make[2]: Leaving directory `/home/zadeck/gcc/gbbBadMulVrp'
make[1]: *** [stage1-bubble] Error 2
make[1]: Leaving directory `/home/zadeck/gcc/gbbBadMulVrp'
make: *** [all] Error 2
heracles:~/gcc/gbbBadMulVrp(9) cd ../gccBadMulVrp/



On 12/06/2013 11:45 AM, Kenneth Zadeck wrote:

On 12/03/2013 11:52 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

Index: tree-vrp.c
===
--- tree-vrp.c (revision 205597)
+++ tree-vrp.c (working copy)
@@ -2611,22 +2611,28 @@ extract_range_from_binary_expr_1 (value_
signop sign = TYPE_SIGN (expr_type);
unsigned int prec = TYPE_PRECISION (expr_type);
- unsigned int prec2 = (prec * 2) + (sign == UNSIGNED ? 2 : 0);
if (range_int_cst_p (&vr0)
&& range_int_cst_p (&vr1)
&& TYPE_OVERFLOW_WRAPS (expr_type))
{
- wide_int sizem1 = wi::mask (prec, false, prec2);
- wide_int size = sizem1 + 1;
+ /* vrp_int is twice as wide as anything that the target
+ supports so it can support a full width multiply. No
+ need to add any more padding for an extra sign bit
+ because that comes with the way that WIDE_INT_MAX_ELTS is
+ defined. */
+ typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2)
+ vrp_int;
+ vrp_int sizem1 = wi::mask  (prec, false);
+ vrp_int size = sizem1 + 1;
/* Extend the values using the sign of the result to PREC2.
From here on out, everything is just signed math no matter
what the input types were. */
- wide_int min0 = wide_int::from (vr0.min, prec2, sign);
- wide_int max0 = wide_int::from (vr0.max, prec2, sign);
- wide_int min1 = wide_int::from (vr1.min, prec2, sign);
- wide_int max1 = wide_int::from (vr1.max, prec2, sign);
+ vrp_int min0 = wi::to_vrp (vr0.min);
+ vrp_int max0 = wi::to_vrp (vr0.max);
+ vrp_int min1 = wi::to_vrp (vr1.min);
+ vrp_int max1 = wi::to_vrp (vr1.max);

I think we should avoid putting to_vrp in tree.h if vrp_int is only
local to this block. Instead you could have:

typedef generic_wide_int
 > vrp_int_cst;
...
vrp_int_cst min0 = vr0.min;
vrp_int_cst max0 = vr0.max;
vrp_int_cst min1 = vr1.min;
vrp_int_cst max1 = vr1.max;

i did this in a different way because i had trouble doing it as you 
suggested. the short answer is that all of the vrp_int code is now 
local to tree-vrp.c which i think was your primary goal

@@ -228,15 +228,16 @@ along with GCC; see the file COPYING3.
#endif
/* The MAX_BITSIZE_MODE_ANY_INT is automatically generated by a very
- early examination of the target's mode file. Thus it is safe that
- some small multiple of this number is easily larger than any number
- that that target could compute. The place in the compiler that
- currently needs the widest ints is the code that determines the
- range of a multiply. This code needs 2n + 2 bits. */
-
+ early examination of the target's mode file. The WIDE_INT_MAX_ELTS
+ can accommodate

Re: PATCH: Mention -mtune=intel in gcc-4.9/changes.html

2013-12-06 Thread Jeff Law

On 12/06/13 11:04, Gerald Pfeifer wrote:

On Fri, 6 Dec 2013, H.J. Lu wrote:

This patch mentions  -mtune=intel in gcc-4.9/changes.html.
OK to install?


Okay.  (I guess we can ignore Itanium.)

Seems like the best thing to do for sanity's sake.

jeff


Re: [patch,libgcc] Add microblaze-*-rtems*

2013-12-06 Thread Michael Eager

On 12/06/13 09:00, Ralf Corsepius wrote:

Hi,

I am going to apply the patch below to trunk and 4.8-branch.

It adds a copy of the microblaze-*-elf section for microblaze-rtems* to 
libgcc/config.host.

This is the missing patch I mentioned in
http://gcc.gnu.org/ml/gcc/2013-11/msg00548.html

Ralf



OK.

--
Michael Eagerea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077


Re: PATCH: Mention -mtune=intel in gcc-4.9/changes.html

2013-12-06 Thread Gerald Pfeifer
On Fri, 6 Dec 2013, H.J. Lu wrote:
> This patch mentions  -mtune=intel in gcc-4.9/changes.html.
> OK to install?

Okay.  (I guess we can ignore Itanium.)

Gerald


Re: [PATCH] Fix PR59058

2013-12-06 Thread H.J. Lu
On Thu, Dec 5, 2013 at 6:46 AM, Richard Biener  wrote:
>
> This finally fixes PR59058.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> 2013-12-05  Richard Biener  
>
> PR tree-optimization/59058
> * tree-vectorizer.h (struct _loop_vec_info): Add num_itersm1
> member.
> (LOOP_VINFO_NITERSM1): New macro.
> * tree-vect-loop-manip.c (slpeel_tree_peel_loop_to_edge): Express
> the vector loop entry test in terms of scalar latch executions.
> (vect_do_peeling_for_alignment): Update LOOP_VINFO_NITERSM1.
> * tree-vect-loop.c (vect_get_loop_niters): Also return the
> number of latch executions.
> (new_loop_vec_info): Initialize LOOP_VINFO_NITERSM1.
> (vect_analyze_loop_form): Likewise.
> (vect_generate_tmps_on_preheader): Compute the number of
> vectorized iterations differently.
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59409

-- 
H.J.


Re: [Ping]Two pending IVOPT patches

2013-12-06 Thread Jeff Law

On 12/06/13 02:37, Bin.Cheng wrote:

Do you have any codes where iv_ca_extend helps?  I can see how that hunk
appears to be safe, and I'm guessing that setting the cost pair at each step
could potentially give more accurate costing on the next iteration of the
loop.   But I'd love to be able to see this effect directly rather than just
assuming it's helpful.  Given that I'm prepared to approve the iv_ca_extend
hunk.

Very sorry I can't provide an example about this now.  I remember it's
a case in eembc I encountered, but with current trunk I can't
reproduce it with the change about iv_ca_extend.  Maybe recent
check-ins have changed the behavior of IVOPT.  Considering there is no
case about this change, I am fine to discard this part of patch and
continue with iv_ca_narrow part.
Let's drop this part for now, obviously we can come back to it if you 
come across a testcase in your development.  I'll focus on the 
iv_ca_narrow from a review standpoint.


jeff




Re: [PATCH] pr59043

2013-12-06 Thread Mike Stump
On Nov 18, 2013, at 9:48 AM, Dominique Dhumieres  wrote:
> Adjust the following tests to take into account the change of
> "Length of Public Names Info" to "Pub Info Length"
> in revision 203936. Tested on x86_64-apple-darwin13. OK?

Ok.  [ also, this is trivial ]

I think I blew by it the first time with the OK?  at the end of the line…  
maybe I thought you thought it was trivial and checked it in.  Sorry.

PATCH: Mention -mtune=intel in gcc-4.9/changes.html

2013-12-06 Thread H.J. Lu
Hi,

This patch mentions  -mtune=intel in gcc-4.9/changes.html.
OK to install?

Thanks.

-- 
H.J.
--
Index: gcc-4.9/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.9/changes.html,v
retrieving revision 1.45
diff -u -p -r1.45 changes.html
--- gcc-4.9/changes.html5 Dec 2013 16:07:28 -1.45
+++ gcc-4.9/changes.html6 Dec 2013 17:38:57 -
@@ -399,6 +399,10 @@ auto incr = [](auto x) { return x++; };
   Intel Pentium-M, and Pentium4 based CPUs is no longer
considered important
   for generic.
 
+-mtune=intel can now be used to generate code running
+  well on the most current Intel processors, which are Haswell
+  and Silvermont for GCC 4.9.
+
 Better inlining of memcpy and memset
 that is aware of value ranges and produces shorter alignment prologues.
 


Re: [PATCH] Add -mtune=ia support

2013-12-06 Thread H.J. Lu
On Fri, Dec 6, 2013 at 2:44 AM, Uros Bizjak  wrote:
> On Fri, Dec 6, 2013 at 10:38 AM, Richard Biener
>  wrote:
>> On Thu, Dec 5, 2013 at 10:05 PM, H.J. Lu  wrote:
>>> On Thu, Dec 5, 2013 at 1:02 PM, Patrick Marlier
>>>  wrote:
 Hi,


 On 12/05/2013 07:22 PM, H.J. Lu wrote:
>
> We'd like to add a new -mtune=ia option for x86 to optimize for both
> Haswell and Silvermont.  Currently, -mtune=ia is aliased to -mtune=slm.
> We will improve it further for Haswell and Silvermont.  Later, we will
> update it to future Intel processors.


 At first, 'ia' means to me Itanium, ie IA-64. I would personally prefer
 another name but maybe I am the only one to think that.

>>>
>>> "ia" stands for Intel Architecture.  It is the natural name for
>>> this option.
>>
>> I think "ia" and the natural "aa" are too obfuscated.  Why didn't you
>> chose simply "intel" here?  (will the next patch add -mtune=a as
>> that's natural for "AMD"?)
>
> -mtune=intel indeed sounds better.
>

This is the patch I checked in.

Thanks.

-- 
H.J.
--
2013-12-06  H.J. Lu  

* config.gcc: Change --with-cpu=ia to --with-cpu=intel.

* config/i386/i386.c (cpu_names): Replace "ia" with "intel".
(processor_alias_table): Likewise.
(ix86_option_override_internal): Likewise.
* config/i386/i386.h (target_cpu_default): Replace
TARGET_CPU_DEFAULT_ia with TARGET_CPU_DEFAULT_intel.

* doc/invoke.texi: Replace -mtune=ia with -mtune=intel.

diff --git a/gcc/config.gcc b/gcc/config.gcc
index dd180a0..dc76c82 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1398,7 +1398,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu
| i[34567]86-*-knetbsd*-gnu | i
 TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
 need_64bit_isa=yes
 case X"${with_cpu}" in
-
Xgeneric|Xia|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+
Xgeneric|Xintel|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
 ;;
 X)
 if test x$with_cpu_64 = x; then
@@ -1407,7 +1407,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu
| i[34567]86-*-knetbsd*-gnu | i
 ;;
 *)
 echo "Unsupported CPU used in --with-cpu=$with_cpu,
supported values:" 1>&2
-echo "generic ia atom slm core2 corei7 corei7-avx
nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10
barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3
opteron-sse3" 1>&2
+echo "generic intel atom slm core2 corei7 corei7-avx
nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10
barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3
opteron-sse3" 1>&2
 exit 1
 ;;
 esac
@@ -1519,7 +1519,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*)
 tmake_file="$tmake_file i386/t-sol2-64"
 need_64bit_isa=yes
 case X"${with_cpu}" in
-
Xgeneric|Xia|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+
Xgeneric|Xintel|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
 ;;
 X)
 if test x$with_cpu_64 = x; then
@@ -1528,7 +1528,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*)
 ;;
 *)
 echo "Unsupported CPU used in --with-cpu=$with_cpu,
supported values:" 1>&2
-echo "generic ia atom slm core2 corei7 corei7-avx nocona
x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8
opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
+echo "generic intel atom slm core2 corei7 corei7-avx
nocona x86-64 bdver4 bdver3 bdver2 bdver1 btver2 btver1 amdfam10
barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3
opteron-sse3" 1>&2
 exit 1
 ;;
 esac
@@ -1604,7 +1604,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*)
 if test x$enable_targets = xall; then
 tm_defines="${tm_defines} TARGET_BI_ARCH=1"
 case X"${with_cpu}" in
-
Xgeneric|Xia|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+
Xgeneric|Xintel|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver4|Xbdver3|Xbdver2|Xbdver1|X

[Patch, AArch64] [4/6] Implement support for Crypto -- SHA1.

2013-12-06 Thread Tejas Belagod


Hi,

The attached patch implements support for SHA1 crypto insn.

Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas.

2013-12-06  Tejas Belagod  

gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi,
aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1v4si,
aarch64_crypto_sha1su0v4si): New.
* config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32,
vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA1. UNSPEC_SHA1SU<01>):
New.
(CRYPTO_SHA1): New int iterator.
(sha1_op): New int attribute.

testsuite/
* gcc.target/aarch64/sha1.c: New.diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index 49ab482..b0b9bf1 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -367,3 +367,12 @@
   VAR1 (BINOP, crypto_aesd, 0, v16qi)
   VAR1 (UNOP, crypto_aesmc, 0, v16qi)
   VAR1 (UNOP, crypto_aesimc, 0, v16qi)
+
+  /* Implemented by aarch64_crypto_sha1.  */
+  VAR1 (UNOP, crypto_sha1h, 0, si)
+  VAR1 (BINOP, crypto_sha1su1, 0, v4si)
+  VAR1 (TERNOP, crypto_sha1c, 0, v4si)
+  VAR1 (TERNOP, crypto_sha1m, 0, v4si)
+  VAR1 (TERNOP, crypto_sha1p, 0, v4si)
+  VAR1 (TERNOP, crypto_sha1su0, 0, v4si)
+
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 4b17748..ab9ad57 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4096,3 +4096,46 @@
   [(set_attr "type" "crypto_aes")]
 )
 
+;; sha1
+
+(define_insn "aarch64_crypto_sha1hsi"
+  [(set (match_operand:SI 0 "register_operand" "=w")
+(unspec:SI [(match_operand:SI 1
+   "register_operand" "w")]
+ UNSPEC_SHA1H))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "sha1h\\t%s0, %s1"
+  [(set_attr "type" "crypto_sha1_fast")]
+)
+
+(define_insn "aarch64_crypto_sha1su1v4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SHA1SU1))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "sha1su1\\t%0.4s, %2.4s"
+  [(set_attr "type" "crypto_sha1_fast")]
+)
+
+(define_insn "aarch64_crypto_sha1v4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:SI 2 "register_operand" "w")
+  (match_operand:V4SI 3 "register_operand" "w")]
+ CRYPTO_SHA1))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "sha1\\t%q0, %s2, %3.4s"
+  [(set_attr "type" "crypto_sha1_slow")]
+)
+
+(define_insn "aarch64_crypto_sha1su0v4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:V4SI 2 "register_operand" "w")
+  (match_operand:V4SI 3 "register_operand" "w")]
+ UNSPEC_SHA1SU0))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "sha1su0\\t%0.4s, %2.4s, %3.4s"
+  [(set_attr "type" "crypto_sha1_xor")]
+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9f35e09..244abe7 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -23176,6 +23176,58 @@ vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const 
int __c)
   return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
 }
 
+#ifdef __ARM_FEATURE_CRYPTO
+
+/* vsha1  */
+
+static __inline uint32x4_t
+vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
+{
+  return
+(uint32x4_t) __builtin_aarch64_crypto_sha1cv4si ((int32x4_t) hash_abcd,
+(int32_t) hash_e,
+(int32x4_t) wk);
+}
+static __inline uint32x4_t
+vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
+{
+  return
+(uint32x4_t) __builtin_aarch64_crypto_sha1mv4si ((int32x4_t) hash_abcd,
+(int32_t) hash_e,
+(int32x4_t) wk);
+}
+static __inline uint32x4_t
+vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
+{
+  return
+(uint32x4_t) __builtin_aarch64_crypto_sha1pv4si ((int32x4_t) hash_abcd,
+(int32_t) hash_e,
+(int32x4_t) wk);
+}
+
+static __inline uint32_t
+vsha1h_u32 (uint32_t hash_e)
+{
+  return (uint32_t)__builtin_aarch64_crypto_sha1hsi (hash_e);
+}
+
+static __inline uint32x4_t
+vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
+{
+  return (uint32x4_t) __builtin_aarch64_crypto_sha1su0v4si ((int32x4_t) w0_3,
+  

[Patch, AArch64] [5/6] Implement support for Crypto -- SHA256.

2013-12-06 Thread Tejas Belagod


Hi,

The attached patch implements support for crypto sha256.

Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas.

2013-12-06  Tejas Belagod  

gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-simd.md (aarch64_crypto_sha256hv4si,
aarch64_crypto_sha256su0v4si, aarch64_crypto_sha256su1v4si): New.
* config/aarch64/arm_neon.h (vsha256hq_u32, vsha256h2q_u32,
vsha256su0q_u32, vsha256su1q_u32): New.
* config/aarch64/iterators.md (UNSPEC_SHA256H<2>. UNSPEC_SHA256SU<01>):
New.
(CRYPTO_SHA256): New int iterator.
(sha256_op): New int attribute.

testsuite/
* gcc.target/aarch64/sha256.c: New.diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index b0b9bf1..dd21d9c 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -376,3 +376,8 @@
   VAR1 (TERNOP, crypto_sha1p, 0, v4si)
   VAR1 (TERNOP, crypto_sha1su0, 0, v4si)
 
+  /* Implemented by aarch64_crypto_sha256.  */
+  VAR1 (TERNOP, crypto_sha256h, 0, v4si)
+  VAR1 (TERNOP, crypto_sha256h2, 0, v4si)
+  VAR1 (BINOP, crypto_sha256su0, 0, v4si)
+  VAR1 (TERNOP, crypto_sha256su1, 0, v4si)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index ab9ad57..5bcada2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4139,3 +4139,37 @@
   "sha1su0\\t%0.4s, %2.4s, %3.4s"
   [(set_attr "type" "crypto_sha1_xor")]
 )
+
+;; sha256
+
+(define_insn "aarch64_crypto_sha256hv4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:V4SI 2 "register_operand" "w")
+  (match_operand:V4SI 3 "register_operand" "w")]
+ CRYPTO_SHA256))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "sha256h\\t%q0, %q2, %3.4s"
+  [(set_attr "type" "crypto_sha256_slow")]
+)
+
+(define_insn "aarch64_crypto_sha256su0v4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SHA256SU0))]
+  "TARGET_SIMD &&TARGET_CRYPTO"
+  "sha256su0\\t%0.4s, %2.4s"
+  [(set_attr "type" "crypto_sha256_fast")]
+)
+
+(define_insn "aarch64_crypto_sha256su1v4si"
+  [(set (match_operand:V4SI 0 "register_operand" "=w")
+(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+  (match_operand:V4SI 2 "register_operand" "w")
+  (match_operand:V4SI 3 "register_operand" "w")]
+ UNSPEC_SHA256SU1))]
+  "TARGET_SIMD &&TARGET_CRYPTO"
+  "sha256su1\\t%0.4s, %2.4s, %3.4s"
+  [(set_attr "type" "crypto_sha256_slow")]
+)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 244abe7..d038e37 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -23226,6 +23226,34 @@ vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
(int32x4_t) w12_15);
 }
 
+static __inline uint32x4_t
+vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
+{
+  return (uint32x4_t) __builtin_aarch64_crypto_sha256hv4si
+   ((int32x4_t) hash_abcd, (int32x4_t) hash_efgh, (int32x4_t) wk);
+}
+
+static __inline uint32x4_t
+vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
+{
+  return (uint32x4_t) __builtin_aarch64_crypto_sha256h2v4si
+   ((int32x4_t) hash_efgh, (int32x4_t) hash_abcd, (int32x4_t) wk);
+}
+
+static __inline uint32x4_t
+vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
+{
+  return (uint32x4_t) __builtin_aarch64_crypto_sha256su0v4si
+   ((int32x4_t) w0_3, (int32x4_t) w4_7);
+}
+
+static __inline uint32x4_t
+vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
+{
+  return (uint32x4_t) __builtin_aarch64_crypto_sha256su1v4si
+  ((int32x4_t) tw0_3, (int32x4_t) w8_11, (int32x4_t) w12_15);
+}
+
 #endif
 
 /* vshl */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 650b503..ae94e5a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -273,6 +273,10 @@
 UNSPEC_SHA1H; Used in aarch64-simd.md.
 UNSPEC_SHA1SU0  ; Used in aarch64-simd.md.
 UNSPEC_SHA1SU1  ; Used in aarch64-simd.md.
+UNSPEC_SHA256H  ; Used in aarch64-simd.md.
+UNSPEC_SHA256H2 ; Used in aarch64-simd.md.
+UNSPEC_SHA256SU0; Used in aarch64-simd.md.
+UNSPEC_SHA256SU1; Used in aarch64-simd.md.
 ])
 
 ;; ---
@@ -858,6 +862,8 @@
 
 (define_int_iterator CRYPTO_SHA1 [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P])
 
+(define_int_iterator CRYPTO_SHA256 [UNSPEC_SHA256H 

[Patch, AArch64] [3/6] Implement support for Crypto -- AES.

2013-12-06 Thread Tejas Belagod

Hi,

The attached patch implements support for AES crypto instructions.

Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas.

2013-12-06  Tejas Belagod  

gcc/
* config/aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-simd.md (aarch64_crypto_aesv16qi,
aarch64_crypto_aesv16qi): New.
* config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8,
vaesimcq_u8): New.
* config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC,
UNSPEC_AESIMC): New.
(CRYPTO_AES, CRYPTO_AESMC): New int iterators.
(aes_op, aesmc_op): New int attributes.

testsuite/
* gcc.target/aarch64/aes.c: New.diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index c18b150..49ab482 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -362,3 +362,8 @@
   /* Implemented by fma4.  */
   BUILTIN_VDQF (TERNOP, fma, 4)
 
+  /* Implemented by aarch64_crypto_aes.  */
+  VAR1 (BINOP, crypto_aese, 0, v16qi)
+  VAR1 (BINOP, crypto_aesd, 0, v16qi)
+  VAR1 (UNOP, crypto_aesmc, 0, v16qi)
+  VAR1 (UNOP, crypto_aesimc, 0, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 5dcbc62..4b17748 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4074,3 +4074,25 @@
   (gen_aarch64_get_lane (operands[0], operands[1], operands[2]));
 DONE;
 })
+
+;; aes
+
+(define_insn "aarch64_crypto_aesv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+  (match_operand:V16QI 2 "register_operand" "w")]
+ CRYPTO_AES))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "aes\\t%0.16b, %2.16b"
+  [(set_attr "type" "crypto_aes")]
+)
+
+(define_insn "aarch64_crypto_aesv16qi"
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+   (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
+CRYPTO_AESMC))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "aes\\t%0.16b, %1.16b"
+  [(set_attr "type" "crypto_aes")]
+)
+
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index dc56170..9f35e09 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15793,6 +15793,42 @@ vaddvq_f64 (float64x2_t __a)
   return vgetq_lane_f64 (__t, __LANE0 (2));
 }
 
+#ifdef __ARM_FEATURE_CRYPTO
+
+/* vaes  */
+
+static __inline uint8x16_t
+vaeseq_u8 (uint8x16_t data, uint8x16_t key)
+{
+  return
+(uint8x16_t) __builtin_aarch64_crypto_aesev16qi ((int8x16_t) data,
+(int8x16_t) key);
+}
+
+static __inline uint8x16_t
+vaesdq_u8 (uint8x16_t data, uint8x16_t key)
+{
+  return
+(uint8x16_t) __builtin_aarch64_crypto_aesdv16qi ((int8x16_t) data,
+(int8x16_t) key);
+}
+
+static __inline uint8x16_t
+vaesmcq_u8 (uint8x16_t data)
+{
+  return
+(uint8x16_t) __builtin_aarch64_crypto_aesmcv16qi ((int8x16_t) data);
+}
+
+static __inline uint8x16_t
+vaesimcq_u8 (uint8x16_t data)
+{
+  return
+(uint8x16_t) __builtin_aarch64_crypto_aesimcv16qi ((int8x16_t) data);
+}
+
+#endif
+
 /* vcage  */
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fd7152c..91d6f74 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -263,6 +263,10 @@
 UNSPEC_UZP2; Used in vector permute patterns.
 UNSPEC_TRN1; Used in vector permute patterns.
 UNSPEC_TRN2; Used in vector permute patterns.
+UNSPEC_AESE; Used in aarch64-simd.md.
+UNSPEC_AESD ; Used in aarch64-simd.md.
+UNSPEC_AESMC; Used in aarch64-simd.md.
+UNSPEC_AESIMC   ; Used in aarch64-simd.md.
 ])
 
 ;; ---
@@ -843,6 +847,9 @@
 
 (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
 
+(define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD])
+(define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC])
+
 ;; ---
 ;; Int Iterators Attributes.
 ;; ---
@@ -959,3 +966,7 @@
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
 
 (define_int_attr frecp_suffix  [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
+
+(define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")])
+(define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")])
+
diff --git a/gcc/testsuite/gcc.target/aarch64/aes.c 
b/gcc/testsuite/gcc.target/aarch64/aes.c
new file mode 100644
index 000..82665fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aes.c
@@ -

[Patch, AArch64] [6/6] Implement support for Crypto -- PMULL.64.

2013-12-06 Thread Tejas Belagod


Hi,

This patch implements support for crypto pmull.64.

Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas.

2013-12-06  Tejas Belagod  

gcc/
* config/aarch64/aarch64-builtins.c: Define builtin types for poly64_t
poly128_t.
* aarch64/aarch64-simd-builtins.def: Update builtins table.
* config/aarch64/aarch64-simd.md (aarch64_crypto_pmulldi,
aarch64_crypto_pmullv2di): New.
* config/aarch64/aarch64.c (aarch64_simd_mangle_map): Update table for
poly64x2_t mangler.
* config/aarch64/arm_neon.h (poly64x2_t, poly64_t, poly128_t): Define.
(vmull_p64, vmull_high_p64): New.
* config/aarch64/iterators.md (UNSPEC_PMULL<2>): New.

testsuite/

* gcc.target/aarch64/pmull.c: New.diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index f4d23e7..748206f 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -487,6 +487,10 @@ aarch64_init_simd_builtins (void)
 make_signed_type (GET_MODE_PRECISION (QImode));
   tree aarch64_simd_polyHI_type_node =
 make_signed_type (GET_MODE_PRECISION (HImode));
+  tree aarch64_simd_polyDI_type_node =
+make_unsigned_type (GET_MODE_PRECISION (DImode));
+  tree aarch64_simd_polyTI_type_node =
+make_unsigned_type (GET_MODE_PRECISION (TImode));
 
   /* Scalar type nodes.  */
   tree aarch64_simd_intQI_type_node = aarch64_build_type (QImode, false);
@@ -526,6 +530,10 @@ aarch64_init_simd_builtins (void)
 "__builtin_aarch64_simd_poly8");
   (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node,
 "__builtin_aarch64_simd_poly16");
+  (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyDI_type_node,
+"__builtin_aarch64_simd_poly64");
+  (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyTI_type_node,
+"__builtin_aarch64_simd_poly128");
   (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node,
 "__builtin_aarch64_simd_ti");
   (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node,
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index dd21d9c..ec010f3 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -381,3 +381,7 @@
   VAR1 (TERNOP, crypto_sha256h2, 0, v4si)
   VAR1 (BINOP, crypto_sha256su0, 0, v4si)
   VAR1 (TERNOP, crypto_sha256su1, 0, v4si)
+
+  /* Implemented by aarch64_crypto_pmull.  */
+  VAR1 (BINOP, crypto_pmull, 0, di)
+  VAR1 (BINOP, crypto_pmull, 0, v2di)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 5bcada2..6d3d70e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4173,3 +4173,25 @@
   "sha256su1\\t%0.4s, %2.4s, %3.4s"
   [(set_attr "type" "crypto_sha256_slow")]
 )
+
+;; pmull
+
+(define_insn "aarch64_crypto_pmulldi"
+  [(set (match_operand:TI 0 "register_operand" "=w")
+(unspec:TI  [(match_operand:DI 1 "register_operand" "w")
+(match_operand:DI 2 "register_operand" "w")]
+   UNSPEC_PMULL))]
+ "TARGET_SIMD && TARGET_CRYPTO"
+ "pmull\\t%0.1q, %1.1d, %2.1d"
+  [(set_attr "type" "neon_mul_d_long")]
+)
+
+(define_insn "aarch64_crypto_pmullv2di"
+ [(set (match_operand:TI 0 "register_operand" "=w")
+   (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
+  (match_operand:V2DI 2 "register_operand" "w")]
+ UNSPEC_PMULL2))]
+  "TARGET_SIMD && TARGET_CRYPTO"
+  "pmull2\\t%0.1q, %1.2d, %2.2d"
+  [(set_attr "type" "neon_mul_d_long")]
+)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c85947a..963bd2e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6370,6 +6370,7 @@ static aarch64_simd_mangle_map_entry 
aarch64_simd_mangle_map[] = {
   { V2DFmode,  "__builtin_aarch64_simd_df", "13__Float64x2_t" },
   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
+  { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
   { VOIDmode, NULL, NULL }
 };
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index d038e37..509b1a7 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -75,6 +75,8 @@ typedef __builtin_aarch64_simd_poly8 poly8x16_t
   __attribute__ ((__vector_size__ (16)));
 typedef __builtin_aarch64_simd_poly16 poly16x8_t
   __attribute__ ((__vector_size__ (16)));
+typedef __builtin_aarch64_simd_poly64 poly64x2_t
+  __attribute__ ((__vector_size__ (16)));
 typedef __builtin_aarch64_simd_uqi uint8x16_t
   __attribute__ ((__vec

[Patch, AArch64] [2/6] Implement support for Crypto -- Instruction types.

2013-12-06 Thread Tejas Belagod


Hi,

The attached patch adds crypto types for instruction classificiation.

Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas

2013-12-06  Tejas Belagod  

* config/arm/types.md (neon_mul_d_long, crypto_aes, crypto_sha1_xor,
crypto_sha1_fast, crypto_sha1_slow, crypto_sha256_fast,
crypto_sha256_slow): New.diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index 1c4b9e3..81ca62d 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -326,6 +326,7 @@
 ; neon_mul_b_long
 ; neon_mul_h_long
 ; neon_mul_s_long
+; neon_mul_d_long
 ; neon_mul_h_scalar
 ; neon_mul_h_scalar_q
 ; neon_mul_s_scalar
@@ -519,6 +520,15 @@
 ; neon_fp_div_s_q
 ; neon_fp_div_d
 ; neon_fp_div_d_q
+;
+; The classification below is for Crypto instructions.
+;
+; crypto_aes
+; crypto_sha1_xor
+; crypto_sha1_fast
+; crypto_sha1_slow
+; crypto_sha256_fast
+; crypto_sha256_slow
 
 (define_attr "type"
  "adc_imm,\
@@ -821,6 +831,7 @@
   neon_mul_b_long,\
   neon_mul_h_long,\
   neon_mul_s_long,\
+  neon_mul_d_long,\
   neon_mul_h_scalar,\
   neon_mul_h_scalar_q,\
   neon_mul_s_scalar,\
@@ -1035,7 +1046,14 @@
   neon_fp_div_s,\
   neon_fp_div_s_q,\
   neon_fp_div_d,\
-  neon_fp_div_d_q"
+  neon_fp_div_d_q,\
+\
+  crypto_aes,\
+  crypto_sha1_xor,\
+  crypto_sha1_fast,\
+  crypto_sha1_slow,\
+  crypto_sha256_fast,\
+  crypto_sha256_slow"
(const_string "untyped"))
 
 ; Is this an (integer side) multiply with a 32-bit (or smaller) result?

[Patch, AArch64] [1/6] Implement support for Crypto -- Define TARGET_CRYPTO.

2013-12-06 Thread Tejas Belagod


Hi,

The attached patch defines TARGET_CRYPTO macro and defines builtin preprocessor 
macro __ARM_FEATURE_CRYPTO.


Tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas.

Changelog

2013-12-06  Tejas Belagod  

* config/aarch64/aarch64.h (TARGET_CRYPTO): New.
(__ARM_FEATURE_CRYPTO): Define if TARGET_CRYPTO is true.diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index cead022..59f15cb 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -54,6 +54,8 @@
  cpp_define (parse_in, "_ILP32");  \
  cpp_define (parse_in, "__ILP32__");   \
}   \
+  if (TARGET_CRYPTO)   \
+   builtin_define ("__ARM_FEATURE_CRYPTO");\
 } while (0)
 
 
@@ -180,6 +182,8 @@ extern unsigned long aarch64_isa_flags;
 extern unsigned long aarch64_tune_flags;
 #define AARCH64_TUNE_SLOWMUL   (aarch64_tune_flags & AARCH64_FL_SLOWMUL)
 
+/* Crypto is an optional feature.  */
+#define TARGET_CRYPTO AARCH64_ISA_CRYPTO
 
 /* Standard register usage.  */
 

Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Vladimir Makarov

On 12/6/2013, 11:28 AM, Michael Meissner wrote:

On Thu, Dec 05, 2013 at 12:40:17PM -0500, Vladimir Makarov wrote:

The following patch fixes two GCC testsuite failures for LRA.  The
patch makes swap through registers instead of memory for the test
cases when LRA is used.

There are differences in reload and LRA constraint matching
algorithm which results in different alternative choices when the
original pattern is used.

Actually my first proposed solution variant used one pattern which
is now for LRA in this patch.  But some doubt arose that it may
affect reload pass in some bad way.

Ok to commit?


I must admit to not remembering why I used ??&r.  I know I wanted it to prefer
doing the memory patterns.  I would think we should try just the pattern
without the ??.



  I tried it about 2 months ago.  I did not see any problems of such 
change for reload and LRA.  There were no regressions on GCC testsuite.


  So, Mike, if you don't see any compelling reason to keep ??, probably 
we should remove them.


If you don't mind, I'll make the patch and test again and after that 
submit it for approval.




[PATCH] Improve scan pattern in gcc.dg/tree-ssa/loop-31.c

2013-12-06 Thread Kyrill Tkachov

Hi all,

The testcase gcc.dg/tree-ssa/loop-31.c started failing on arm with r202165. The 
scan dump pattern looks for "+ 2" appearing exactly once. With r202165 the loop 
header is modified from:


  :
  ivtmp.5_10 = (unsigned int) &a[4294967295];
  _16 = (unsigned int) len_4(D);
  _17 = _16 * 2;
  _18 = (unsigned int) &a;
  _19 = _18 + 4294967294;
  _20 = _19 + _17;


to:

  :
  ivtmp.5_10 = (unsigned int) &a[4294967295];
  _16 = (sizetype) len_4(D);
  _17 = _16 + 2147483647;    "+ 2" will match here.
  _18 = _17 * 2;
  _19 = &a + _18;
  _20 = (unsigned int) _19;


Since the strength reduction in the loop itself that this testcase is testing is 
unaffected, this patch just updates the pattern to be "+ 2;" to match the 
induction variable increment: ivtmp.5_11 = ivtmp.5_1 + 2;


Now the testcase passes on arm.

Ok for trunk?

Thanks,
Kyrill

2013-12-06  Kyrylo Tkachov  

* gcc.dg/tree-ssa/loop-31.c: Update scan pattern.diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-31.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-31.c
index 4f22637..fa18f5e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-31.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-31.c
@@ -15,7 +15,7 @@ short foo (int len, int v)
 
 /* When we do not have addressing mode including multiplication,
the memory access should be strength-reduced.  */
-/* { dg-final { scan-tree-dump-times " \\+ 2" 1 "optimized" { target arm*-*-* } } } */
-/* { dg-final { scan-tree-dump-times " \\+ 2" 1 "optimized" { target { ia64-*-* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times " \\+ 2" 2 "optimized" { target { ia64-*-* && lp64 } } } } */
+/* { dg-final { scan-tree-dump-times " \\+ 2;" 1 "optimized" { target arm*-*-* } } } */
+/* { dg-final { scan-tree-dump-times " \\+ 2;" 1 "optimized" { target { ia64-*-* && ilp32 } } } } */
+/* { dg-final { scan-tree-dump-times " \\+ 2;" 2 "optimized" { target { ia64-*-* && lp64 } } } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */

[PATCH, i386]: Fix warning in PR59405

2013-12-06 Thread Uros Bizjak
Hello!

Currently, gcc emits wrong warning for MMX argument passing on 32 bit
targets, even when MMX is enabled:

pr59405.c: In function ‘foo32x2_be’:
pr59405.c:7:1: warning: SSE vector argument without SSE enabled
changes the ABI [enabled by default]
 foo32x2_be (float32x2_t x)

Attached patch fixes this oversight.

The testcase also tests correct function of necessary _mm_empty intrinsic.

2013-12-06  Uros Bizjak  

PR target/59405
* config/i386/i386.c (type_natural_mode): Properly handle
size 8 for !TARGET_64BIT.

testsuite/ChangeLog:

2013-12-06  Uros Bizjak  

PR target/59405
* gcc.target/i386/pr59405.c: New test.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and
committed to mainline SVN. The patch will be backported to other
release branches.

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 205748)
+++ config/i386/i386.c  (working copy)
@@ -6172,7 +6172,8 @@ type_natural_mode (const_tree type, const CUMULATI
  }
return TYPE_MODE (type);
  }
-   else if ((size == 8 || size == 16) && !TARGET_SSE)
+   else if (((size == 8 && TARGET_64BIT) || size == 16)
+&& !TARGET_SSE)
  {
static bool warnedsse;
 
@@ -6184,10 +6185,21 @@ type_natural_mode (const_tree type, const CUMULATI
warning (0, "SSE vector argument without SSE "
 "enabled changes the ABI");
  }
-   return mode;
  }
-   else
- return mode;
+   else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
+ {
+   static bool warnedmmx;
+
+   if (cum
+   && !warnedmmx
+   && cum->warn_mmx)
+ {
+   warnedmmx = true;
+   warning (0, "MMX vector argument without MMX "
+"enabled changes the ABI");
+ }
+ }
+   return mode;
  }
 
  gcc_unreachable ();
Index: testsuite/gcc.target/i386/pr59405.c
===
--- testsuite/gcc.target/i386/pr59405.c (revision 0)
+++ testsuite/gcc.target/i386/pr59405.c (working copy)
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-mmmx -mfpmath=387" } */
+
+#include "mmx-check.h"
+
+#include 
+
+typedef float float32x2_t __attribute__ ((vector_size (8)));
+
+float
+foo32x2_be (float32x2_t x)
+{
+  _mm_empty();
+  return x[1];
+}
+
+static void
+mmx_test (void)
+{
+  float32x2_t b = { 0.0f, 1.0f };
+
+  if (foo32x2_be (b) != 1.0f)
+abort ();
+}


Re: [Patch, RTL] Eliminate redundant vec_select moves.

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 05:12:08PM +, Tejas Belagod wrote:
> 2013-12-06  Tejas Belagod  
> 
> testsuite/
>   * gcc.dg/vect/vect-nop-move.c: Fix dg options.

Ok, thanks.

> --- a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
> @@ -1,6 +1,6 @@
>  /* { dg-do run } */ 
>  /* { dg-require-effective-target vect_float } */
> -/* { dg-options "-O3 -fdump-rtl-combine-details" } */
> +/* { dg-additional-options "-fdump-rtl-combine-details" } */
>  
>  extern void abort (void);
>  
> @@ -62,3 +62,4 @@ main()
>  
>  /* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target 
> aarch64*-*-* } } } */
>  /* { dg-final { cleanup-rtl-dump "combine" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */


Jakub


[PATCH][ARM] Implement vceq_p64 and vtst_p64 intrinsics in arm_neon.h

2013-12-06 Thread Kyrill Tkachov

Hi all,

Following the implementation of the Crypto intrinsics I posted earlier this 
week, this patch implements the vceq_p64 and vtst_p64 intrinsics that operate on 
the new poly64_t type. They do not have a regular form and can thus not be 
autogenerated from our beloved ML scripts and are therefore synthesised as a 
vceq_u32 or vtst_u32 operation, followed by a pairwise reduce with min or max 
respectively.


These intrinsics are only available when the crypto intrinsics are available 
(i.e. -mfpu=crypto-neon-fp-armv8 and -mfloat-abi=(hard|softfp)).


I've added two runtime tests to make sure they generate correct results.

Ok for trunk?

Thanks,
Kyrill

2013-12-06  Kyrylo Tkachov  

* config/arm/neon.ml (crypto_intrinsics): Add vceq_64 and vtst_p64.
* config/arm/arm_neon.h: Regenerate.
* config/arm/neon-docgen.ml: Add vceq_p64 and vtst_p64.
* doc/arm-neon-intrinsics.texi: Regenerate.

2013-12-06  Kyrylo Tkachov  

* gcc.target/arm/neon-vceq_p64.c: New test.
* gcc.target/arm/neon-vtst_p64.c: Likewise.diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 59ef22c..cc3f56c 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -13278,6 +13278,26 @@ vstrq_p128 (poly128_t * __ptr, poly128_t __val)
 #endif
 }
 
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceq_p64 (poly64x1_t a, poly64x1_t b)
+{
+  uint32x2_t t_a = vreinterpret_u32_p64 (a);
+  uint32x2_t t_b = vreinterpret_u32_p64 (b);
+  uint32x2_t c = vceq_u32 (t_a, t_b);
+  uint32x2_t m = vpmin_u32 (c, c);
+  return vreinterpret_u64_u32 (m);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  uint32x2_t t_a = vreinterpret_u32_p64 (a);
+  uint32x2_t t_b = vreinterpret_u32_p64 (b);
+  uint32x2_t c = vtst_u32 (t_a, t_b);
+  uint32x2_t m = vpmax_u32 (c, c);
+  return vreinterpret_u64_u32 (m);
+}
+
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
 {
diff --git a/gcc/config/arm/neon-docgen.ml b/gcc/config/arm/neon-docgen.ml
index 41ae059..8945da7 100644
--- a/gcc/config/arm/neon-docgen.ml
+++ b/gcc/config/arm/neon-docgen.ml
@@ -340,6 +340,14 @@ let crypto_doc =
 @end itemize
 
 @itemize @bullet
+@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
+@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
 @item uint32_t vsha1h_u32 (uint32_t)
 @*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
 @end itemize
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 968c171..69618d0 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -2208,6 +2208,26 @@ vstrq_p128 (poly128_t * __ptr, poly128_t __val)
 #endif
 }
 
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceq_p64 (poly64x1_t a, poly64x1_t b)
+{
+  uint32x2_t t_a = vreinterpret_u32_p64 (a);
+  uint32x2_t t_b = vreinterpret_u32_p64 (b);
+  uint32x2_t c = vceq_u32 (t_a, t_b);
+  uint32x2_t m = vpmin_u32 (c, c);
+  return vreinterpret_u64_u32 (m);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  uint32x2_t t_a = vreinterpret_u32_p64 (a);
+  uint32x2_t t_b = vreinterpret_u32_p64 (b);
+  uint32x2_t c = vtst_u32 (t_a, t_b);
+  uint32x2_t m = vpmax_u32 (c, c);
+  return vreinterpret_u64_u32 (m);
+}
+
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vaeseq_u8 (uint8x16_t __data, uint8x16_t __key)
 {
diff --git a/gcc/doc/arm-neon-intrinsics.texi b/gcc/doc/arm-neon-intrinsics.texi
index 610892d..b146868 100644
--- a/gcc/doc/arm-neon-intrinsics.texi
+++ b/gcc/doc/arm-neon-intrinsics.texi
@@ -11939,6 +11939,14 @@
 @end itemize
 
 @itemize @bullet
+@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
+@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
+@end itemize
+
+@itemize @bullet
 @item uint32_t vsha1h_u32 (uint32_t)
 @*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
 @end itemize
diff --git a/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c b/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c
new file mode 100644
index 000..21a6a78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+#include 
+
+extern void abort (void);
+
+int
+main (void)
+{
+  uint64_t args[] = { 0x0, 0xdeadbeef, ~0xdeadbeef, 0x,
+  ~0x, 0x, ~0x, ~0x0 };
+  int i, j;
+
+  for (i = 0; i < sizeof (args) / sizeof (args[0]); ++i)
+{
+   for (j = 0; j < sizeof (args) / sizeof (args[0]); ++j)
+ {
+   uint64_t a1 = args[i];
+   

Re: [Patch, RTL] Eliminate redundant vec_select moves.

2013-12-06 Thread Tejas Belagod

Jakub Jelinek wrote:

On Wed, Dec 04, 2013 at 08:14:43AM -0800, H.J. Lu wrote:

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -fdump-rtl-combine-details" } */


Please change dg-options to dg-additional-options, otherwise
it overrides the target basic vectorization options and thus
fails on i686-linux.


+/* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target
aarch64*-*-* } } } */

Any particular reason why it doesn't work for x86?


+/* { dg-final { cleanup-rtl-dump "combine" } } */


You also need to add

/* { dg-final { cleanup-tree-dump "vect" } } */

because all vectorizer tests dump *.vect dumps.


Here is a patch, OK to commit?

Thanks,
Tejas.

2013-12-06  Tejas Belagod  

testsuite/
  * gcc.dg/vect/vect-nop-move.c: Fix dg options.diff --git a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c 
b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
index 1941933..98f72f1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-nop-move.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */ 
 /* { dg-require-effective-target vect_float } */
-/* { dg-options "-O3 -fdump-rtl-combine-details" } */
+/* { dg-additional-options "-fdump-rtl-combine-details" } */
 
 extern void abort (void);
 
@@ -62,3 +62,4 @@ main()
 
 /* { dg-final { scan-rtl-dump "deleting noop move" "combine" { target 
aarch64*-*-* } } } */
 /* { dg-final { cleanup-rtl-dump "combine" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

Re: [patch i386]: Fix PR 56807

2013-12-06 Thread Kai Tietz
Upps ... here is the missing Changlog

ChangeLog

2013-12-06  Kai Tietz  

PR target/56807
* config/i386/i386.c (ix86_expand_prologue): Address saved
registers stack-relative, not via frame-pointer.


Re: [patch i386]: Fix PR 56807

2013-12-06 Thread H.J. Lu
On Fri, Dec 6, 2013 at 9:06 AM, Kai Tietz  wrote:
> Hi,
>
>
> ChangeLog
>
> 2013-12-06  Kai Tietz  
>
> PR target/56807
> * config/i386/i386.c (ix86_expand_prologue):
>

Incomplete ChangeLog entry.

-- 
H.J.


[patch i386]: Fix PR 56807

2013-12-06 Thread Kai Tietz
Hi,


ChangeLog

2013-12-06  Kai Tietz  

PR target/56807
* config/i386/i386.c (ix86_expand_prologue):

Tested for i686-w64-mingw32, x86_64-unknown-linux-gnu.  Ok for apply?

Regards,
Kai

Index: config/i386/i386.c
===
--- config/i386/i386.c(Revision 205719)
+++ config/i386/i386.c(Arbeitskopie)
@@ -10934,18 +10937,21 @@ ix86_expand_prologue (void)
 }
   m->fs.sp_offset += allocate;

+  /* Use stack_pointer_rtx for relative addressing so that code
+ works for realigned stack, too.  */
   if (r10_live && eax_live)
 {
-  t = choose_baseaddr (m->fs.sp_offset - allocate);
+  t = plus_constant (Pmode, stack_pointer_rtx, allocate);
   emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
   gen_frame_mem (word_mode, t));
-  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+  t = plus_constant (Pmode, stack_pointer_rtx,
+ allocate - UNITS_PER_WORD);
   emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
   gen_frame_mem (word_mode, t));
 }
   else if (eax_live || r10_live)
 {
-  t = choose_baseaddr (m->fs.sp_offset - allocate);
+  t = plus_constant (Pmode, stack_pointer_rtx, allocate);
   emit_move_insn (gen_rtx_REG (word_mode,
(eax_live ? AX_REG : R10_REG)),
   gen_frame_mem (word_mode, t));


[patch,libgcc] Add microblaze-*-rtems*

2013-12-06 Thread Ralf Corsepius

Hi,

I am going to apply the patch below to trunk and 4.8-branch.

It adds a copy of the microblaze-*-elf section for microblaze-rtems* to 
libgcc/config.host.


This is the missing patch I mentioned in
http://gcc.gnu.org/ml/gcc/2013-11/msg00548.html

Ralf

2013-12-06  Ralf Corsépius  

	* config.host (microblaze-*-rtems*): New.

Index: ChangeLog
===
--- ChangeLog	(revision 205749)
+++ ChangeLog	(working copy)
@@ -1,3 +1,7 @@
Index: config.host
===
--- config.host	(revision 205749)
+++ config.host	(working copy)
@@ -754,6 +754,10 @@
 	tmake_file="${tmake_file} microblaze/t-microblaze t-fdpbit"
 	extra_parts="$extra_parts crtbeginS.o crtendS.o crtbeginT.o crti.o crtn.o"
 	;;
+microblaze*-*-rtems*)
+	tmake_file="${tmake_file} microblaze/t-microblaze t-fdpbit"
+	extra_parts="$extra_parts crtbeginS.o crtendS.o crtbeginT.o crti.o crtn.o"
+	;;
 mips*-*-netbsd*)			# NetBSD/mips, either endian.
 	;;
 mips*-*-linux*)# Linux MIPS, either endian.


Re: [wide-int] small cleanup in wide-int.*

2013-12-06 Thread Kenneth Zadeck

On 12/03/2013 11:52 AM, Richard Sandiford wrote:

Kenneth Zadeck  writes:

Index: tree-vrp.c
===
--- tree-vrp.c  (revision 205597)
+++ tree-vrp.c  (working copy)
@@ -2611,22 +2611,28 @@ extract_range_from_binary_expr_1 (value_
  
signop sign = TYPE_SIGN (expr_type);

unsigned int prec = TYPE_PRECISION (expr_type);
-  unsigned int prec2 = (prec * 2) + (sign == UNSIGNED ? 2 : 0);
  
if (range_int_cst_p (&vr0)

  && range_int_cst_p (&vr1)
  && TYPE_OVERFLOW_WRAPS (expr_type))
{
- wide_int sizem1 = wi::mask (prec, false, prec2);
- wide_int size = sizem1 + 1;
+ /* vrp_int is twice as wide as anything that the target
+supports so it can support a full width multiply.  No
+need to add any more padding for an extra sign bit
+because that comes with the way that WIDE_INT_MAX_ELTS is
+defined.  */
+ typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2)
+   vrp_int;
+ vrp_int sizem1 = wi::mask  (prec, false);
+ vrp_int size = sizem1 + 1;
  
  	  /* Extend the values using the sign of the result to PREC2.

 From here on out, everthing is just signed math no matter
 what the input types were.  */
- wide_int min0 = wide_int::from (vr0.min, prec2, sign);
- wide_int max0 = wide_int::from (vr0.max, prec2, sign);
- wide_int min1 = wide_int::from (vr1.min, prec2, sign);
- wide_int max1 = wide_int::from (vr1.max, prec2, sign);
+ vrp_int min0 = wi::to_vrp (vr0.min);
+ vrp_int max0 = wi::to_vrp (vr0.max);
+ vrp_int min1 = wi::to_vrp (vr1.min);
+ vrp_int max1 = wi::to_vrp (vr1.max);

I think we should avoid putting to_vrp in tree.h if vrp_int is only
local to this block.  Instead you could have:

  typedef generic_wide_int
   > vrp_int_cst;
   ...
   vrp_int_cst min0 = vr0.min;
   vrp_int_cst max0 = vr0.max;
   vrp_int_cst min1 = vr1.min;
   vrp_int_cst max1 = vr1.max;

i did this in a different way because i had trouble doing it as you 
suggested. The short answer is that all of the vrp_int code is now 
local to tree-vrp.c which i think was your primary goal

@@ -228,15 +228,16 @@ along with GCC; see the file COPYING3.
  #endif
  
  /* The MAX_BITSIZE_MODE_ANY_INT is automatically generated by a very

-   early examination of the target's mode file.  Thus it is safe that
-   some small multiple of this number is easily larger than any number
-   that that target could compute.  The place in the compiler that
-   currently needs the widest ints is the code that determines the
-   range of a multiply.  This code needs 2n + 2 bits.  */
-
+   early examination of the target's mode file.  The WIDE_INT_MAX_ELTS
+   can accomodate at least 1 more bit so that unsigned numbers of that
+   mode can be represented.  This will accomodate every place in the
+   compiler except for a multiply routine in tree-vrp.  That function
+   makes its own arrangements for larger wide-ints.  */

I think we should drop the "This will accomodate..." bit, since it'll soon
get out of date.  Maybe something like:

 Note that it is still possible to create fixed_wide_ints that have
 precisions greater than MAX_BITSIZE_MODE_ANY_INT.  This can be useful
 when representing a double-width multiplication result, for example.  */

done

  #define WIDE_INT_MAX_ELTS \
-  ((4 * MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1) \
-   / HOST_BITS_PER_WIDE_INT)
+  (((MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT - 1)\
+/ HOST_BITS_PER_WIDE_INT) + 1)

I think this should be:

   (MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT + 1)

We only need an extra HWI if MAX_BITSIZE_MODE_ANY_INT is an exact multiple
of HOST_BITS_PER_WIDE_INT.

we will do this later when some other issues that Eric B raised are settled.

ok to commit to the branch?

kenny

Looks good to me otherwise FWIW.

You probably already realise this, but for avoidance of doubt, Richard
was also asking that we reduce MAX_BITSIZE_MODE_ANY_INT to 128 on x86_64,
since that's the largest scalar_mode_supported_p mode.

Thanks,
Richard



Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c	(revision 205726)
+++ gcc/tree-vrp.c	(working copy)
@@ -2213,6 +2213,22 @@ extract_range_from_multiplicative_op_1 (
 set_value_range (vr, type, min, max, NULL);
 }
 
+/* vrp_int is twice as wide as anything that the target supports so it
+   can support a full width multiply.  No need to add any more padding
+   for an extra sign bit because that comes with the way that
+   WIDE_INT_MAX_ELTS is defined.  */ 
+typedef FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2) vrp_int;
+namespace wi
+{
+  generic_wide_int  > to_vrp (const_tree);
+}
+
+inline generic_wide_int  >
+wi::to_vrp (co

Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Michael Meissner
On Thu, Dec 05, 2013 at 12:40:17PM -0500, Vladimir Makarov wrote:
> The following patch fixes two GCC testsuite failures for LRA.  The
> patch makes swap through registers instead of memory for the test
> cases when LRA is used.
> 
> There are differences in reload and LRA constraint matching
> algorithm which results in different alternative choices when the
> original pattern is used.
> 
> Actually my first proposed solution variant used one pattern which
> is now for LRA in this patch.  But some doubt arose that it may
> affect reload pass in some bad way.
> 
> Ok to commit?

I must admit to not remembering why I used ??&r.  I know I wanted it to prefer
doing the memory patterns.  I would think we should try just the pattern
without the ??.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: AARCH64 configure check for gas -mabi support

2013-12-06 Thread Yufeng Zhang

Hi Kugan,

Thanks for working on this issue.

On 12/04/13 21:03, Kugan wrote:

Hi,

gcc trunk aarch64 bootstrapping fails with gas version 2.23.2 (with
error message similar to cannot compute suffix of object files) as this
particular version does not support -mabi=lp64. It succeeds with later
versions of gas that supports -mabi.


The -mabi option was introduced to gas when the support for ILP32 was 
added.  Initially the options were named -milp32 and -mlp64:


  http://sourceware.org/ml/binutils/2013-06/msg00178.html

and later on they were change to -mabi=ilp32 and -mabi=lp64 for 
consistency with those in the aarch64 gcc:


  http://sourceware.org/ml/binutils/2013-07/msg00180.html

The following gcc patch made the driver use the explicit option to drive 
gas:


  http://gcc.gnu.org/ml/gcc-patches/2013-07/msg00083.html

It is a neglect of the backward compatibility with binutils 2.23.



Attached patch adds checking for -mabi=lp64 and prompts an upgrade. Is
this OK?


I think instead of mandating the support for the -mabi option, the 
compiler shall be changed able to work with binutils 2.23.  The 2.23 
binutils have a good support for aarch64 and the main difference from 
2.24 is the ILP32 support.  I think it is necessary to maintain the 
backward compatibility, and it should be achieved by suppressing the 
compiler's support for ILP32 when the -mabi option is not found 
available in gas during the configuration time.


I had a quick look at areas need to be updated:

* multilib support

In gcc/config.gcc, the default and the only accepted value for 
--with-multilib-list and --with-abi shall be lp64 when -mabi is not 
available.


* -mabi option

I suggest we keep the -mabi option, but reject -mabi=ilp32 in 
gcc/config/aarch64/aarch64.c:aarch64_override_options ()


* driver spec

In gcc/config/aarch64/aarch64-elf.h, the DRIVER_SELF_SPECS and ASM_SPEC 
shall be updated to not pass/specify -mabi for gas.


* documentation

I think it needs to be mentioned in gcc/doc/install.texi the constraint 
of using pre-2.24 binutils with aarch64 gcc that is 4.9 or later.


It is a quick scouting, but hopefully it has provided some 
guidance.  If you need more help, just let me know.



Yufeng

P.s. some minor comments on the attached patch.



diff --git a/gcc/configure b/gcc/configure
index fdf0cd0..17b6e85 100755
--- a/gcc/configure
+++ b/gcc/configure


Diff result of auto-generation is usually excluded from a patch.


diff --git a/gcc/configure.ac b/gcc/configure.ac
index 91a22d5..730ada0 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -3532,6 +3532,15 @@ case "$target" in
[Define if your assembler supports the -no-mul-bug-abort 
option.])])
  ;;

+ aarch64-*-*)


aarch64*-*-*


+gcc_GAS_CHECK_FEATURE([-mabi option],
+  gcc_cv_as_aarch64_mabi,,
+  [-mabi=lp64], [.text],,,)
+if test x$gcc_cv_as_aarch64_mabi = xno; then
+   AC_MSG_ERROR([Assembler support for -mabi=lp64 is required. Upgrade the 
Assembler.])
+fi
+;;
+
sparc*-*-*)
  gcc_GAS_CHECK_FEATURE([.register], gcc_cv_as_sparc_register_op,,,
[.register %g2, #scratch],,






Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 10:59:37AM -0500, Vladimir Makarov wrote:
> It is still two different patterns.  One for reload and one for LRA.
> Attribute enabled is mostly used to describe insn constraints for
> subtargets.

I meant something like:
(define_insn "*bswapdi2_64bit"
  [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r,r")
   (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r,r")))
   (clobber (match_scratch:DI 2 "=&b,&b,&r,&r"))
   (clobber (match_scratch:DI 3 "=&r,&r,&r,&r"))
   (clobber (match_scratch:DI 4 "=&r,X,&r,&r"))]
  "TARGET_POWERPC64 && !TARGET_LDBRX
   && (REG_P (operands[0]) || REG_P (operands[1]))
   && !(MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))
   && !(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
  "#"
  [(set_attr "length" "16,12,36,36")
   (set (attr "enabled")
(cond [(eq_attr "alternative" "0,1")
 (const_int 1)
   (and (eq_attr "alternative" "2")
(match_test "!rs6000_lra_flag"))
 (const_int 1)
   (and (eq_attr "alternative" "3")
(match_test "rs6000_lra_flag"))
 (const_int 1)]
  (const_int 0)]))])

That is just one pattern.
And of course
(define_attr "enabled" "" (const_int 1))
somewhere early, because rs6000 wasn't using enabled attribute yet.

Jakub


Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Vladimir Makarov

On 12/6/2013, 10:45 AM, Jakub Jelinek wrote:

On Fri, Dec 06, 2013 at 10:39:29AM -0500, Vladimir Makarov wrote:

   Ok. I guess there is only one option to use one pattern for LRA
and reload without ?? in register alternative.  In this case, reload
and LRA will actually work according to GCC documentation (LRA
treats ? cost as the cost of one reload, reload does the same but
not in this case).

   That was my first solution but you were not comfortable with this too.

   Changing LRA most sensitive code to behave (wrongly in this case)
as reload is not an option for me.

   So I don't know what to do anymore to fix this 2 failures.


Could it be handled by enabled attribute?  You'd duplicate the
alternatives, one would be with the ??, one without, and enabled
attribute on the insn would be 1 for the first two alternatives
and also for the ?? alternative if not LRA, or non-?? alternative
if LRA.




It is still two different patterns.  One for reload and one for LRA.
Attribute enabled is mostly used to describe insn constraints for 
subtargets.


IMO removing ?? is the most right choice as both reload and LRA works 
fine without this.


On the other hand, it is not so important bug as it is performance one 
and as IBM guys at least for now are oriented to reload.  They have too 
many things (power8) to do besides LRA.


So we could postpone resolving these failures.



Re: [PATCH 10/13] Eliminate last_basic_block macro.

2013-12-06 Thread Steven Bosscher
On Fri, Dec 6, 2013 at 3:51 PM, David Malcolm wrote:
> * asan.c (transform_statements): Eliminate use of last_basic_block
> in favor of last_basic_block_for_fn, in order to make use of cfun
> explicit.

Can we please make all this _for_fn go away?

Ciao!
Steven


RE: [PATCH, ARM] Implement __builtin_trap

2013-12-06 Thread Ian Bolton
> > Hi,
> >
> > Currently, on ARM, you have to either call abort() or raise(SIGTRAP)
> > to achieve a handy crash.
> >
> > This patch allows you to instead call __builtin_trap() which is much
> > more efficient at falling over because it becomes just a single
> > instruction that will trap for you.
> >
> > Two testcases have been added (for ARM and Thumb) and both pass.
> >
> >
> > Note: This is a modified version of a patch originally submitted by
> Mark
> > Mitchell back in 2010, which came in response to PR target/59091.
> >
> > http://gcc.gnu.org/ml/gcc-patches/2010-09/msg00639.html
> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59091
> >
> > The main update, other than cosmetic differences, is that we've
> chosen
> > the same ARM encoding as LLVM for practical purposes.  (The Thumb
> > encoding in Mark's patch already matched LLVM.)
> >
> >
> > OK for trunk?
> >
> > Cheers,
> > Ian
> >
> >
> > 2013-12-04  Ian Bolton  
> >Mark Mitchell  
> >
> > gcc/
> >* config/arm/arm.md (trap): New pattern.
> >* config/arm/types.md: Added a type for trap.
> >
> > testsuite/
> >* gcc.target/arm/builtin-trap.c: New test.
> >* gcc.target/arm/thumb-builtin-trap.c: Likewise.
> > 
> 
> This needs to set the conds attribute to "unconditional".  Otherwise
> the ARM backend might try to turn this into a conditional instruction.
> 
> R.

Thanks, Richard. I fixed it up, tested it and committed as trivial
difference compared to what was approved already.diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index dd73366..934b859 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9927,6 +9927,23 @@
(set_attr "type" "mov_reg")]
 )
 
+(define_insn "trap"
+  [(trap_if (const_int 1) (const_int 0))]
+  ""
+  "*
+  if (TARGET_ARM)
+return \".inst\\t0xe7f000f0\";
+  else
+return \".inst\\t0xdeff\";
+  "
+  [(set (attr "length")
+   (if_then_else (eq_attr "is_thumb" "yes")
+ (const_int 2)
+ (const_int 4)))
+   (set_attr "type" "trap")
+   (set_attr "conds" "unconditional")]
+)
+
 
 ;; Patterns to allow combination of arithmetic, cond code and shifts
 
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index 1c4b9e3..6351f08 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -152,6 +152,7 @@
 ; store2 store 2 words to memory from arm registers.
 ; store3 store 3 words to memory from arm registers.
 ; store4 store 4 (or more) words to memory from arm registers.
+; trap   cause a trap in the kernel.
 ; udiv   unsigned division.
 ; umaal  unsigned multiply accumulate accumulate long.
 ; umlal  unsigned multiply accumulate long.
@@ -645,6 +646,7 @@
   store2,\
   store3,\
   store4,\
+  trap,\
   udiv,\
   umaal,\
   umlal,\
diff --git a/gcc/testsuite/gcc.target/arm/builtin-trap.c 
b/gcc/testsuite/gcc.target/arm/builtin-trap.c
new file mode 100644
index 000..4ff8d25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin-trap.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+
+void
+trap ()
+{
+  __builtin_trap ();
+}
+
+/* { dg-final { scan-assembler "0xe7f000f0" { target { arm_nothumb } } } } */
diff --git a/gcc/testsuite/gcc.target/arm/thumb-builtin-trap.c 
b/gcc/testsuite/gcc.target/arm/thumb-builtin-trap.c
new file mode 100644
index 000..22e90e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb-builtin-trap.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mthumb" } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+
+void
+trap ()
+{
+  __builtin_trap ();
+}
+
+/* { dg-final { scan-assembler "0xdeff" } } */


Re: RFC Asan instrumentation control

2013-12-06 Thread Ondřej Bílka
On Fri, Dec 06, 2013 at 04:16:04PM +0100, Jakub Jelinek wrote:
> On Fri, Dec 06, 2013 at 04:10:31PM +0100, Ondřej Bílka wrote:
> > Currently this code with sanitize=address gets expanded
> > 
> > int foo(char *x, char *y, int i)
> > {
> >   x[i] = y[i];
> > }
> > 
> > to 
> > 
> > snip
> > movq%rsi, %rax
> > movq%rsi, %rdx
> > shrq$3, %rax
> > andl$7, %edx
> > movzbl  2147450880(%rax), %eax
> > cmpb%dl, %al
> > jle .L18
> > .L2:
> > 
> > snip 
> > 
> > .L18:
> > .cfi_restore_state
> > testb   %al, %al
> > je  .L2
> > movq%rsi, %rdi
> > call__asan_report_load1
> > 
> > There is nothing impossible about disabling these checks. You just fill a
> > page to make this check pass and use mmap to make entire shadow memory point
> > to that page.
> 
> I don't think I understand you.  __asan_report_* is a fatal error, the
> program is terminated there.  What is costly on the asan instrumentation is
> exactly the >>, memory loads, extra comparisons, in non-buggy programs
> you never enter the __asan_report_* calls.
>
This thread started on disabling features to decrease memory
consumption. This makes the checks always pass because they always read
from a shared page that is set up to do so. It should also be a bit
faster due to better cache locality, since the .L18 path is never reached.


Also, as you mentioned that __asan_report_load1 does not return: if this is
dropped you could implement a runtime equivalent of blacklists by
LD_PRELOADing an interceptor like this one

void *blacklisted[] = {foo, bar, baz, NULL};

void
__asan_report_load1 ()
{
  void *bt[2];
  backtrace (&bt, 1);
  for (i = 0; i < blacklisted[i]; i++)
  if (bt[0] != blacklisted[i])
dlsym(RTLD_NEXT, "__asan_report_load1") ();
  return;
}




Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 10:39:29AM -0500, Vladimir Makarov wrote:
>   Ok. I guess there is only one option to use one pattern for LRA
> and reload without ?? in register alternative.  In this case, reload
> and LRA will actually work according to GCC documentation (LRA
> treats ? cost as the cost of one reload, reload does the same but
> not in this case).
> 
>   That was my first solution but you were not comfortable with this too.
> 
>   Changing LRA most sensitive code to behave (wrongly in this case)
> as reload is not an option for me.
> 
>   So I don't know what to do anymore to fix this 2 failures.

Could it be handled by enabled attribute?  You'd duplicate the
alternatives, one would be with the ??, one without, and enabled
attribute on the insn would be 1 for the first two alternatives
and also for the ?? alternative if not LRA, or non-?? alternative
if LRA.

Jakub


Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread Vladimir Makarov

On 12/6/2013, 8:44 AM, David Edelsohn wrote:

On Thu, Dec 5, 2013 at 12:40 PM, Vladimir Makarov  wrote:

The following patch fixes two GCC testsuite failures for LRA.  The patch
makes swap through registers instead of memory for the test cases when LRA
is used.

There are differences in reload and LRA constraint matching algorithm which
results in different alternative choices when the original pattern is used.

Actually my first proposed solution variant used one pattern which is now
for LRA in this patch.  But some doubt arose that it may affect reload pass
in some bad way.

Ok to commit?


I understand that LRA requires different tuning than reload, but I
continue to be a little uncomfortable with different patterns for LRA
and reload.

I would like to hear some additional opinions.




  Ok. I guess there is only one option to use one pattern for LRA and 
reload without ?? in register alternative.  In this case, reload and LRA 
will actually work according to GCC documentation (LRA treats ? cost as 
the cost of one reload, reload does the same but not in this case).


  That was my first solution but you were not comfortable with this too.

  Changing LRA most sensitive code to behave (wrongly in this case) as 
reload is not an option for me.


  So I don't know what to do anymore to fix this 2 failures.





Re: [PATCH 00/13] Remove remaining cfun-using macros from basic-block.h

2013-12-06 Thread Richard Biener
David Malcolm  wrote:
>I have a series of 13 follow-up patches which remove the remaining
>"cfun"-using macros from basic-block.h
>
>Successfully bootstrapped & regtested on x86_64-unknown-linux-gnu.
>
>These were pre-approved in stage1, and are mechanical in nature [1]
>
>I'd like to apply these to trunk now, but given that we're now in
>stage3, do I need to wait until the next stage1?

No, its ok now.

>The first 4 patches rename various "_for_function|_FOR_FUNCTION"
>macros to "_for_fn|_FOR_FN" for consistency with the earlier
>patches in this thread.
>
>The remaining patches eliminate cfun-using macros in favor of
>the "_for_fn|_FOR_FN" variant, making uses of cfun explicit.
>There are still some macros in function.h that implicitly use
>cfun, but it's less clear what to replace them with.
>
>Note to self: here's a grep invocation for ensuring that no new
>uses sneak into the sources:
>for m in \
>  basic_block_info_for_function BASIC_BLOCK_FOR_FUNCTION \
>  SET_BASIC_BLOCK_FOR_FUNCTION last_basic_block_for_function \
>  label_to_block_map_for_function profile_status_for_function \
>  SET_BASIC_BLOCK BASIC_BLOCK basic_block_info label_to_block_map \
>  profile_status last_basic_block FOR_EACH_BB FOR_EACH_BB_REVERSE \
>  FOR_ALL_BB ; 
>do
>  grep -nH -E -w $m \
> gcc/*.[ch] gcc/config/*.[ch] gcc/config/*/*.{c,h,md} ; 
>done
>
>(this currently has 11 false-positives)

After the patches the macros should be removed so that no new uses appear.

Thanks, Richard.

>[1] with one exception, in patch 10 in gcc/ira-emit.c (ira_emit) where
>I introduced a new local to avoid overlong lines.
>
>David Malcolm (13):
>  Rename macros (basic_block_info_for_function,
>BASIC_BLOCK_FOR_FUNCTION, SET_BASIC_BLOCK_FOR_FUNCTION)
>  Rename last_basic_block_for_function to last_basic_block_for_fn.
>  Rename label_to_block_map_for_function to label_to_block_map_for_fn.
>  Rename profile_status_for_function to profile_status_for_fn.
>  Eliminate SET_BASIC_BLOCK macro.
>  Eliminate BASIC_BLOCK macro.
>  Eliminate basic_block_info macro.
>  Eliminate label_to_block_map macro.
>  Eliminate profile_status macro.
>  Eliminate last_basic_block macro.
>  Eliminate FOR_EACH_BB macro.
>  Eliminate FOR_EACH_BB_REVERSE macro.
>  Eliminate FOR_ALL_BB macro.
>
> gcc/alias.c  |   2 +-
> gcc/asan.c   |   6 +-
> gcc/auto-inc-dec.c   |   2 +-
> gcc/basic-block.h|  32 +++--
> gcc/bb-reorder.c |  29 
> gcc/bt-load.c|  45 ++--
> gcc/caller-save.c|   8 +--
> gcc/cfg.c|  32 -
> gcc/cfganal.c|  35 +-
> gcc/cfgbuild.c   |  12 ++--
> gcc/cfgcleanup.c |   6 +-
> gcc/cfgexpand.c  |  14 ++--
> gcc/cfghooks.c   |  16 ++---
> gcc/cfgloop.c|  20 +++---
> gcc/cfgloopanal.c|   8 +--
> gcc/cfgloopmanip.c   |   6 +-
> gcc/cfgrtl.c |  61 
> gcc/cgraphbuild.c|   8 +--
> gcc/combine-stack-adj.c  |   2 +-
> gcc/combine.c|   8 +--
> gcc/config/arm/arm.c |   4 +-
> gcc/config/bfin/bfin.c   |   4 +-
> gcc/config/c6x/c6x.c |   6 +-
> gcc/config/epiphany/resolve-sw-modes.c   |   6 +-
> gcc/config/frv/frv.c |   8 +--
> gcc/config/i386/i386.c   |   2 +-
> gcc/config/ia64/ia64.c   |   6 +-
> gcc/config/mips/mips.c   |   8 +--
> gcc/config/picochip/picochip.c   |   2 +-
> gcc/config/rs6000/rs6000.c   |   2 +-
> gcc/config/s390/s390.c   |   4 +-
> gcc/config/sh/sh.c   |   2 +-
> gcc/config/spu/spu.c |   6 +-
> gcc/config/tilegx/tilegx.c   |   4 +-
> gcc/config/tilepro/tilepro.c |   4 +-
> gcc/coverage.c   |   2 +-
> gcc/cprop.c  |  23 ---
> gcc/cse.c|   8 +--
> gcc/dce.c|  10 +--
> gcc/df-core.c|  68 +-
> gcc/df-problems.c|  54 +++
> gcc/df-scan.c|  42 ++-
> gcc/df.h |   2 +-
> gcc/dominance.c  |  37 +-
> gcc/domwalk.c|   2 +-
> gcc/dse.c|  14 ++--
> gcc/except.c |   2 +-
> gcc/final.c  |   6 +-
> gcc/function.c   |  16 ++---
> gcc/gcse.c   |  54 

Re: RFC Asan instrumentation control

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 04:10:31PM +0100, Ondřej Bílka wrote:
> Currently this code with sanitize=address gets expanded
> 
> int foo(char *x, char *y, int i)
> {
>   x[i] = y[i];
> }
> 
> to 
> 
> snip
>   movq%rsi, %rax
> movq%rsi, %rdx
> shrq$3, %rax
> andl$7, %edx
> movzbl  2147450880(%rax), %eax
> cmpb%dl, %al
> jle .L18
> .L2:
> 
> snip 
> 
> .L18:
> .cfi_restore_state
> testb   %al, %al
> je  .L2
> movq%rsi, %rdi
> call__asan_report_load1
> 
> There is nothing impossible about disabling these checks. You just fill a
> page to make this check pass and use mmap to make entire shadow memory point
> to that page.

I don't think I understand you.  __asan_report_* is a fatal error, the
program is terminated there.  What is costly on the asan instrumentation is
exactly the >>, memory loads, extra comparisons, in non-buggy programs
you never enter the __asan_report_* calls.

Jakub


[PATCH 13/13] Eliminate FOR_ALL_BB macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (FOR_ALL_BB): Eliminate macro.

* cfg.c (alloc_aux_for_blocks, clear_aux_for_blocks): Replace
uses of FOR_ALL_BB with FOR_ALL_BB_FN, making uses of cfun explicit.

* cfganal.c (inverted_post_order_compute): Likewise.
* cfgcleanup.c (try_optimize_cfg): Likewise.
* cfgexpand.c (add_scope_conflicts): Likewise.
* cfghooks.c (dump_flow_info, account_profile_record): Likewise.
* cfgrtl.c (relink_block_chain): Likewise.
* dce.c (mark_artificial_uses): Likewise.
* df-core.c (df_set_blocks, df_compute_cfg_image, df_dump): Likewise.
* df-problems.c (df_lr_verify_solution_start,
df_lr_verify_solution_end, df_lr_verify_transfer_functions,
df_live_verify_solution_start, df_live_verify_solution_end,
df_live_set_all_dirty, df_live_verify_transfer_functions,
df_md_local_compute): Likewise.
* df-scan.c (df_scan_free_internal, df_scan_alloc)
df_reorganize_refs_by_insn, df_scan_verify): Likewise.
* dominance.c (compute_dom_fast_query, calculate_dominance_info,
free_dominance_info): Likewise.
* dse.c (dse_step1, dse_step3, dse_step4, dse_step6): Likewise.
* graph.c (draw_cfg_edges): Likewise.
* graphite-scop-detection.c (print_graphite_scop_statistics,
dot_all_scops_1): Likewise.
* graphite.c (print_global_statistics,
print_graphite_scop_statistics): Likewise.
* ira.c (do_reload): Likewise.
* loop-init.c (loop_optimizer_finalize): Likewise.
* lto-streamer-in.c (input_function): Likewise.
* lto-streamer-out.c (output_function): Likewise.
* mcf.c (adjust_cfg_counts): Likewise.
* predict.c (estimate_loops): Likewise.
* sched-rgn.c (haifa_find_rgns): Likewise.
* tree-cfg.c (split_critical_edges): Likewise.
* tree-dfa.c (renumber_gimple_stmt_uids): Likewise.
* tree-loop-distribution.c (tree_loop_distribution): Likewise.
* tree-ssa-pre.c (compute_antic, insert, init_pre): Likewise.
* tree-ssa-propagate.c (ssa_prop_init): Likewise.
* var-tracking.c (vt_initialize, vt_finalize): Likewise.
* vtable-verify.c (vtable_verify_main): Likewise.
* web.c (web_main): Likewise.
---
 gcc/basic-block.h |  3 ---
 gcc/cfg.c |  4 ++--
 gcc/cfganal.c |  2 +-
 gcc/cfgcleanup.c  |  2 +-
 gcc/cfgexpand.c   |  4 ++--
 gcc/cfghooks.c|  4 ++--
 gcc/cfgrtl.c  |  2 +-
 gcc/dce.c |  2 +-
 gcc/df-core.c |  8 
 gcc/df-problems.c | 22 +++---
 gcc/df-scan.c |  8 
 gcc/df.h  |  2 +-
 gcc/dominance.c   |  6 +++---
 gcc/dse.c |  8 
 gcc/graph.c   |  2 +-
 gcc/graphite-scop-detection.c |  6 +++---
 gcc/graphite.c|  4 ++--
 gcc/ira.c |  4 ++--
 gcc/loop-init.c   |  2 +-
 gcc/lto-streamer-in.c |  4 ++--
 gcc/lto-streamer-out.c|  4 ++--
 gcc/mcf.c |  2 +-
 gcc/predict.c |  2 +-
 gcc/sched-rgn.c   |  2 +-
 gcc/tree-cfg.c|  2 +-
 gcc/tree-dfa.c|  2 +-
 gcc/tree-loop-distribution.c  |  2 +-
 gcc/tree-ssa-pre.c|  8 
 gcc/tree-ssa-propagate.c  |  2 +-
 gcc/var-tracking.c|  4 ++--
 gcc/vtable-verify.c   |  2 +-
 gcc/web.c |  6 +++---
 32 files changed, 67 insertions(+), 70 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 75f16ac..b323a1f 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -362,9 +362,6 @@ struct GTY(()) control_flow_graph {
 /* Cycles through _all_ basic blocks, even the fake ones (entry and
exit block).  */
 
-#define FOR_ALL_BB(BB) \
-  for (BB = ENTRY_BLOCK_PTR_FOR_FN (cfun); BB; BB = BB->next_bb)
-
 #define FOR_ALL_BB_FN(BB, FN) \
   for (BB = ENTRY_BLOCK_PTR_FOR_FN (FN); BB; BB = BB->next_bb)
 
diff --git a/gcc/cfg.c b/gcc/cfg.c
index 4f9d769..d4d00a4 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -576,7 +576,7 @@ alloc_aux_for_blocks (int size)
 {
   basic_block bb;
 
-  FOR_ALL_BB (bb)
+  FOR_ALL_BB_FN (bb, cfun)
alloc_aux_for_block (bb, size);
 }
 }
@@ -588,7 +588,7 @@ clear_aux_for_blocks (void)
 {
   basic_block bb;
 
-  FOR_ALL_BB (bb)
+  FOR_ALL_BB_FN (bb, cfun)
 bb->aux = NULL;
 }
 
diff --git a/gcc/cfganal.c b/gcc/cfganal.c
index 3371b4a..d7e0382 100644
--- a/gcc/cfganal.c
+++ b/gcc/cfganal.c
@@ -784,7 +784,7 @@ inverted_post_order_compute (int *post_order)
   bitmap_clear (visited);
 
   /* Put all blocks that have no successor into the initial work list.  */
-  FOR_ALL_BB (bb)
+  FOR_ALL_BB_FN (bb, cfun)
 if (EDGE_COUNT (bb->succs) == 0)
   {
 /* Push the initia

Re: RFC Asan instrumentation control

2013-12-06 Thread Ondřej Bílka
On Fri, Dec 06, 2013 at 01:34:43PM +0100, Jakub Jelinek wrote:
> On Fri, Dec 06, 2013 at 01:32:42PM +0100, Ondřej Bílka wrote:
> > On second though besides of decreasing of code size there is no reason
> > to complicate compilation for these features. A more flexible way is add
> > environment variable that will disable these at runtime.
> 
> That is not possible, the instrumentation is inserted inline by the
> compiler.
> 
>   Jakub

Currently this code with sanitize=address gets expanded

int foo(char *x, char *y, int i)
{
  x[i] = y[i];
}

to 

snip
movq%rsi, %rax
movq%rsi, %rdx
shrq$3, %rax
andl$7, %edx
movzbl  2147450880(%rax), %eax
cmpb%dl, %al
jle .L18
.L2:

snip 

.L18:
.cfi_restore_state
testb   %al, %al
je  .L2
movq%rsi, %rdi
call__asan_report_load1

There is nothing impossible about disabling these checks. You just fill a
page to make this check pass and use mmap to make entire shadow memory point
to that page.


[PATCH 09/13] Eliminate profile_status macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (profile_status): Eliminate macro.

* cfgbuild.c (find_many_sub_basic_blocks): Eliminate use of
profile_status macro in favor of profile_status_for_fn, making
use of cfun explicit.
* cfghooks.c (account_profile_record): Likewise.
* cfgloopanal.c (single_likely_exit):
* cfgrtl.c (rtl_verify_edges, rtl_account_profile_record): Likewise.
* graphite.c (graphite_finalize):
* internal-fn.c (ubsan_expand_si_overflow_addsub_check,
ubsan_expand_si_overflow_neg_check,
ubsan_expand_si_overflow_mul_check): Likewise.
* ipa-split.c (consider_split, execute_split_functions):
* loop-unroll.c (decide_peel_simple):
* optabs.c (emit_cmp_and_jump_insn_1):
* predict.c (maybe_hot_edge_p, probably_never_executed,
predictable_edge_p, probability_reliable_p, gimple_predict_edge,
tree_estimate_probability_driver, estimate_bb_frequencies,
compute_function_frequency, rebuild_frequencies): Likewise.
* profile.c (compute_branch_probabilities): Likewise.
* tree-cfg.c (gimple_account_profile_record): Likewise.
* tree-inline.c (optimize_inline_calls): Likewise.
---
 gcc/basic-block.h |  1 -
 gcc/cfgbuild.c|  2 +-
 gcc/cfghooks.c|  4 ++--
 gcc/cfgloopanal.c |  2 +-
 gcc/cfgrtl.c  |  6 +++---
 gcc/graphite.c|  2 +-
 gcc/internal-fn.c |  6 +++---
 gcc/ipa-split.c   |  4 ++--
 gcc/loop-unroll.c |  2 +-
 gcc/optabs.c  |  2 +-
 gcc/predict.c | 26 +-
 gcc/profile.c |  4 ++--
 gcc/tree-cfg.c|  4 ++--
 gcc/tree-inline.c |  3 ++-
 14 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 4ab8289..d000a43 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -328,7 +328,6 @@ struct GTY(()) control_flow_graph {
 
 /* Defines for textual backward source compatibility.  */
 #define last_basic_block   (cfun->cfg->x_last_basic_block)
-#define profile_status (cfun->cfg->x_profile_status)
 
 /* For iterating over basic blocks.  */
 #define FOR_BB_BETWEEN(BB, FROM, TO, DIR) \
diff --git a/gcc/cfgbuild.c b/gcc/cfgbuild.c
index 08534d4..a0c2c66 100644
--- a/gcc/cfgbuild.c
+++ b/gcc/cfgbuild.c
@@ -618,7 +618,7 @@ find_many_sub_basic_blocks (sbitmap blocks)
 
   /* Update branch probabilities.  Expect only (un)conditional jumps
  to be created with only the forward edges.  */
-  if (profile_status != PROFILE_ABSENT)
+  if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
 FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
   {
edge e;
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index 0cd6af0..ab1c15f 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -1411,7 +1411,7 @@ account_profile_record (struct profile_record *record, 
int after_pass)
   FOR_ALL_BB (bb)
{
   if (bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
- && profile_status != PROFILE_ABSENT)
+ && profile_status_for_fn (cfun) != PROFILE_ABSENT)
{
  sum = 0;
  FOR_EACH_EDGE (e, ei, bb->succs)
@@ -1426,7 +1426,7 @@ account_profile_record (struct profile_record *record, 
int after_pass)
record->num_mismatched_count_out[after_pass]++;
}
   if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
- && profile_status != PROFILE_ABSENT)
+ && profile_status_for_fn (cfun) != PROFILE_ABSENT)
{
  sum = 0;
  FOR_EACH_EDGE (e, ei, bb->preds)
diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c
index 0cee6c6..2260f4b 100644
--- a/gcc/cfgloopanal.c
+++ b/gcc/cfgloopanal.c
@@ -470,7 +470,7 @@ single_likely_exit (struct loop *loop)
 ruled out by this test.  The static branch prediction algorithm
  will not assign such a low probability to conditionals for usual
  reasons.  */
-  if (profile_status != PROFILE_ABSENT
+  if (profile_status_for_fn (cfun) != PROFILE_ABSENT
  && ex->probability < 5 && !ex->count)
continue;
   if (!found)
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 772d939..34fe4f3 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -2420,7 +2420,7 @@ rtl_verify_edges (void)
  && any_condjump_p (BB_END (bb)))
{
  if (XINT (note, 0) != BRANCH_EDGE (bb)->probability
- && profile_status != PROFILE_ABSENT)
+ && profile_status_for_fn (cfun) != PROFILE_ABSENT)
{
  error ("verify_flow_info: REG_BR_PROB does not match cfg %i %i",
 XINT (note, 0), BRANCH_EDGE (bb)->probability);
@@ -5011,10 +5011,10 @@ rtl_account_profile_record (basic_block bb, int 
after_pass,
   {
record->size[after_pass]
  += insn_rtx_cost (PATTERN (insn), false);
-   if (profile_status == PROFILE_READ)
+   if (profile_status_for_fn (cfun) == PROFILE_READ)
  record->time[after_pass]
+= insn_rtx_cost (PATTERN (insn), true) * bb->count;
-   else if (prof

[PATCH 08/13] Eliminate label_to_block_map macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (label_to_block_map): Eliminate macro.

* gimple.c (gimple_set_bb): Replace uses of label_to_block_map
with uses of label_to_block_map_for_fn, making uses of cfun be
explicit.
* tree-cfg.c (delete_tree_cfg_annotations): Likewise.
(verify_gimple_label): Likewise.
---
 gcc/basic-block.h | 1 -
 gcc/gimple.c  | 8 +---
 gcc/tree-cfg.c| 5 +++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 69689f3..4ab8289 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -328,7 +328,6 @@ struct GTY(()) control_flow_graph {
 
 /* Defines for textual backward source compatibility.  */
 #define last_basic_block   (cfun->cfg->x_last_basic_block)
-#define label_to_block_map (cfun->cfg->x_label_to_block_map)
 #define profile_status (cfun->cfg->x_profile_status)
 
 /* For iterating over basic blocks.  */
diff --git a/gcc/gimple.c b/gcc/gimple.c
index f11362a..077dca5 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -1475,17 +1475,19 @@ gimple_set_bb (gimple stmt, basic_block bb)
   uid = LABEL_DECL_UID (t);
   if (uid == -1)
{
- unsigned old_len = vec_safe_length (label_to_block_map);
+ unsigned old_len =
+   vec_safe_length (label_to_block_map_for_fn (cfun));
  LABEL_DECL_UID (t) = uid = cfun->cfg->last_label_uid++;
  if (old_len <= (unsigned) uid)
{
  unsigned new_len = 3 * uid / 2 + 1;
 
- vec_safe_grow_cleared (label_to_block_map, new_len);
+ vec_safe_grow_cleared (label_to_block_map_for_fn (cfun),
+new_len);
}
}
 
-  (*label_to_block_map)[uid] = bb;
+  (*label_to_block_map_for_fn (cfun))[uid] = bb;
 }
 }
 
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 9558546..f384b04 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -2379,7 +2379,7 @@ stmt_ends_bb_p (gimple t)
 void
 delete_tree_cfg_annotations (void)
 {
-  vec_free (label_to_block_map);
+  vec_free (label_to_block_map_for_fn (cfun));
 }
 
 
@@ -4281,7 +4281,8 @@ verify_gimple_label (gimple stmt)
 
   uid = LABEL_DECL_UID (decl);
   if (cfun->cfg
-  && (uid == -1 || (*label_to_block_map)[uid] != gimple_bb (stmt)))
+  && (uid == -1
+ || (*label_to_block_map_for_fn (cfun))[uid] != gimple_bb (stmt)))
 {
   error ("incorrect entry in label_to_block_map");
   err |= true;
-- 
1.7.11.7



[PATCH 07/13] Eliminate basic_block_info macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (basic_block_info): Eliminate macro.

* cfgrtl.c (rtl_create_basic_block): Replace uses of
basic_block_info with basic_block_info_for_fn, making uses
of cfun be explicit.
* tree-cfg.c (build_gimple_cfg, create_bb): Likewise.
---
 gcc/basic-block.h |  1 -
 gcc/cfgrtl.c  |  4 ++--
 gcc/tree-cfg.c| 10 ++
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 3bd011e..69689f3 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -327,7 +327,6 @@ struct GTY(()) control_flow_graph {
   ((*basic_block_info_for_fn (FN))[(N)] = (BB))
 
 /* Defines for textual backward source compatibility.  */
-#define basic_block_info   (cfun->cfg->x_basic_block_info)
 #define last_basic_block   (cfun->cfg->x_last_basic_block)
 #define label_to_block_map (cfun->cfg->x_label_to_block_map)
 #define profile_status (cfun->cfg->x_profile_status)
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index de110f4..772d939 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -355,10 +355,10 @@ rtl_create_basic_block (void *headp, void *endp, 
basic_block after)
   basic_block bb;
 
   /* Grow the basic block array if needed.  */
-  if ((size_t) last_basic_block >= basic_block_info->length ())
+  if ((size_t) last_basic_block >= basic_block_info_for_fn (cfun)->length ())
 {
   size_t new_size = last_basic_block + (last_basic_block + 3) / 4;
-  vec_safe_grow_cleared (basic_block_info, new_size);
+  vec_safe_grow_cleared (basic_block_info_for_fn (cfun), new_size);
 }
 
   n_basic_blocks_for_fn (cfun)++;
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index a706730..9558546 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -242,8 +242,10 @@ build_gimple_cfg (gimple_seq seq)
 create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
   /* Adjust the size of the array.  */
-  if (basic_block_info->length () < (size_t) n_basic_blocks_for_fn (cfun))
-vec_safe_grow_cleared (basic_block_info, n_basic_blocks_for_fn (cfun));
+  if (basic_block_info_for_fn (cfun)->length ()
+  < (size_t) n_basic_blocks_for_fn (cfun))
+vec_safe_grow_cleared (basic_block_info_for_fn (cfun),
+  n_basic_blocks_for_fn (cfun));
 
   /* To speed up statement iterator walks, we first purge dead labels.  */
   cleanup_dead_labels ();
@@ -603,10 +605,10 @@ create_bb (void *h, void *e, basic_block after)
   link_block (bb, after);
 
   /* Grow the basic block array if needed.  */
-  if ((size_t) last_basic_block == basic_block_info->length ())
+  if ((size_t) last_basic_block == basic_block_info_for_fn (cfun)->length ())
 {
   size_t new_size = last_basic_block + (last_basic_block + 3) / 4;
-  vec_safe_grow_cleared (basic_block_info, new_size);
+  vec_safe_grow_cleared (basic_block_info_for_fn (cfun), new_size);
 }
 
   /* Add the newly created block to the array.  */
-- 
1.7.11.7



[PATCH 04/13] Rename profile_status_for_function to profile_status_for_fn.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (profile_status_for_function): Rename to...
(profile_status_for_fn): ...this.

* cfg.c (check_bb_profile): Update for renaming.
* cgraphbuild.c (compute_call_stmt_bb_frequency): Likewise.
* lto-streamer-in.c (input_cfg): Likewise.
* lto-streamer-out.c (output_cfg):  Likewise.
* predict.c (maybe_hot_frequency_p, maybe_hot_count_p,
maybe_hot_bb_p, probably_never_executed)
(handle_missing_profiles): Likewise.
* tree-cfg.c (init_empty_tree_cfg_for_function): Likewise.
* tree-inline.c (copy_bb, initialize_cfun): Likewise.
---
 gcc/basic-block.h  |  2 +-
 gcc/cfg.c  |  2 +-
 gcc/cgraphbuild.c  |  2 +-
 gcc/lto-streamer-in.c  |  4 ++--
 gcc/lto-streamer-out.c |  2 +-
 gcc/predict.c  | 12 ++--
 gcc/tree-cfg.c |  2 +-
 gcc/tree-inline.c  |  4 ++--
 8 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 1471972..da93c6f 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -319,7 +319,7 @@ struct GTY(()) control_flow_graph {
 #define n_edges_for_fn(FN)  ((FN)->cfg->x_n_edges)
 #define last_basic_block_for_fn(FN) ((FN)->cfg->x_last_basic_block)
 #define label_to_block_map_for_fn(FN)   ((FN)->cfg->x_label_to_block_map)
-#define profile_status_for_function(FN) 
((FN)->cfg->x_profile_status)
+#define profile_status_for_fn(FN)   ((FN)->cfg->x_profile_status)
 
 #define BASIC_BLOCK_FOR_FN(FN,N) \
   ((*basic_block_info_for_fn (FN))[(N)])
diff --git a/gcc/cfg.c b/gcc/cfg.c
index 6bceca5..786fe48 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -408,7 +408,7 @@ check_bb_profile (basic_block bb, FILE * file, int indent, 
int flags)
   memset ((void *) s_indent, ' ', (size_t) indent);
   s_indent[indent] = '\0';
 
-  if (profile_status_for_function (fun) == PROFILE_ABSENT)
+  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 return;
 
   if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
diff --git a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c
index 9a63982..6c6698b 100644
--- a/gcc/cgraphbuild.c
+++ b/gcc/cgraphbuild.c
@@ -208,7 +208,7 @@ compute_call_stmt_bb_frequency (tree decl, basic_block bb)
 (DECL_STRUCT_FUNCTION (decl))->frequency;
   int freq = bb->frequency;
 
-  if (profile_status_for_function (DECL_STRUCT_FUNCTION (decl)) == 
PROFILE_ABSENT)
+  if (profile_status_for_fn (DECL_STRUCT_FUNCTION (decl)) == PROFILE_ABSENT)
 return CGRAPH_FREQ_BASE;
 
   if (!entry_freq)
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
index 91fb12d..8dc94bd 100644
--- a/gcc/lto-streamer-in.c
+++ b/gcc/lto-streamer-in.c
@@ -632,8 +632,8 @@ input_cfg (struct lto_input_block *ib, struct data_in 
*data_in,
   init_empty_tree_cfg_for_function (fn);
   init_ssa_operands (fn);
 
-  profile_status_for_function (fn) = streamer_read_enum (ib, profile_status_d,
-PROFILE_LAST);
+  profile_status_for_fn (fn) = streamer_read_enum (ib, profile_status_d,
+  PROFILE_LAST);
 
   bb_count = streamer_read_uhwi (ib);
 
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index 858d49e..615cc84 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -1630,7 +1630,7 @@ output_cfg (struct output_block *ob, struct function *fn)
   ob->main_stream = ob->cfg_stream;
 
   streamer_write_enum (ob->main_stream, profile_status_d, PROFILE_LAST,
-  profile_status_for_function (fn));
+  profile_status_for_fn (fn));
 
   /* Output the number of the highest basic block.  */
   streamer_write_uhwi (ob, last_basic_block_for_fn (fn));
diff --git a/gcc/predict.c b/gcc/predict.c
index 1cd3fa6..e959a3b 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -121,7 +121,7 @@ maybe_hot_frequency_p (struct function *fun, int freq)
   if (node->frequency == NODE_FREQUENCY_HOT)
 return true;
 }
-  if (profile_status_for_function (fun) == PROFILE_ABSENT)
+  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
 return true;
   if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
   && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
@@ -164,7 +164,7 @@ set_hot_bb_threshold (gcov_type min)
 static inline bool
 maybe_hot_count_p (struct function *fun, gcov_type count)
 {
-  if (fun && profile_status_for_function (fun) != PROFILE_READ)
+  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
 return true;
   /* Code executed at most once is not hot.  */
   if (profile_info->runs >= count)
@@ -179,7 +179,7 @@ bool
 maybe_hot_bb_p (struct function *fun, const_basic_block bb)
 {
   gcc_checking_assert (fun);
-  if (profile_status_for_function (fun) == PROFILE_READ)
+  if (profile_status_for_fn (fun) == PROFILE_READ)
 return maybe_hot_count_p (fun, bb->count);
   return maybe_hot_frequency_p (fun, bb->frequency);
 }
@@ -239

[PATCH 05/13] Eliminate SET_BASIC_BLOCK macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (SET_BASIC_BLOCK): Eliminate macro.

* cfg.c (compact_blocks): Replace uses of SET_BASIC_BLOCK
with SET_BASIC_BLOCK_FOR_FN, making use of cfun explicit.
(expunge_block): Likewise.
* cfgrtl.c (create_basic_block_structure): Likewise.
* df-core.c (df_compact_blocks, df_bb_replace): Likewise.
* sel-sched.c (create_block_for_bookkeeping): Likewise.
* tree-cfg.c (create_bb): Likewise.
---
 gcc/basic-block.h |  1 -
 gcc/cfg.c | 10 +-
 gcc/cfgrtl.c  |  2 +-
 gcc/df-core.c |  8 
 gcc/sel-sched.c   |  4 ++--
 gcc/tree-cfg.c|  2 +-
 6 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index da93c6f..f759e27 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -333,7 +333,6 @@ struct GTY(()) control_flow_graph {
 #define profile_status (cfun->cfg->x_profile_status)
 
 #define BASIC_BLOCK(N) ((*basic_block_info)[(N)])
-#define SET_BASIC_BLOCK(N,BB)  ((*basic_block_info)[(N)] = (BB))
 
 /* For iterating over basic blocks.  */
 #define FOR_BB_BETWEEN(BB, FROM, TO, DIR) \
diff --git a/gcc/cfg.c b/gcc/cfg.c
index 786fe48..f386168 100644
--- a/gcc/cfg.c
+++ b/gcc/cfg.c
@@ -153,8 +153,8 @@ compact_blocks (void)
 {
   int i;
 
-  SET_BASIC_BLOCK (ENTRY_BLOCK, ENTRY_BLOCK_PTR_FOR_FN (cfun));
-  SET_BASIC_BLOCK (EXIT_BLOCK, EXIT_BLOCK_PTR_FOR_FN (cfun));
+  SET_BASIC_BLOCK_FOR_FN (cfun, ENTRY_BLOCK, ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  SET_BASIC_BLOCK_FOR_FN (cfun, EXIT_BLOCK, EXIT_BLOCK_PTR_FOR_FN (cfun));
 
   if (df)
 df_compact_blocks ();
@@ -165,14 +165,14 @@ compact_blocks (void)
   i = NUM_FIXED_BLOCKS;
   FOR_EACH_BB (bb)
{
- SET_BASIC_BLOCK (i, bb);
+ SET_BASIC_BLOCK_FOR_FN (cfun, i, bb);
  bb->index = i;
  i++;
}
   gcc_assert (i == n_basic_blocks_for_fn (cfun));
 
   for (; i < last_basic_block; i++)
-   SET_BASIC_BLOCK (i, NULL);
+   SET_BASIC_BLOCK_FOR_FN (cfun, i, NULL);
 }
   last_basic_block = n_basic_blocks_for_fn (cfun);
 }
@@ -183,7 +183,7 @@ void
 expunge_block (basic_block b)
 {
   unlink_block (b);
-  SET_BASIC_BLOCK (b->index, NULL);
+  SET_BASIC_BLOCK_FOR_FN (cfun, b->index, NULL);
   n_basic_blocks_for_fn (cfun)--;
   /* We should be able to ggc_free here, but we are not.
  The dead SSA_NAMES are left pointing to dead statements that are pointing
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 63f44af..045d78b 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -331,7 +331,7 @@ create_basic_block_structure (rtx head, rtx end, rtx 
bb_note, basic_block after)
   bb->index = last_basic_block++;
   bb->flags = BB_NEW | BB_RTL;
   link_block (bb, after);
-  SET_BASIC_BLOCK (bb->index, bb);
+  SET_BASIC_BLOCK_FOR_FN (cfun, bb->index, bb);
   df_bb_refs_record (bb->index, false);
   update_bb_for_insn (bb);
   BB_SET_PARTITION (bb, BB_UNPARTITIONED);
diff --git a/gcc/df-core.c b/gcc/df-core.c
index 37876af..4fb92a9 100644
--- a/gcc/df-core.c
+++ b/gcc/df-core.c
@@ -1601,7 +1601,7 @@ df_compact_blocks (void)
   i = NUM_FIXED_BLOCKS;
   FOR_EACH_BB (bb)
 {
-  SET_BASIC_BLOCK (i, bb);
+  SET_BASIC_BLOCK_FOR_FN (cfun, i, bb);
   bb->index = i;
   i++;
 }
@@ -1609,7 +1609,7 @@ df_compact_blocks (void)
   gcc_assert (i == n_basic_blocks_for_fn (cfun));
 
   for (; i < last_basic_block; i++)
-SET_BASIC_BLOCK (i, NULL);
+SET_BASIC_BLOCK_FOR_FN (cfun, i, NULL);
 
 #ifdef DF_DEBUG_CFG
   if (!df_lr->solutions_dirty)
@@ -1645,10 +1645,10 @@ df_bb_replace (int old_index, basic_block new_block)
 }
 
   df_clear_bb_dirty (new_block);
-  SET_BASIC_BLOCK (old_index, new_block);
+  SET_BASIC_BLOCK_FOR_FN (cfun, old_index, new_block);
   new_block->index = old_index;
   df_set_bb_dirty (BASIC_BLOCK (old_index));
-  SET_BASIC_BLOCK (new_block_index, NULL);
+  SET_BASIC_BLOCK_FOR_FN (cfun, new_block_index, NULL);
 }
 
 
diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index 1e3fcf0..1195f7e 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -4663,8 +4663,8 @@ create_block_for_bookkeeping (edge e1, edge e2)
  new_bb->index = succ->index;
  succ->index = i;
 
- SET_BASIC_BLOCK (new_bb->index, new_bb);
- SET_BASIC_BLOCK (succ->index, succ);
+ SET_BASIC_BLOCK_FOR_FN (cfun, new_bb->index, new_bb);
+ SET_BASIC_BLOCK_FOR_FN (cfun, succ->index, succ);
 
  memcpy (&gbi, SEL_GLOBAL_BB_INFO (new_bb), sizeof (gbi));
  memcpy (SEL_GLOBAL_BB_INFO (new_bb), SEL_GLOBAL_BB_INFO (succ),
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 6c2cc16..2d7916b 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -610,7 +610,7 @@ create_bb (void *h, void *e, basic_block after)
 }
 
   /* Add the newly created block to the array.  */
-  SET_BASIC_BLOCK (last_basic_block, bb);
+  SET_BASIC_BLOCK_FOR_FN (cfun, last_basic_block, bb);
 
   n_basic_blocks_for_fn

[PATCH 03/13] Rename label_to_block_map_for_function to label_to_block_map_for_fn.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (label_to_block_map_for_function): Rename to...
(label_to_block_map_for_fn): ...this.
* lto-streamer-in.c (input_cfg): Update for renaming.
* tree-cfg.c (init_empty_tree_cfg_for_function): Likewise.
---
 gcc/basic-block.h | 2 +-
 gcc/lto-streamer-in.c | 4 ++--
 gcc/tree-cfg.c| 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 88b0e48..1471972 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -318,7 +318,7 @@ struct GTY(()) control_flow_graph {
 #define n_basic_blocks_for_fn(FN)   ((FN)->cfg->x_n_basic_blocks)
 #define n_edges_for_fn(FN)  ((FN)->cfg->x_n_edges)
 #define last_basic_block_for_fn(FN) ((FN)->cfg->x_last_basic_block)
-#define label_to_block_map_for_function(FN)  ((FN)->cfg->x_label_to_block_map)
+#define label_to_block_map_for_fn(FN)   ((FN)->cfg->x_label_to_block_map)
 #define profile_status_for_function(FN) 
((FN)->cfg->x_profile_status)
 
 #define BASIC_BLOCK_FOR_FN(FN,N) \
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
index 9ad4f5f..91fb12d 100644
--- a/gcc/lto-streamer-in.c
+++ b/gcc/lto-streamer-in.c
@@ -641,8 +641,8 @@ input_cfg (struct lto_input_block *ib, struct data_in 
*data_in,
   if (bb_count > basic_block_info_for_fn (fn)->length ())
 vec_safe_grow_cleared (basic_block_info_for_fn (fn), bb_count);
 
-  if (bb_count > label_to_block_map_for_function (fn)->length ())
-vec_safe_grow_cleared (label_to_block_map_for_function (fn), bb_count);
+  if (bb_count > label_to_block_map_for_fn (fn)->length ())
+vec_safe_grow_cleared (label_to_block_map_for_fn (fn), bb_count);
 
   index = streamer_read_hwi (ib);
   while (index != -1)
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 3df4cbe..998ee26 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -190,8 +190,8 @@ init_empty_tree_cfg_for_function (struct function *fn)
 initial_cfg_capacity);
 
   /* Build a mapping of labels to their associated blocks.  */
-  vec_alloc (label_to_block_map_for_function (fn), initial_cfg_capacity);
-  vec_safe_grow_cleared (label_to_block_map_for_function (fn),
+  vec_alloc (label_to_block_map_for_fn (fn), initial_cfg_capacity);
+  vec_safe_grow_cleared (label_to_block_map_for_fn (fn),
 initial_cfg_capacity);
 
   SET_BASIC_BLOCK_FOR_FN (fn, ENTRY_BLOCK, ENTRY_BLOCK_PTR_FOR_FN (fn));
-- 
1.7.11.7



[PATCH 01/13] Rename macros (basic_block_info_for_function, BASIC_BLOCK_FOR_FUNCTION, SET_BASIC_BLOCK_FOR_FUNCTION)

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (basic_block_info_for_function): Rename to...
(basic_block_info_for_fn): ...this.
(BASIC_BLOCK_FOR_FUNCTION): Rename to...
(BASIC_BLOCK_FOR_FN): ...this.
(SET_BASIC_BLOCK_FOR_FUNCTION): Rename to...
(SET_BASIC_BLOCK_FOR_FN): ...this.

* gimple-streamer-in.c (input_phi, input_bb): Update for renaming
of BASIC_BLOCK_FOR_FUNCTION to BASIC_BLOCK_FOR_FN.
* ipa-utils.c (ipa_merge_profiles): Likewise.
* lto-streamer-in.c (make_new_block): Update for renaming of
SET_BASIC_BLOCK_FOR_FUNCTION to SET_BASIC_BLOCK_FOR_FN.
(input_cfg): Update for renamings.
* tree-cfg.c (init_empty_tree_cfg_for_function): Likewise.
(dump_function_to_file): Update for renaming of
basic_block_info_for_function to basic_block_info_for_fn.
---
 gcc/basic-block.h| 10 +-
 gcc/gimple-streamer-in.c |  4 ++--
 gcc/ipa-utils.c  |  4 ++--
 gcc/lto-streamer-in.c| 14 +++---
 gcc/tree-cfg.c   | 12 +---
 5 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 58bacc3..234f6e9 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -314,17 +314,17 @@ struct GTY(()) control_flow_graph {
 /* Defines for accessing the fields of the CFG structure for function FN.  */
 #define ENTRY_BLOCK_PTR_FOR_FN(FN)  ((FN)->cfg->x_entry_block_ptr)
 #define EXIT_BLOCK_PTR_FOR_FN(FN)   ((FN)->cfg->x_exit_block_ptr)
-#define basic_block_info_for_function(FN)((FN)->cfg->x_basic_block_info)
+#define basic_block_info_for_fn(FN) ((FN)->cfg->x_basic_block_info)
 #define n_basic_blocks_for_fn(FN)   ((FN)->cfg->x_n_basic_blocks)
 #define n_edges_for_fn(FN)  ((FN)->cfg->x_n_edges)
 #define last_basic_block_for_function(FN)((FN)->cfg->x_last_basic_block)
 #define label_to_block_map_for_function(FN)  ((FN)->cfg->x_label_to_block_map)
 #define profile_status_for_function(FN) 
((FN)->cfg->x_profile_status)
 
-#define BASIC_BLOCK_FOR_FUNCTION(FN,N) \
-  ((*basic_block_info_for_function (FN))[(N)])
-#define SET_BASIC_BLOCK_FOR_FUNCTION(FN,N,BB) \
-  ((*basic_block_info_for_function (FN))[(N)] = (BB))
+#define BASIC_BLOCK_FOR_FN(FN,N) \
+  ((*basic_block_info_for_fn (FN))[(N)])
+#define SET_BASIC_BLOCK_FOR_FN(FN,N,BB) \
+  ((*basic_block_info_for_fn (FN))[(N)] = (BB))
 
 /* Defines for textual backward source compatibility.  */
 #define basic_block_info   (cfun->cfg->x_basic_block_info)
diff --git a/gcc/gimple-streamer-in.c b/gcc/gimple-streamer-in.c
index 57b0d87..bc85ae9 100644
--- a/gcc/gimple-streamer-in.c
+++ b/gcc/gimple-streamer-in.c
@@ -67,7 +67,7 @@ input_phi (struct lto_input_block *ib, basic_block bb, struct 
data_in *data_in,
   int src_index = streamer_read_uhwi (ib);
   bitpack_d bp = streamer_read_bitpack (ib);
   location_t arg_loc = stream_input_location (&bp, data_in);
-  basic_block sbb = BASIC_BLOCK_FOR_FUNCTION (fn, src_index);
+  basic_block sbb = BASIC_BLOCK_FOR_FN (fn, src_index);
 
   edge e = NULL;
   int j;
@@ -258,7 +258,7 @@ input_bb (struct lto_input_block *ib, enum LTO_tags tag,
   gcc_assert (cfun == fn);
 
   index = streamer_read_uhwi (ib);
-  bb = BASIC_BLOCK_FOR_FUNCTION (fn, index);
+  bb = BASIC_BLOCK_FOR_FN (fn, index);
 
   bb->count = apply_scale (streamer_read_gcov_count (ib),
count_materialization_scale);
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index 312d75d..0253bb0 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -727,7 +727,7 @@ ipa_merge_profiles (struct cgraph_node *dst,
{
  unsigned int i;
 
- dstbb = BASIC_BLOCK_FOR_FUNCTION (dstcfun, srcbb->index);
+ dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
  if (dstbb == NULL)
{
  if (cgraph_dump_file)
@@ -772,7 +772,7 @@ ipa_merge_profiles (struct cgraph_node *dst,
{
  unsigned int i;
 
- dstbb = BASIC_BLOCK_FOR_FUNCTION (dstcfun, srcbb->index);
+ dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
  dstbb->count += srcbb->count;
  for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
{
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
index 862e49d..5a604d3 100644
--- a/gcc/lto-streamer-in.c
+++ b/gcc/lto-streamer-in.c
@@ -611,7 +611,7 @@ make_new_block (struct function *fn, unsigned int index)
 {
   basic_block bb = alloc_block ();
   bb->index = index;
-  SET_BASIC_BLOCK_FOR_FUNCTION (fn, index, bb);
+  SET_BASIC_BLOCK_FOR_FN (fn, index, bb);
   n_basic_blocks_for_fn (fn)++;
   return bb;
 }
@@ -638,8 +638,8 @@ input_cfg (struct lto_input_block *ib, struct data_in 
*data_in,
   bb_count = streamer_read_uhwi (ib);
 
   last_basic_block_for_function (fn) = bb_count;
-  if (bb_count > basic_block_info_for_function (fn)->length ())
-vec_safe_grow_cleared (basic_block_info_for_functio

[PATCH 12/13] Eliminate FOR_EACH_BB_REVERSE macro.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (FOR_EACH_BB_REVERSE): Eliminate macro.

* cfghooks.c (verify_flow_info): Replace uses of FOR_EACH_BB_REVERSE
with FOR_EACH_BB_REVERSE_FN, making uses of cfun explicit.
* cfgrtl.c (print_rtl_with_bb, rtl_verify_edges,
rtl_verify_bb_insns, rtl_verify_bb_pointers,
rtl_verify_bb_insn_chain, rtl_verify_fallthru): Likewise.
* config/ia64/ia64.c (emit_predicate_relation_info): Likewise.
* config/sh/sh.c (sh_md_init_global): Likewise.
* dce.c (reset_unmarked_insns_debug_uses, delete_unmarked_insns):
Likewise.
* dominance.c (calc_dfs_tree): Likewise.
* final.c (final): Likewise.
* function.c (thread_prologue_and_epilogue_insns): Likewise.
* gcse.c (compute_code_hoist_vbeinout): Likewise.
* ira.c (update_equiv_regs, build_insn_chain): Likewise.
* lcm.c (compute_antinout_edge): Likewise.
* mode-switching.c (optimize_mode_switching): Likewise.
* postreload.c (reload_combine): Likewise.
* recog.c (split_all_insns, peephole2_optimize): Likewise.
* tree-ssa-live.c (live_worklist): Likewise.
---
 gcc/basic-block.h  |  2 --
 gcc/cfghooks.c |  2 +-
 gcc/cfgrtl.c   | 12 ++--
 gcc/config/ia64/ia64.c |  4 ++--
 gcc/config/sh/sh.c |  2 +-
 gcc/dce.c  |  4 ++--
 gcc/dominance.c|  4 ++--
 gcc/final.c|  2 +-
 gcc/function.c |  2 +-
 gcc/gcse.c |  2 +-
 gcc/ira.c  |  4 ++--
 gcc/lcm.c  |  2 +-
 gcc/mode-switching.c   |  4 ++--
 gcc/postreload.c   |  2 +-
 gcc/recog.c|  4 ++--
 gcc/tree-ssa-live.c|  2 +-
 16 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index b378a5b..75f16ac 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -336,8 +336,6 @@ struct GTY(()) control_flow_graph {
 #define FOR_EACH_BB_REVERSE_FN(BB, FN) \
   FOR_BB_BETWEEN (BB, (FN)->cfg->x_exit_block_ptr->prev_bb, 
(FN)->cfg->x_entry_block_ptr, prev_bb)
 
-#define FOR_EACH_BB_REVERSE(BB) FOR_EACH_BB_REVERSE_FN (BB, cfun)
-
 /* For iterating over insns in basic block.  */
 #define FOR_BB_INSNS(BB, INSN) \
   for ((INSN) = BB_HEAD (BB);  \
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index 2400965..78218b5 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -123,7 +123,7 @@ verify_flow_info (void)
 }
 
   /* Now check the basic blocks (boundaries etc.) */
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   int n_fallthru = 0;
   edge e;
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index daadd9b..7734ac1 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -2153,7 +2153,7 @@ print_rtl_with_bb (FILE *outf, const_rtx rtx_first, int 
flags)
 
   if (flags & TDF_BLOCKS)
{
- FOR_EACH_BB_REVERSE (bb)
+ FOR_EACH_BB_REVERSE_FN (bb, cfun)
{
  rtx x;
 
@@ -2408,7 +2408,7 @@ rtl_verify_edges (void)
   int err = 0;
   basic_block bb;
 
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   int n_fallthru = 0, n_branch = 0, n_abnormal_call = 0, n_sibcall = 0;
   int n_eh = 0, n_abnormal = 0;
@@ -2586,7 +2586,7 @@ rtl_verify_bb_insns (void)
   int err = 0;
   basic_block bb;
 
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   /* Now check the header of basic
 block.  It ought to contain optional CODE_LABEL followed
@@ -2649,7 +2649,7 @@ rtl_verify_bb_pointers (void)
   basic_block bb;
 
   /* Check the general integrity of the basic blocks.  */
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   rtx insn;
 
@@ -2739,7 +2739,7 @@ rtl_verify_bb_insn_chain (void)
 
   bb_info = XCNEWVEC (basic_block, max_uid);
 
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   rtx head = BB_HEAD (bb);
   rtx end = BB_END (bb);
@@ -2821,7 +2821,7 @@ rtl_verify_fallthru (void)
   basic_block bb;
   int err = 0;
 
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   edge e;
 
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index a837974..99bc094 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -9613,7 +9613,7 @@ emit_predicate_relation_info (void)
 {
   basic_block bb;
 
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   int r;
   rtx head = BB_HEAD (bb);
@@ -9641,7 +9641,7 @@ emit_predicate_relation_info (void)
  relations around them.  Otherwise the assembler will assume the call
  returns, and complain about uses of call-clobbered predicates after
  the call.  */
-  FOR_EACH_BB_REVERSE (bb)
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
 {
   rtx insn = BB_HEAD (bb);
 
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 3e907b2..26c8957 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -0,

[PATCH 00/13] Remove remaining cfun-using macros from basic-block.h

2013-12-06 Thread David Malcolm
I have a series of 13 follow-up patches which remove the remaining
"cfun"-using macros from basic-block.h

Successfully bootstrapped & tested on x86_64-unknown-linux-gnu.

These were pre-approved in stage1, and are mechanical in nature [1]

I'd like to apply these to trunk now, but given that we're now in
stage3, do I need to wait until the next stage1?

The first 4 patches rename various "_for_function|_FOR_FUNCTION"
macros to "_for_fn|_FOR_FN" for consistency with the earlier
patches in this thread.

The remaining patches eliminate cfun-using macros in favor of
the "_for_fn|_FOR_FN" variant, making uses of cfun explicit.
There are still some macros in function.h that implicitly use
cfun, but it's less clear what to replace them with.

Note to self: here's a grep invocation for ensuring that no new
uses sneak into the sources:
for m in \
  basic_block_info_for_function BASIC_BLOCK_FOR_FUNCTION \
  SET_BASIC_BLOCK_FOR_FUNCTION last_basic_block_for_function \
  label_to_block_map_for_function profile_status_for_function \
  SET_BASIC_BLOCK BASIC_BLOCK basic_block_info label_to_block_map \
  profile_status last_basic_block FOR_EACH_BB FOR_EACH_BB_REVERSE \
  FOR_ALL_BB ; 
do
  grep -nH -E -w $m \
 gcc/*.[ch] gcc/config/*.[ch] gcc/config/*/*.{c,h,md} ; 
done

(this currently has 11 false-positives)

[1] with one exception, in patch 10 in gcc/ira-emit.c (ira_emit) where
I introduced a new local to avoid overlong lines.

David Malcolm (13):
  Rename macros (basic_block_info_for_function,
BASIC_BLOCK_FOR_FUNCTION, SET_BASIC_BLOCK_FOR_FUNCTION)
  Rename last_basic_block_for_function to last_basic_block_for_fn.
  Rename label_to_block_map_for_function to label_to_block_map_for_fn.
  Rename profile_status_for_function to profile_status_for_fn.
  Eliminate SET_BASIC_BLOCK macro.
  Eliminate BASIC_BLOCK macro.
  Eliminate basic_block_info macro.
  Eliminate label_to_block_map macro.
  Eliminate profile_status macro.
  Eliminate last_basic_block macro.
  Eliminate FOR_EACH_BB macro.
  Eliminate FOR_EACH_BB_REVERSE macro.
  Eliminate FOR_ALL_BB macro.

 gcc/alias.c  |   2 +-
 gcc/asan.c   |   6 +-
 gcc/auto-inc-dec.c   |   2 +-
 gcc/basic-block.h|  32 +++--
 gcc/bb-reorder.c |  29 
 gcc/bt-load.c|  45 ++--
 gcc/caller-save.c|   8 +--
 gcc/cfg.c|  32 -
 gcc/cfganal.c|  35 +-
 gcc/cfgbuild.c   |  12 ++--
 gcc/cfgcleanup.c |   6 +-
 gcc/cfgexpand.c  |  14 ++--
 gcc/cfghooks.c   |  16 ++---
 gcc/cfgloop.c|  20 +++---
 gcc/cfgloopanal.c|   8 +--
 gcc/cfgloopmanip.c   |   6 +-
 gcc/cfgrtl.c |  61 
 gcc/cgraphbuild.c|   8 +--
 gcc/combine-stack-adj.c  |   2 +-
 gcc/combine.c|   8 +--
 gcc/config/arm/arm.c |   4 +-
 gcc/config/bfin/bfin.c   |   4 +-
 gcc/config/c6x/c6x.c |   6 +-
 gcc/config/epiphany/resolve-sw-modes.c   |   6 +-
 gcc/config/frv/frv.c |   8 +--
 gcc/config/i386/i386.c   |   2 +-
 gcc/config/ia64/ia64.c   |   6 +-
 gcc/config/mips/mips.c   |   8 +--
 gcc/config/picochip/picochip.c   |   2 +-
 gcc/config/rs6000/rs6000.c   |   2 +-
 gcc/config/s390/s390.c   |   4 +-
 gcc/config/sh/sh.c   |   2 +-
 gcc/config/spu/spu.c |   6 +-
 gcc/config/tilegx/tilegx.c   |   4 +-
 gcc/config/tilepro/tilepro.c |   4 +-
 gcc/coverage.c   |   2 +-
 gcc/cprop.c  |  23 ---
 gcc/cse.c|   8 +--
 gcc/dce.c|  10 +--
 gcc/df-core.c|  68 +-
 gcc/df-problems.c|  54 +++
 gcc/df-scan.c|  42 ++-
 gcc/df.h |   2 +-
 gcc/dominance.c  |  37 +-
 gcc/domwalk.c|   2 +-
 gcc/dse.c|  14 ++--
 gcc/except.c |   2 +-
 gcc/final.c  |   6 +-
 gcc/function.c   |  16 ++---
 gcc/gcse.c   |  54 ---
 gcc/gimple-iterator.c|   2 +-
 gcc/gimple-ssa-isolate-paths.c   |   4 +-
 gcc/gimple-streamer-in.c |   4 +-
 gcc/gimple.c |   8 ++-
 gcc/graph.c  

[PATCH 02/13] Rename last_basic_block_for_function to last_basic_block_for_fn.

2013-12-06 Thread David Malcolm
gcc/
* basic-block.h (last_basic_block_for_function): Rename to...
(last_basic_block_for_fn): ...this.
* ipa-utils.c (ipa_merge_profiles): Update for renaming of
last_basic_block_for_function to last_basic_block_for_fn.
* lto-streamer-in.c (input_cfg): Likewise.
* lto-streamer-out.c (output_cfg): Likewise.
* tree-cfg.c (init_empty_tree_cfg_for_function): Likewise.
* tree-sra.c (propagate_dereference_distances, ipa_early_sra):
Likewise.
---
 gcc/basic-block.h  | 2 +-
 gcc/ipa-utils.c| 4 ++--
 gcc/lto-streamer-in.c  | 2 +-
 gcc/lto-streamer-out.c | 2 +-
 gcc/tree-cfg.c | 2 +-
 gcc/tree-sra.c | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 234f6e9..88b0e48 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -317,7 +317,7 @@ struct GTY(()) control_flow_graph {
 #define basic_block_info_for_fn(FN) ((FN)->cfg->x_basic_block_info)
 #define n_basic_blocks_for_fn(FN)   ((FN)->cfg->x_n_basic_blocks)
 #define n_edges_for_fn(FN)  ((FN)->cfg->x_n_edges)
-#define last_basic_block_for_function(FN)((FN)->cfg->x_last_basic_block)
+#define last_basic_block_for_fn(FN) ((FN)->cfg->x_last_basic_block)
 #define label_to_block_map_for_function(FN)  ((FN)->cfg->x_label_to_block_map)
 #define profile_status_for_function(FN) 
((FN)->cfg->x_profile_status)
 
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index 0253bb0..569626d 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -711,8 +711,8 @@ ipa_merge_profiles (struct cgraph_node *dst,
 "Giving up; number of basic block mismatch.\n");
   match = false;
 }
-  else if (last_basic_block_for_function (srccfun)
-  != last_basic_block_for_function (dstcfun))
+  else if (last_basic_block_for_fn (srccfun)
+  != last_basic_block_for_fn (dstcfun))
 {
   if (cgraph_dump_file)
fprintf (cgraph_dump_file,
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
index 5a604d3..9ad4f5f 100644
--- a/gcc/lto-streamer-in.c
+++ b/gcc/lto-streamer-in.c
@@ -637,7 +637,7 @@ input_cfg (struct lto_input_block *ib, struct data_in 
*data_in,
 
   bb_count = streamer_read_uhwi (ib);
 
-  last_basic_block_for_function (fn) = bb_count;
+  last_basic_block_for_fn (fn) = bb_count;
   if (bb_count > basic_block_info_for_fn (fn)->length ())
 vec_safe_grow_cleared (basic_block_info_for_fn (fn), bb_count);
 
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index e99424e..858d49e 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -1633,7 +1633,7 @@ output_cfg (struct output_block *ob, struct function *fn)
   profile_status_for_function (fn));
 
   /* Output the number of the highest basic block.  */
-  streamer_write_uhwi (ob, last_basic_block_for_function (fn));
+  streamer_write_uhwi (ob, last_basic_block_for_fn (fn));
 
   FOR_ALL_BB_FN (bb, fn)
 {
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index e4a1371..3df4cbe 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -184,7 +184,7 @@ init_empty_tree_cfg_for_function (struct function *fn)
   init_flow (fn);
   profile_status_for_function (fn) = PROFILE_ABSENT;
   n_basic_blocks_for_fn (fn) = NUM_FIXED_BLOCKS;
-  last_basic_block_for_function (fn) = NUM_FIXED_BLOCKS;
+  last_basic_block_for_fn (fn) = NUM_FIXED_BLOCKS;
   vec_alloc (basic_block_info_for_fn (fn), initial_cfg_capacity);
   vec_safe_grow_cleared (basic_block_info_for_fn (fn),
 initial_cfg_capacity);
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 0890613..9aa526f 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -3793,7 +3793,7 @@ propagate_dereference_distances (void)
 {
   basic_block bb;
 
-  auto_vec queue (last_basic_block_for_function (cfun));
+  auto_vec queue (last_basic_block_for_fn (cfun));
   queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));
   FOR_EACH_BB (bb)
 {
@@ -4970,7 +4970,7 @@ ipa_early_sra (void)
 
   bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
 func_param_count
-* last_basic_block_for_function (cfun));
+* last_basic_block_for_fn (cfun));
   final_bbs = BITMAP_ALLOC (NULL);
 
   scan_function ();
-- 
1.7.11.7



Re: [PATCH] Handle PIEs in libbacktrace

2013-12-06 Thread Ian Lance Taylor
On Fri, Dec 6, 2013 at 1:53 AM, Jakub Jelinek  wrote:
>
> The alternative would be to just (perhaps under #ifdef SANITIZER_CP_DEMANGLE)
> compile in libiberty/cp-demangle.c (similarly how libstdc++ compiles it in)
> as part of libsanitizer/libiberty/ or even libsanitizer/libbacktrace/,
> and tweak it, so that like libsanitizer/libbacktrace it uses internal_memcpy
> etc. and uses InternalAlloc/InternalFree.  The problem is that cp-demangle.c
> uses only realloc and free, and doesn't provide any hint on how large the
> previously allocated memory chunk is.  So, either there is some easy way
> how to query the size of InternalAlloc returned allocation, or we would need
> to allocate uptr extra and store there number of bytes allocated
> and emulate realloc/free that way.

There was a recent buggy patch to the demangler that added calls to
malloc and realloc (2013-10-25 Gary Benson ).
That patch must be fixed or reverted before the 4.9 release.  The main
code in the demangler must not call malloc/realloc.

When that patch is fixed, you can use the cplus_demangle_v3_callback
function to get a demangler that never calls malloc.

Ian


Re: [PATCH] Handle PIEs in libbacktrace

2013-12-06 Thread Ian Lance Taylor
On Thu, Dec 5, 2013 at 11:50 PM, Jakub Jelinek  wrote:
>
> 2013-12-06  Jakub Jelinek  
>
> * elf.c (ET_DYN): Undefine and define again.
> (elf_add): Add exe argument, if true and ehdr.e_type is ET_DYN,
> return early -1 without closing the descriptor.
> (struct phdr_data): Add exe_descriptor.
> (phdr_callback): If pd->exe_descriptor is not -1, for very first
> call if dlpi_name is NULL just call elf_add with the exe_descriptor,
> otherwise backtrace_close the exe_descriptor if not -1.  Adjust
> call to elf_add.
> (backtrace_initialize): Adjust call to elf_add.  If it returns
> -1, set pd.exe_descriptor to descriptor, otherwise set it to -1.

Please update the comment for elf_add to explain the return value.
This patch is OK with that change.

Thanks.

Ian


Re: [RFC] libgcov.c re-factoring and offline profile-tool

2013-12-06 Thread Jan Hubicka
> Hi, all
> 
> This is the new patch for gcov-tool (previously profile-tool).
> 
> Honza: can you comment on the new merge interface? David posted some
> comments in an earlier email and we want to know what's your opinion.
> 
> Test patch has been tested with boostrap, regresssion,
> profiledbootstrap and SPEC2006.
> 
> Noticeable changes from the earlier version:
> 
> 1. create a new file libgcov.h and move libgcov-*.h headers to libgcov.h
> So we can included multiple libgcov-*.c without adding new macros.
> 
> 2. split libgcov.h specific code in gcov-io.h to libgcc/libgcov.h
> Avoid multiple-page of code under IN_LIBGCOV macro -- this
> improves the readability.
> 
> 3. make gcov_var static, and move the definition from gcov-io.h to
> gcov-io.c. Also
>move some static functions accessing gcov_var to gcov-io.c
> Current code rely on GCOV_LINKAGE tricks to avoid multi-definition. I don't 
> see
> a reason that gcov_var needs to exposed as a global.
> 
> 4. expose gcov_write_strings() and gcov_sync() to gcov_tool usage
> 
> 5. rename profile-tool to gcov-tool per Honza's suggestion.
> 
> Thanks,

Hi,
I did not read in detail the gcov-tool source itself, but let's first make the 
interface changes
needed.

> 2013-11-18  Rong Xu  
> 
>   * gcc/gcov-io.c (gcov_var): Moved from gcov-io.h and make it static.
>   (gcov_position): Move from gcov-io.h
>   (gcov_is_error): Ditto.
>   (gcov_rewrite): Ditto.
>   * gcc/gcov-io.h: Re-factoring. Move gcov_var to gcov-io.h and
> move the libgcov only part of libgcc/libgcov.h.
>   * libgcc/libgcov.h: New common header files for libgcov-*.h
>   * libgcc/Makefile.in: Add dependence to libgcov.h
>   * libgcc/libgcov-profiler.c: Use libgcov.h
>   * libgcc/libgcov-driver.c: Ditto.
>   * libgcc/libgcov-interface.c: Ditto.
>   * libgcc/libgcov-driver-system.c (allocate_filename_struct): use
>   xmalloc instread of malloc.
>   * libgcc/libgcov-merge.c (void __gcov_merge_delta): Add more
>   parameters to merge function.
>   (__gcov_merge_add): Ditto.
>   (__gcov_merge_ior): Ditto.
>   (__gcov_merge_time_profile): Ditto.
>   (__gcov_merge_single): Ditto.
>   (__gcov_merge_delta): Ditto.
>   * libgcc/libgcov-tool.c (void gcov_tool_set_verbose): New for
>   gcov-tool support.
>   (set_fn_ctrs): Ditto.
>   (tag_function): Ditto.
>   (tag_blocks): Ditto.
>   (tag_arcs): Ditto.
>   (tag_lines): Ditto.
>   (tag_counters): Ditto.
>   (tag_summary): Ditto.
>   (read_gcda_finalize): Ditto.
>   (read_gcda_file): Ditto.
>   (ftw_read_file): Ditto.
>   (read_profile_dir_init) Ditto.:
>   (gcov_read_profile_dir): Ditto.
>   (gcov_merge): Ditto.
>   (find_match_gcov_info): Ditto.
>   (gcov_profile_merge): Ditto.
>   (__gcov_scale_add): Ditto.
>   (__gcov_scale_ior): Ditto.
>   (__gcov_scale_delta): Ditto.
>   (__gcov_scale_single): Ditto.
>   (gcov_profile_scale): Ditto.
>   (gcov_profile_normalize): Ditto.
>   (__gcov_scale2_add): Ditto.
>   (__gcov_scale2_ior): Ditto.
>   (__gcov_scale2_delta): Ditto.
>   (__gcov_scale2_single): Ditto.
>   (gcov_profile_scale2): Ditto.
>   * gcc/gcov-tool.c (unlink_file): Gcov-tool driver support.
>   (unlink_dir): Ditto.
>   (profile_merge): Ditto.
>   (print_merge_usage_message): Ditto.
>   (merge_usage): Ditto.
>   (do_merge): Ditto.
>   (profile_rewrite2): Ditto.
>   (profile_rewrite): Ditto.
>   (print_rewrite_usage_message): Ditto.
>   (rewrite_usage): Ditto.
>   (do_rewrite): Ditto.
>   (print_usage): Ditto.
>   (print_version): Ditto.
>   (process_args): Ditto.
>   (main): Ditto.
>   * gcc/Makefile.in: Build and install gcov-tool.

> Index: gcc/gcov-io.c
> ===
> --- gcc/gcov-io.c (revision 204895)
> +++ gcc/gcov-io.c (working copy)
> @@ -36,6 +36,37 @@ static const gcov_unsigned_t *gcov_read_words (uns
>  static void gcov_allocate (unsigned);
>  #endif
>  
> +/* Moved for gcov-io.h and make it static.  */
> +static struct gcov_var gcov_var;

This is more a changelog message than a comment in the source file.
Just describe what gcov_var is.

Do you know how the size of libgcov changed with your patch? 
Quick check of current mainline on compiling empty main gives:

jh@gcc10:~/trunk/build/gcc$ cat t.c
main()
{
}
jh@gcc10:~/trunk/build/gcc$ ./xgcc -B ./ -O2 -fprofile-generate -o a.out-new 
--static t.c
jh@gcc10:~/trunk/build/gcc$ gcc -O2 -fprofile-generate -o a.out-old --static t.c
jh@gcc10:~/trunk/build/gcc$ size a.out-old
   textdata bss dec hex filename
 6081413560   16728  628429   996cd a.out-old
jh@gcc10:~/trunk/build/gcc$ size a.out-new
   textdata bss dec hex filename
 6126213688   22880  639189   9c0d5 a.out-new

Without profiling I get:
jh@gcc10:~/trunk/build/gcc$ size a.out-new-

Re: RFA: patch to fix 2 testsuite failures for LRA on PPC

2013-12-06 Thread David Edelsohn
On Thu, Dec 5, 2013 at 12:40 PM, Vladimir Makarov  wrote:
> The following patch fixes two GCC testsuite failures for LRA.  The patch
> makes swap through registers instead of memory for the test cases when LRA
> is used.
>
> There are differences in reload and LRA constraint matching algorithm which
> results in different alternative choices when the original pattern is used.
>
> Actually my first proposed solution variant used one pattern which is now
> for LRA in this patch.  But some doubt arose that it may affect reload pass
> in some bad way.
>
> Ok to commit?

I understand that LRA requires different tuning than reload, but I
continue to be a little uncomfortable with different patterns for LRA
and reload.

I would like to hear some additional opinions.

Thanks, David


Re: RFC Asan instrumentation control

2013-12-06 Thread Yury Gribov

Konstantin wrote:
> My comment about "don't implement them" was only about the flags
> that change shadow offset & scale

Ah, I see. I didn't mention them in the first place so I got puzzled.

So it looks like people are generally ok with
* --param asan-instrument-reads=0/1
* --param asan-instrument-writes=0/1
* --param asan-stack=0/1
* --param asan-globals=0/1
* --param asan-memintrin=0/1
but not with blacklists (which is sad but understandable).

-Y


Re: [PATCH] Masked load/store vectorization (take 6)

2013-12-06 Thread Richard Biener
On Fri, 6 Dec 2013, Jakub Jelinek wrote:

> On Fri, Dec 06, 2013 at 01:49:50PM +0100, Richard Biener wrote:
> > Comments inline (scary large this patch for this stage ...)
> 
> Thanks.
> 
> > > +(define_expand "maskload"
> > > +  [(set (match_operand:V48_AVX2 0 "register_operand")
> > > + (unspec:V48_AVX2
> > > +   [(match_operand: 2 "register_operand")
> > > +(match_operand:V48_AVX2 1 "memory_operand")]
> > > +   UNSPEC_MASKMOV))]
> > > +  "TARGET_AVX")
> > > +
> > > +(define_expand "maskstore"
> > > +  [(set (match_operand:V48_AVX2 0 "memory_operand")
> > > + (unspec:V48_AVX2
> > > +   [(match_operand: 2 "register_operand")
> > > +(match_operand:V48_AVX2 1 "register_operand")
> > > +(match_dup 0)]
> > > +   UNSPEC_MASKMOV))]
> > > +  "TARGET_AVX")
> > > +
> > >  (define_insn_and_split "avx__"
> > >[(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
> > >   (unspec:AVX256MODE2P
> > 
> > x86 maintainers should comment here (ick - unspecs)
> 
> Well, the unspecs are preexisting (right now used by intrinsics only), I'm
> just adding expanders that will expand to those instructions.
> 
> > > @@ -4386,16 +4396,35 @@ get_references_in_stmt (gimple stmt, vec
> > >  {
> > >unsigned i, n;
> > >  
> > > -  op0 = gimple_call_lhs_ptr (stmt);
> > > +  ref.is_read = false;
> > > +  if (gimple_call_internal_p (stmt))
> > > + switch (gimple_call_internal_fn (stmt))
> > > +   {
> > > +   case IFN_MASK_LOAD:
> > > + ref.is_read = true;
> > > +   case IFN_MASK_STORE:
> > > + ref.ref = build2 (MEM_REF,
> > > +   ref.is_read
> > > +   ? TREE_TYPE (gimple_call_lhs (stmt))
> > > +   : TREE_TYPE (gimple_call_arg (stmt, 3)),
> > > +   gimple_call_arg (stmt, 0),
> > > +   gimple_call_arg (stmt, 1));
> > > + references->safe_push (ref);
> > 
> > This may not be a canonical MEM_REF AFAIK, so you should
> > use fold_build2 here (if the address is &a.b the .b needs folding
> 
> Ok, will try that.
> 
> > into the offset).  I assume the 2nd arg is always constant and
> > thus doesn't change pointer-type during propagations?
> 
> Yes, it is, always created by
>   ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
> (and for vectorized IFN_MASK_* copied over from the non-vectorized
> IFN_MASK_* call).
> 
> > > @@ -4464,7 +4493,7 @@ graphite_find_data_references_in_stmt (l
> > >  
> > >FOR_EACH_VEC_ELT (references, i, ref)
> > >  {
> > > -  dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
> > > +  dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
> > >gcc_assert (dr != NULL);
> > >datarefs->safe_push (dr);
> > >  }
> > 
> > Interesting that you succeeded in removing the indirection
> > on ref.pos ... I remember trying that twice at least and
> > failing ;)
> > 
> > You can install that as cleanup now if you split it out (so hopefully
> > no users creep back that make removing it impossible).
> 
> Ok, will do.
> 
> > > +  /* Check whether this is a load or store.  */
> > > +  lhs = gimple_assign_lhs (stmt);
> > > +  if (TREE_CODE (lhs) != SSA_NAME)
> > > +{
> > 
> > gimple_store_p ()?
> 
> Likely.
> 
> > > +  if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
> > > + return false;
> > > +  op = maskstore_optab;
> > > +  ref = lhs;
> > > +}
> > > +  else if (gimple_assign_load_p (stmt))
> > > +{
> > > +  op = maskload_optab;
> > > +  ref = gimple_assign_rhs1 (stmt);
> > > +}
> > > +  else
> > > +return false;
> > > +
> > > +  /* And whether REF isn't a MEM_REF with non-addressable decl.  */
> > > +  if (TREE_CODE (ref) == MEM_REF
> > > +  && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
> > > +  && DECL_P (TREE_OPERAND (TREE_OPERAND (ref, 0), 0))
> > > +  && !TREE_ADDRESSABLE (TREE_OPERAND (TREE_OPERAND (ref, 0), 0)))
> > > +return false;
> > 
> > I think that's overly conservative and not conservative enough.  Just
> > use may_be_nonaddressable_p () (even though the implementation can
> > need some TLC) and make sure to set TREE_ADDRESSABLE when you
> > end up taking its address.
> 
> Will try.
> 
> > Please factor out the target bits into a predicate in optabs.c
> > so you can reduce the amount of includes here.  You can eventually
> > re-use that from the vectorization parts.
> 
> Okay.
> 
> > > @@ -1404,7 +1530,8 @@ insert_gimplified_predicates (loop_p loo
> > >basic_block bb = ifc_bbs[i];
> > >gimple_seq stmts;
> > >  
> > > -  if (!is_predicated (bb))
> > > +  if (!is_predicated (bb)
> > > +   || dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> > 
> > isn't that redundant now?
> 
> Will try (and read the corresponding threads and IRC about that).
> 
> > >for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > > - if ((stmt = gsi_stmt (gsi))
> > > - && gimple_assign_single_p (stmt)
> > > - &

Re: [PATCH] Masked load/store vectorization (take 6)

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 01:49:50PM +0100, Richard Biener wrote:
> Comments inline (scary large this patch for this stage ...)

Thanks.

> > +(define_expand "maskload"
> > +  [(set (match_operand:V48_AVX2 0 "register_operand")
> > +   (unspec:V48_AVX2
> > + [(match_operand: 2 "register_operand")
> > +  (match_operand:V48_AVX2 1 "memory_operand")]
> > + UNSPEC_MASKMOV))]
> > +  "TARGET_AVX")
> > +
> > +(define_expand "maskstore"
> > +  [(set (match_operand:V48_AVX2 0 "memory_operand")
> > +   (unspec:V48_AVX2
> > + [(match_operand: 2 "register_operand")
> > +  (match_operand:V48_AVX2 1 "register_operand")
> > +  (match_dup 0)]
> > + UNSPEC_MASKMOV))]
> > +  "TARGET_AVX")
> > +
> >  (define_insn_and_split "avx__"
> >[(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
> > (unspec:AVX256MODE2P
> 
> x86 maintainers should comment here (ick - unspecs)

Well, the unspecs are preexisting (right now used by intrinsics only), I'm
just adding expanders that will expand to those instructions.

> > @@ -4386,16 +4396,35 @@ get_references_in_stmt (gimple stmt, vec
> >  {
> >unsigned i, n;
> >  
> > -  op0 = gimple_call_lhs_ptr (stmt);
> > +  ref.is_read = false;
> > +  if (gimple_call_internal_p (stmt))
> > +   switch (gimple_call_internal_fn (stmt))
> > + {
> > + case IFN_MASK_LOAD:
> > +   ref.is_read = true;
> > + case IFN_MASK_STORE:
> > +   ref.ref = build2 (MEM_REF,
> > + ref.is_read
> > + ? TREE_TYPE (gimple_call_lhs (stmt))
> > + : TREE_TYPE (gimple_call_arg (stmt, 3)),
> > + gimple_call_arg (stmt, 0),
> > + gimple_call_arg (stmt, 1));
> > +   references->safe_push (ref);
> 
> This may not be a canonical MEM_REF AFAIK, so you should
> use fold_build2 here (if the address is &a.b the .b needs folding

Ok, will try that.

> into the offset).  I assume the 2nd arg is always constant and
> thus doesn't change pointer-type during propagations?

Yes, it is, always created by
  ptr = build_int_cst (reference_alias_ptr_type (ref), 0);
(and for vectorized IFN_MASK_* copied over from the non-vectorized
IFN_MASK_* call).

> > @@ -4464,7 +4493,7 @@ graphite_find_data_references_in_stmt (l
> >  
> >FOR_EACH_VEC_ELT (references, i, ref)
> >  {
> > -  dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
> > +  dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
> >gcc_assert (dr != NULL);
> >datarefs->safe_push (dr);
> >  }
> 
> Interesting that you succeeded in removing the indirection
> on ref.pos ... I remember trying that twice at least and
> failing ;)
> 
> You can install that as cleanup now if you split it out (so hopefully
> no users creep back that make removing it impossible).

Ok, will do.

> > +  /* Check whether this is a load or store.  */
> > +  lhs = gimple_assign_lhs (stmt);
> > +  if (TREE_CODE (lhs) != SSA_NAME)
> > +{
> 
> gimple_store_p ()?

Likely.

> > +  if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
> > +   return false;
> > +  op = maskstore_optab;
> > +  ref = lhs;
> > +}
> > +  else if (gimple_assign_load_p (stmt))
> > +{
> > +  op = maskload_optab;
> > +  ref = gimple_assign_rhs1 (stmt);
> > +}
> > +  else
> > +return false;
> > +
> > +  /* And whether REF isn't a MEM_REF with non-addressable decl.  */
> > +  if (TREE_CODE (ref) == MEM_REF
> > +  && TREE_CODE (TREE_OPERAND (ref, 0)) == ADDR_EXPR
> > +  && DECL_P (TREE_OPERAND (TREE_OPERAND (ref, 0), 0))
> > +  && !TREE_ADDRESSABLE (TREE_OPERAND (TREE_OPERAND (ref, 0), 0)))
> > +return false;
> 
> I think that's overly conservative and not conservative enough.  Just
> use may_be_nonaddressable_p () (even though the implementation can
> need some TLC) and make sure to set TREE_ADDRESSABLE when you
> end up taking its address.

Will try.

> Please factor out the target bits into a predicate in optabs.c
> so you can reduce the amount of includes here.  You can eventually
> re-use that from the vectorization parts.

Okay.

> > @@ -1404,7 +1530,8 @@ insert_gimplified_predicates (loop_p loo
> >basic_block bb = ifc_bbs[i];
> >gimple_seq stmts;
> >  
> > -  if (!is_predicated (bb))
> > +  if (!is_predicated (bb)
> > + || dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> 
> isn't that redundant now?

Will try (and read the corresponding threads and IRC about that).

> >for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > -   if ((stmt = gsi_stmt (gsi))
> > -   && gimple_assign_single_p (stmt)
> > -   && gimple_vdef (stmt))
> > +   if ((stmt = gsi_stmt (gsi)) == NULL
> 
> I don't think gsi_stmt can be NULL

It can if gsi_end_p, but that is apparently in the loop condition.
It was preexisting code anyway, but will change.

> > +   will be then if-converted, the new cop

Re: RFC Asan instrumentation control

2013-12-06 Thread Konstantin Serebryany
On Fri, Dec 6, 2013 at 5:10 PM, Yury Gribov  wrote:
> Konstantin wrote:
>> Can you have a target specific config for the particular target
>> that will have its own shadow offset & scale?
>
> Yes, we have this but I don't see how this can help with code
> instrumentation overheads.
My comment about "don't implement them" was only about the flags that
change shadow offset & scale
>
> -Y


Re: [PATCH] Allow building if libsanitizer on RHEL5 (i.e. with 2.6.18-ish kernel headers)

2013-12-06 Thread Richard Biener
On Fri, Dec 6, 2013 at 2:04 PM, Jakub Jelinek  wrote:
> On Fri, Dec 06, 2013 at 01:52:29PM +0100, Richard Biener wrote:
>> > Note that testing still shows some issues, e.g. because the
>> > kThreadDescriptorSize change has not been applied.  But it at least builds
>> > and for many tests works.
>>
>> What's wrong with just copying the kernel headers 1:1?  ISTR that is what
>> kernel folks recommend if you use kernel APIs that don't have a glibc
>> correspondent.
>
> Well, for many things that is already the case, sanitizer_common has
> apparently its own __sanitizer_* macros/constants/structures that
> are supposed to match kernel ones.  The problem is that
> the code is comparing them as compile time assertions against the kernel
> ones inside of the library code.  IMHO that is not a very good idea, much
> better would be just say a testcase that would include the sanitizer +
> kernel headers, guarded by recent enough LINUX_VERSION_CODE or configure or
> similar, so it wouldn't prevent library build on older kernel headers,
> the kernel ABI better be stable (only new things added, not size of
> structures/magic constants etc. changed from time to time).
>
> But Kostya is apparently not willing to do that, so this patch provides
> a workaround in non-compiler-rt maintained files.

:(

>> Of course you then need runtime checks on whether the running kernel
>> supports the API.
>
> Usually not, say if the structures/magic constants etc. are to wrap syscalls
> or ioctls etc., if kernel doesn't support those syscalls/ioctls, either
> nothing will use those syscalls/ioctls, or if it does, it will fail from the
> kernel, worst case you get some diagnostics from pre-syscall/ioctl wrapper,
> otherwise the kernel syscall/ioctl will just fail and post-syscall/ioctl
> wrapper will do likely nothing at all.

Yes, what I meant is that the code needs to expect things to fail dependent
on the kernel currently running.  Not sure if sanitizer does that.

Richard.

> Jakub


Re: RFC Asan instrumentation control

2013-12-06 Thread Yury Gribov

Konstantin wrote:
> Can you have a target specific config for the particular target
> that will have its own shadow offset & scale?

Yes, we have this but I don't see how this can help with code 
instrumentation overheads.


-Y


Re: [PATCH] Allow building if libsanitizer on RHEL5 (i.e. with 2.6.18-ish kernel headers)

2013-12-06 Thread Jakub Jelinek
On Fri, Dec 06, 2013 at 01:52:29PM +0100, Richard Biener wrote:
> > Note that testing still shows some issues, e.g. because the
> > kThreadDescriptorSize change has not been applied.  But it at least builds
> > and for many tests works.
> 
> What's wrong with just copying the kernel headers 1:1?  ISTR that is what
> kernel folks recommend if you use kernel APIs that don't have a glibc
> correspondent.

Well, for many things that is already the case, sanitizer_common has
apparently its own __sanitizer_* macros/constants/structures that
are supposed to match kernel ones.  The problem is that
the code is comparing them as compile time assertions against the kernel
ones inside of the library code.  IMHO that is not a very good idea, much
better would be just say a testcase that would include the sanitizer +
kernel headers, guarded by recent enough LINUX_VERSION_CODE or configure or
similar, so it wouldn't prevent library build on older kernel headers,
the kernel ABI better be stable (only new things added, not size of
structures/magic constants etc. changed from time to time).

But Kostya is apparently not willing to do that, so this patch provides
a workaround in non-compiler-rt maintained files.

> Of course you then need runtime checks on whether the running kernel
> supports the API.

Usually not, say if the structures/magic constants etc. are to wrap syscalls
or ioctls etc., if kernel doesn't support those syscalls/ioctls, either
nothing will use those syscalls/ioctls, or if it does, it will fail from the
kernel, worst case you get some diagnostics from pre-syscall/ioctl wrapper,
otherwise the kernel syscall/ioctl will just fail and post-syscall/ioctl
wrapper will do likely nothing at all.

Jakub


Re: RFC Asan instrumentation control

2013-12-06 Thread Konstantin Serebryany
On Fri, Dec 6, 2013 at 4:39 PM, Yury Gribov  wrote:
> Konstantin wrote:
>
>> Jakub wrote:
>>> I'm strongly against the blacklist,
>> I don't like it either. We were forced to implement it by reality.
>> ...
>
>> imagine third-party code which you can build but can not change
>
> Same situation here. Reality is that Asan is often thrown at huge (I mean
> several MLoc) apps with old, ugly and complicated build systems
> (cmake+make+weird bash scripts). Blacklists are by far the easiest solution
> in this case (the other being even uglier like writing bash wrapper around
> gcc). I agree that ugly codebases aren't GCC's problem but still.
>
>
> Jakub wrote:
>> I'd prefer not to implement ABI changing options
>
> Agreed.
>
>
> Konstantin wrote:
>>> For others, perhaps, the question is what options to use for them
>> Jakub wrote:
>> These are asan-internal and not documented anyway.
>> Mostly for our own experiments. Don't implement them.
>
> I see. But what about the use-case outlined above (embedded system with
> little RAM)? IMHO this is an interesting niche for Asan and friends.

Can you have a target specific config for the particular target that
will have its own shadow offset & scale?

>
> -Y


Re: [PATCH] Allow building if libsanitizer on RHEL5 (i.e. with 2.6.18-ish kernel headers)

2013-12-06 Thread Richard Biener
On Fri, Dec 6, 2013 at 1:43 PM, Jakub Jelinek  wrote:
> Hi!
>
> Here is an alternative version of the patch I've posted earlier to allow
> building libsanitizer on 2.6.18-ish kernel headers, this time by adding
> 5 tiny kernel header wrappers.
> The only drawback of this are warnings like:
> ../../../../libsanitizer/include/linux/aio_abi.h:2:2: warning: #include_next 
> is a GCC extension [enabled by default]
> (and generally just on the two source files that include these problematic
> kernel headers, so about 10 warnings total).
> We could avoid that by not building with -pedantic, or by using
> -isystem instead of -I for the libsanitizer/include headers (which has
> drawback that we wouldn't get warnings for stuff in
> libsanitizer/include/sanitizer/ headers), or these could live in
> some other directory, say libsanitizer/include/wrappers/linux/*.h
> and we would add -isystem $(top_srcdir)/include/wrappers/.
>
> Note that testing still shows some issues, e.g. because the
> kThreadDescriptorSize change has not been applied.  But it at least builds
> and for many tests works.

What's wrong with just copying the kernel headers 1:1?  ISTR that is what
kernel folks recommend if you use kernel APIs that don't have a glibc
correspondent.

Of course you then need runtime checks on whether the running kernel
supports the API.

Richard.

> 2013-12-06  Jakub Jelinek  
>
> * include/linux/aio_abi.h: New header.
> * include/linux/mroute.h: New header.
> * include/linux/mroute6.h: New header.
> * include/linux/perf_event.h: New header.
> * include/linux/types.h: New header.
>
> --- libsanitizer/include/linux/aio_abi.h.jj 2013-12-06 06:02:29.0 
> -0500
> +++ libsanitizer/include/linux/aio_abi.h2013-12-06 06:03:11.0 
> -0500
> @@ -0,0 +1,7 @@
> +#include 
> +#include_next 
> +/* IOCB_CMD_PREADV/PWRITEV has been added in 2.6.19 */
> +#if LINUX_VERSION_CODE < 132627
> +#define IOCB_CMD_PREADV 7
> +#define IOCB_CMD_PWRITEV 8
> +#endif
> --- libsanitizer/include/linux/mroute.h.jj  2013-12-06 06:14:30.0 
> -0500
> +++ libsanitizer/include/linux/mroute.h 2013-12-06 06:10:09.0 -0500
> @@ -0,0 +1,8 @@
> +#include 
> +/*  before 2.6.26 included 
> +   which clashes with userspace headers.  */
> +#if LINUX_VERSION_CODE < 132634
> +#define _LINUX_IN_H
> +#include 
> +#endif
> +#include_next 
> --- libsanitizer/include/linux/mroute6.h.jj 2013-12-06 03:58:19.0 
> -0500
> +++ libsanitizer/include/linux/mroute6.h2013-12-06 06:13:59.0 
> -0500
> @@ -0,0 +1,5 @@
> +#include 
> +/*  has been added in 2.6.26 */
> +#if LINUX_VERSION_CODE >= 132634
> +#include_next 
> +#endif
> --- libsanitizer/include/linux/perf_event.h.jj  2013-12-06 03:58:01.0 
> -0500
> +++ libsanitizer/include/linux/perf_event.h 2013-12-06 05:59:16.0 
> -0500
> @@ -0,0 +1,7 @@
> +#include 
> +/*  has been added in 2.6.32 */
> +#if LINUX_VERSION_CODE >= 132640
> +#include_next 
> +#else
> +#define perf_event_attr __sanitizer_perf_event_attr
> +#endif
> --- libsanitizer/include/linux/types.h.jj   2013-12-06 03:57:37.0 
> -0500
> +++ libsanitizer/include/linux/types.h  2013-12-06 03:57:33.0 -0500
> @@ -0,0 +1,12 @@
> +#ifndef LINUX_TYPES_WRAPPER_H
> +#define LINUX_TYPES_WRAPPER_H
> +
> +/* Before
> +   
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/include/linux/types.h?id=6c7c6afbb8c0e60d32a563cae7c6889211e9d9d8
> +   linux/types.h conflicted with sys/ustat.h.  Work around it.  */
> +
> +#define ustat __asan_bad_ustat
> +#include_next 
> +#undef ustat
> +
> +#endif
>
> Jakub


  1   2   >