Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse


+(simplify
+ (plus (convert? @0) (convert? (xdivamulminusa @0 @1)))
+  (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+   && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (trunc_mod (convert @0) (convert @1

See PR 67953.

+(match (abitandnotb @0 @1)
+ (bit_and:c @0 (bit_not INTEGER_CST@1)))

Does that work?

+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (convert? (lshift integer_onep@1 @2)))
+  (if (! FLOAT_TYPE_P (type)
+&& tree_nop_conversion_p (type, TREE_TYPE (@2)))
+   (lshift @0 (convert @2

You don't need/want to convert @2 (fold-const doesn't convert, does it?), 
and you don't need to check for tree_nop_conversion_p.



--
Marc Glisse


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Hurugalawadi, Naveen
Hi.

>> please adjust also according to these comments.
Adjusted the patch as per your comments.

Please find attached the patch as per your comments.
Please review the patch and let me know if any further modifications 
are required.

Thanks,
Naveen

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..2d81b2c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -9232,26 +9232,6 @@ fold_binary_loc (location_t loc,
   return NULL_TREE;
 
 case PLUS_EXPR:
-  if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-	{
-	  /* X + (X / CST) * -CST is X % CST.  */
-	  if (TREE_CODE (arg1) == MULT_EXPR
-	  && TREE_CODE (TREE_OPERAND (arg1, 0)) == TRUNC_DIV_EXPR
-	  && operand_equal_p (arg0,
-  TREE_OPERAND (TREE_OPERAND (arg1, 0), 0), 0))
-	{
-	  tree cst0 = TREE_OPERAND (TREE_OPERAND (arg1, 0), 1);
-	  tree cst1 = TREE_OPERAND (arg1, 1);
-	  tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (cst1),
-  cst1, cst0);
-	  if (sum && integer_zerop (sum))
-		return fold_convert_loc (loc, type,
-	 fold_build2_loc (loc, TRUNC_MOD_EXPR,
-		  TREE_TYPE (arg0), arg0,
-		  cst0));
-	}
-	}
-
   /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or
 	 one.  Make sure the type is not saturating and has the signedness of
 	 the stripped operands, as fold_plusminus_mult_expr will re-associate.
@@ -9692,28 +9672,6 @@ fold_binary_loc (location_t loc,
 			fold_convert_loc (loc, type,
 	  TREE_OPERAND (arg0, 0)));
 
-  if (! FLOAT_TYPE_P (type))
-	{
-	  /* Fold (A & ~B) - (A & B) into (A ^ B) - B, where B is
-	 any power of 2 minus 1.  */
-	  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 0),
-  TREE_OPERAND (arg1, 0), 0))
-	{
-	  tree mask0 = TREE_OPERAND (arg0, 1);
-	  tree mask1 = TREE_OPERAND (arg1, 1);
-	  tree tem = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask0);
-
-	  if (operand_equal_p (tem, mask1, 0))
-		{
-		  tem = fold_build2_loc (loc, BIT_XOR_EXPR, type,
- TREE_OPERAND (arg0, 0), mask1);
-		  return fold_build2_loc (loc, MINUS_EXPR, type, tem, mask1);
-		}
-	}
-	}
-
   /* Fold __complex__ ( x, 0 ) - __complex__ ( 0, y ) to
 	 __complex__ ( x, -y ).  This is not the same for SNaNs or if
 	 signed zeros are involved.  */
@@ -9803,20 +9761,6 @@ fold_binary_loc (location_t loc,
   goto associate;
 
 case MULT_EXPR:
-  /* (-A) * (-B) -> A * B  */
-  if (TREE_CODE (arg0) == NEGATE_EXPR && negate_expr_p (arg1))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg0, 0)),
-			fold_convert_loc (loc, type,
-	  negate_expr (arg1)));
-  if (TREE_CODE (arg1) == NEGATE_EXPR && negate_expr_p (arg0))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  negate_expr (arg0)),
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg1, 0)));
-
   if (! FLOAT_TYPE_P (type))
 	{
 	  /* Transform x * -C into -x * C if x is easily negatable.  */
@@ -9830,16 +9774,6 @@ fold_binary_loc (location_t loc,
 		  negate_expr (arg0)),
 tem);
 
-	  /* (a * (1 << b)) is (a << b)  */
-	  if (TREE_CODE (arg1) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg1, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op0,
-TREE_OPERAND (arg1, 1));
-	  if (TREE_CODE (arg0) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg0, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op1,
-TREE_OPERAND (arg0, 1));
-
 	  /* (A + A) * C -> A * 2 * C  */
 	  if (TREE_CODE (arg0) == PLUS_EXPR
 	  && TREE_CODE (arg1) == INTEGER_CST
@@ -9882,21 +9816,6 @@ fold_binary_loc (location_t loc,
 	}
   else
 	{
-	  /* Convert (C1/X)*C2 into (C1*C2)/X.  This transformation may change
- the result for floating point types due to rounding so it is applied
- only if -fassociative-math was specify.  */
-	  if (flag_associative_math
-	  && TREE_CODE (arg0) == RDIV_EXPR
-	  && TREE_CODE (arg1) == REAL_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 0)) == REAL_CST)
-	{
-	  tree tem = const_binop (MULT_EXPR, TREE_OPERAND (arg0, 0),
-  arg1);
-	  if (tem)
-		return fold_build2_loc (loc, RDIV_EXPR, type, tem,
-TREE_OPERAND (arg0, 1));
-	}
-
   /* Strip sign operations from X in X*X, i.e. -Y*-Y -> Y*Y.  */
 	  if (operand_equal_p (arg0, arg1, 0))
 	{
@@ -10013,28 +9932,6 @@ fold_binary_loc (location_t loc,
 arg1);
 	}
 
-  /* (X & ~Y) | (~X & Y) is X ^ Y */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR)
-{
-	  tree a0, a1, l0, l1, n0, n1;
-
-	  a0 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
-	  a1 = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
-
-	  l0 = fold_convert_loc (loc, type, TREE

Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Jan Hubicka
> On Oct 13, 2015, Eric Botcazou  wrote:
> 
> > Note that this is PR middle-end/67912.
> 
> Thanks.  I added this piece of information to the ChangeLog entry, and
> checked the patch in.
Thanks, Alexandre. That indeed looks better than my variant of the patch.
Does it also fix the IA-64 issue?

Honza
> 
> -- 
> Alexandre Oliva, freedom fighter    http://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [PR67891] drop is_gimple_reg test from set_parm_rtl

2015-10-13 Thread Alexandre Oliva
On Oct 12, 2015, Richard Biener  wrote:

> On Sat, Oct 10, 2015 at 3:16 PM, Alexandre Oliva  wrote:
>> On Oct  9, 2015, Richard Biener  wrote:
>> 
>>> Ok.  Note that I think emit_block_move shouldn't mess with the addressable 
>>> flag.
>> 
>> I have successfully tested a patch that stops it from doing so,
>> reverting https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49429#c11 but
>> according to bugs 49429 and 49454, it looks like removing it would mess
>> with escape analysis introduced in r175063 for bug 44194.  The thread
>> that introduces the mark_addressable calls suggests some discomfort with
>> this solution, and even a suggestion that the markings should be
>> deferred past the end of expand, but in the end there was agreement to
>> go with it.  https://gcc.gnu.org/ml/gcc-patches/2011-06/msg01746.html

> Aww, indeed.  Of course the issue is that we don't track pointers to the
> stack introduced during RTL properly.

> Thanks for checking.  Might want to add a comment before that
> addressable setting now that you've done the archeology.

I decided to give the following approach a try instead.  The following
patch was regstrapped on x86_64-linux-gnu and i686-linux-gnu.
Ok to install?

Would anyone with access to hpux (pa and ia64 are both affected) give it
a spin?


defer mark_addressable calls during expand till the end of expand

From: Alexandre Oliva 

for  gcc/ChangeLog

* gimple-expr.c: Include hash-set.h and rtl.h.
(mark_addressable_queue): New var.
(mark_addressable): Factor actual marking into...
(mark_addressable_1): ... this.  Queue it up during expand.
(mark_addressable_2): New.
(flush_mark_addressable_queue): New.
* gimple-expr.h (flush_mark_addressable_queue): Declare.
* cfgexpand.c: Include gimple-expr.h.
(pass_expand::execute): Flush mark_addressable queue.
---
 gcc/cfgexpand.c   |3 +++
 gcc/gimple-expr.c |   50 --
 gcc/gimple-expr.h |1 +
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index eaad859..a362e17 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "tree-eh.h"
 #include "gimple-iterator.h"
+#include "gimple-expr.h"
 #include "gimple-walk.h"
 #include "cgraph.h"
 #include "tree-cfg.h"
@@ -6373,6 +6374,8 @@ pass_expand::execute (function *fun)
   /* We're done expanding trees to RTL.  */
   currently_expanding_to_rtl = 0;
 
+  flush_mark_addressable_queue ();
+
   FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun)->next_bb,
  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
 {
diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index 2a6ba1a..db249a3 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimplify.h"
 #include "stor-layout.h"
 #include "demangle.h"
+#include "hash-set.h"
+#include "rtl.h"
 
 /* - Type related -  */
 
@@ -823,6 +825,50 @@ is_gimple_mem_ref_addr (tree t)
  || decl_address_invariant_p (TREE_OPERAND (t, 0);
 }
 
+/* Hold trees marked addressable during expand.  */
+
+static hash_set *mark_addressable_queue;
+
+/* Mark X as addressable or queue it up if called during expand.  */
+
+static void
+mark_addressable_1 (tree x)
+{
+  if (!currently_expanding_to_rtl)
+{
+  TREE_ADDRESSABLE (x) = 1;
+  return;
+}
+
+  if (!mark_addressable_queue)
+mark_addressable_queue = new hash_set();
+  mark_addressable_queue->add (x);
+}
+
+/* Adaptor for mark_addressable_1 for use in hash_set traversal.  */
+
+bool
+mark_addressable_2 (tree const &x, void * ATTRIBUTE_UNUSED = NULL)
+{
+  mark_addressable_1 (x);
+  return false;
+}
+
+/* Mark all queued trees as addressable, and empty the queue.  To be
+   called right after clearing CURRENTLY_EXPANDING_TO_RTL.  */
+
+void
+flush_mark_addressable_queue ()
+{
+  gcc_assert (!currently_expanding_to_rtl);
+  if (mark_addressable_queue)
+{
+  mark_addressable_queue->traverse (NULL);
+  delete mark_addressable_queue;
+  mark_addressable_queue = NULL;
+}
+}
+
 /* Mark X addressable.  Unlike the langhook we expect X to be in gimple
form and we don't do any syntax checking.  */
 
@@ -838,7 +884,7 @@ mark_addressable (tree x)
   && TREE_CODE (x) != PARM_DECL
   && TREE_CODE (x) != RESULT_DECL)
 return;
-  TREE_ADDRESSABLE (x) = 1;
+  mark_addressable_1 (x);
 
   /* Also mark the artificial SSA_NAME that points to the partition of X.  */
   if (TREE_CODE (x) == VAR_DECL
@@ -849,7 +895,7 @@ mark_addressable (tree x)
 {
   tree *namep = cfun->gimple_df->decls_to_pointers->get (x);
   if (namep)
-   TREE_ADDRESSABLE (*namep) = 1;
+   mark_addressable_1 (*namep);
 }
 }
 
diff --git a/gcc/gimple-expr.h b/gcc/gimple-expr.h
index 3d1c89f..2917d2752c 100644
--- a/gcc/gi

Re: using scratchpads to enhance RTL-level if-conversion: revised patch

2015-10-13 Thread Richard Henderson

On 10/14/2015 12:05 PM, Richard Henderson wrote:

If you're using one of the switches that checks for stack overflow at the start
of the function, you certainly don't want to do any such stores.


Oh, and for a given target the kernel may consider any write to the stack vma 
below the stack pointer as invalid.


The x86 kernels will at least handle "enter $65535, $31", which can write to a 
bit more than 64k below %esp before %esp gets updated, but that's probably not 
going to be true of most risc targets.



r~


Re: using scratchpads to enhance RTL-level if-conversion: revised patch

2015-10-13 Thread Richard Henderson

On 10/09/2015 12:23 AM, Bernd Schmidt wrote:

On 10/08/2015 01:29 AM, Abe wrote:

Attached please find my revised patch to the RTL if converter.  This
patch enables the
if-conversion of half-hammocks with a store in them that the internal
GCC machinery
otherwise considers too hazardous to if-convert.  This is made safe by
using the
"scratchpad" technique, i.e. throwing away the store into a safe
location where nothing
of any importance is currently stored.  The scratchpads are allocated in
the stack frame.


So, one conceptual issue first. Obviously this increases the size of the stack
frame, which makes the transformation more expensive. The patch does not appear
to attempt to estimate costs. However, why do we need to allocate anything in
the first place? If you want to store something that will be thrown away, just
pick an address below the stack pointer.


If you're using one of the switches that checks for stack overflow at the start 
of the function, you certainly don't want to do any such stores.



r~


Re: [Patch] PowerPC IEEE 128-bit patch #7 (revised #2)

2015-10-13 Thread Joseph Myers
On Tue, 13 Oct 2015, Michael Meissner wrote:

> I believe every non-NaN value that IBM extended double supports is
> representable in IEEE 754R 128-bit floating point, since IEEE has 112 bits of
> mantissa plus the hidden bit, while IBM extended double has 106 bits (52 bits
> of mantissa for each part plus 2 hidden bits). Even with all of the extra bits

No, because IBM long double can represent values with discontiguous 
mantissa bits (these are the values that 
libgcc/config/rs6000/ibm-ldouble-format describes as denormal but not 
subnormal).

> that you can hand craft into silent/signalling NaNs, you should be able
> represent those values in IEEE 128-bit floating point as similar NaNs.

The low part of a NaN in IBM long double is documented as don't-care (so 
all IBM long double NaNs are correctly converted to binary128 simply by 
converting the high part - as is also the case with zeroes).

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] c/67925 - update documentation on `inline'

2015-10-13 Thread Arkadiusz Drabczyk
* gcc/doc/extend.texi: documentation says that functions declared
`inline' would not be integrated if they are called before they are
defined or if they are recursive. Both of these statements are now
false as shown in examples on Bugzilla.
---
 gcc/doc/extend.texi | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 79440d3..7ea4b62 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7088,12 +7088,9 @@ function are integrated into the caller, and the 
function's address is
 never used, then the function's own assembler code is never referenced.
 In this case, GCC does not actually output assembler code for the
 function, unless you specify the option @option{-fkeep-inline-functions}.
-Some calls cannot be integrated for various reasons (in particular,
-calls that precede the function's definition cannot be integrated, and
-neither can recursive calls within the definition).  If there is a
-nonintegrated call, then the function is compiled to assembler code as
-usual.  The function must also be compiled as usual if the program
-refers to its address, because that can't be inlined.
+If there is a nonintegrated call, then the function is compiled to
+assembler code as usual.  The function must also be compiled as usual if
+the program refers to its address, because that can't be inlined.
 
 @opindex Winline
 Note that certain usages in a function definition can make it unsuitable
-- 
2.3.5


-- 
Arkadiusz Drabczyk 


Re: [PATCH, 3/5] Handle original loop tree in expand_omp_for_generic

2015-10-13 Thread Thomas Schwinge
Hi Tom!

On Mon, 12 Oct 2015 18:56:29 +0200, Tom de Vries  wrote:
> Handle original loop tree in expand_omp_for_generic
> 
> 2015-09-12  Tom de Vries  
> 
>   PR tree-optimization/67476
>   * omp-low.c (expand_omp_for_generic): Handle original loop tree.

Working on a merge from trunk into gomp-4_0-branch, I'm seeing your
change (trunk r228754) conflict with code Chung-Lin changed
(gomp-4_0-branch r224505).  So, would you two please cherry-pick/merge
trunk r228754 into gomp-4_0-branch?  Thanks!  (I'm assuming you can
easily tell what needs to be done here; it's been a long time that
Chung-Lin touched this code, so CCing him just in case.)  Thanks!


Chung-Lin's gomp-4_0-branch r224505:

commit 5f9849b7f0723d06fcd18a18e0880d4df75da92a
Author: cltang 
Date:   Tue Jun 16 08:59:01 2015 +

2015-06-16  Chung-Lin Tang  

* omp-low.c (struct omp_region): Add inside_kernels_p field.
(expand_omp_for_generic): Adjust to generate a 'sequential' loop
when GOMP builtin arguments are BUILT_IN_NONE.
(expand_omp_for): Use expand_omp_for_generic() to generate a
non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
(expand_omp): Mark inside_kernels_p field true for regions
nested inside OpenACC kernels constructs.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@224505 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index be09b0f..6fa08da 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,13 @@
+2015-06-16  Chung-Lin Tang  
+
+   * omp-low.c (struct omp_region): Add inside_kernels_p field.
+   (expand_omp_for_generic): Adjust to generate a 'sequential' loop
+   when GOMP builtin arguments are BUILT_IN_NONE.
+   (expand_omp_for): Use expand_omp_for_generic() to generate a
+   non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
+   (expand_omp): Mark inside_kernels_p field true for regions
+   nested inside OpenACC kernels constructs.
+
 2015-06-15  Cesar Philippidis  
 
* omp-low.c (expand_omp_for_static_nochunk): Update entry_bb after
diff --git gcc/omp-low.c gcc/omp-low.c
index c7451c9..a3dab12 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -161,6 +161,9 @@ struct omp_region
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
 
+  /* True if this is nested inside an OpenACC kernels construct.  */
+  bool inside_kernels_p;
+
   /* For an OpenACC loop, the level of parallelism requested.  */
   int gwv_this;
 
@@ -6862,6 +6865,7 @@ expand_omp_for_generic (struct omp_region *region,
   gassign *assign_stmt;
   bool in_combined_parallel = is_combined_parallel (region);
   bool broken_loop = region->cont == NULL;
+  bool seq_loop = (!start_fn || !next_fn);
   edge e, ne;
   tree *counts = NULL;
   int i;
@@ -6949,7 +6953,20 @@ expand_omp_for_generic (struct omp_region *region,
zero_iter_bb));
}
 }
-  if (in_combined_parallel)
+  if (seq_loop)
+{
+  tree n1 = fold_convert (fd->iter_type, fd->loop.n1);
+  tree n2 = fold_convert (fd->iter_type, fd->loop.n2);
+
+  assign_stmt = gimple_build_assign (istart0, n1);
+  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+  assign_stmt = gimple_build_assign (iend0, n2);
+  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+  t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0);
+}
+  else if (in_combined_parallel)
 {
   /* In a combined parallel loop, emit a call to
 GOMP_loop_foo_next.  */
@@ -7135,32 +7152,38 @@ expand_omp_for_generic (struct omp_region *region,
collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
 
   /* Emit code to get the next parallel iteration in L2_BB.  */
-  gsi = gsi_start_bb (l2_bb);
+  if (!seq_loop)
+   {
+ gsi = gsi_start_bb (l2_bb);
 
-  t = build_call_expr (builtin_decl_explicit (next_fn), 2,
-  build_fold_addr_expr (istart0),
-  build_fold_addr_expr (iend0));
-  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-   false, GSI_CONTINUE_LINKING);
-  if (TREE_TYPE (t) != boolean_type_node)
-   t = fold_build2 (NE_EXPR, boolean_type_node,
-t, build_int_cst (TREE_TYPE (t), 0));
-  gcond *cond_stmt = gimple_build_cond_empty (t);
-  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ t = build_call_expr (builtin_decl_explicit (next_fn), 2,
+  build_fold_addr_expr (istart0),
+  build_fold_addr_expr (iend0));
+ t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+   false, GSI_CONTINUE_LINKING);
+ if (TREE_TYPE (t) != boolean_type_node)
+   t = fold_

Re: [PATCH 1/9] ENABLE_CHECKING refactoring

2015-10-13 Thread Jeff Law

On 10/05/2015 05:27 PM, Mikhail Maltsev wrote:

Hi!

This is an updated series of patches which converts 'ENABLE_CHECKING' macro into
a flag, 'flag_checking' (and 'CHECKING_P' macro in several cases). For now
flag_checking is always initialized with the value of 'CHECKING_P', but later it
can be turned into a proper command-line flag and probably split into several
checks. I also added several function which verify internal data structures when
flag_checking is enabled (e.g. checking_verify_flow_info which calls
verify_flow_info). These functions make their callers look somewhat cleaner.

The cases where I left 'CHECKING_P' are:
1. libcpp (turn ICE after an error into fatal error) and pretty-printers (that
would require to pass flag_checking to libcpp just for this single case).
2. Code which fills memory in the pools with some predefined patterns in
deallocation methods (this would add some overhead to each deallocation), though
I have not measured performance impact yet.
3. Generators and generated code.
4. Target-specific code
5. 'struct lra_reg' which has an additional field in checking build
6. Likewise, 'struct moveop_static_params' in insn scheduler and
'cumulative_args_t' in target.h.
7. macro-related code in libcpp (for the same reason)
8. real.c and fwprop.c - I'll profile these and also fix to use flag_checking if
there won't be any measurable overhead.

There are 9 patches:
1. Add flag_checking and CHECKING_P macros
2. Use CHECKING_P in libcpp
3. Ada and Java frontends
4. Fortran frontend
5. Pool allocators
6. Generator programs
7. Most of middle-end (GIMPLE, IPA, RTL) - it can be split further, if needed.
8. Target-specific code
9. C++ frontend - in progress (I will send this part soon).

Some issues related to checking builds:
1. Useless check in graphite:https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67842
2. I found a test which fails only on release builds:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58583  (reopened)
3. Another one: gcc.c-torture/compile/pr52073.c which is, I guess, caused by
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67816  (the backtrace is the same,
at least).

Each patch (when applied on top of all the previous ones) compiles in both
checking and release builds. The combined patch passes bootstrap and regression
tests in checking an release builds (apart from 2 issues mentioned above) on
x86_64-linux. I'll also run it through config-list.mk.

--
Regards, Mikhail Maltsev

gcc/ChangeLog:

2015-10-05  Mikhail Maltsev

	* common.opt: Add flag_checking.
	* system.h (CHECKING_P): Define.

libcpp/ChangeLog:

2015-10-05  Mikhail Maltsev

	* system.h (CHECKING_P, gcc_checking_assert): Define.


0001-Prerequisites-for-ENABLE_CHECKING-conversion.patch


 From 8096ea4714b3b7a96b414a70fd0de34e5e5a707a Mon Sep 17 00:00:00 2001
From: Mikhail Maltsev
Date: Sun, 20 Sep 2015 04:30:42 +0300
Subject: [PATCH 1/9] Prerequisites for ENABLE_CHECKING conversion

Define CHECKING_P macros. Add flag_checking.
Define gcc_checking_assert in libcpp
---
  gcc/common.opt  | 5 +
  gcc/system.h| 3 +++
  libcpp/system.h | 8 
  3 files changed, 16 insertions(+)

I committed this prerequisite patch to the trunk.

jeff



Re: [PATCH 8/9] Add TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

2015-10-13 Thread Richard Henderson

On 10/14/2015 02:49 AM, Jeff Law wrote:

The problem here is we don't know what address space the *0 is going to hit,
right?


Correct, not before we do the walk of stmt to see what's present.


Isn't that also an issue for code generation as well?


What sort of problem are you thinking of?  I haven't seen one yet.


r~


[GOOGLE] Reduce max-vartrack-size

2015-10-13 Thread Teresa Johnson
Reduce the maximum variable tracking size by 20% to avoid extreme
compilation times.

Ok for google-4_9?

2015-10-13  Teresa Johnson  

Google ref b/24569916
* params.def (PARAM_MAX_VARTRACK_SIZE): Reduce default to 40M.

Index: params.def
===
--- params.def (revision 228063)
+++ params.def (working copy)
@@ -1160,7 +1160,7 @@ DEFPARAM (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO,
 DEFPARAM (PARAM_MAX_VARTRACK_SIZE,
   "max-vartrack-size",
   "Max. size of var tracking hash tables",
-  5000, 0, 0)
+  4000, 0, 0)

 /* Set maximum recursion depth for var tracking expression expansion
and resolution.  */


-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: [gomp4] privatize internal array variables introduced by the fortran FE

2015-10-13 Thread Cesar Philippidis
On 10/13/2015 01:29 PM, Jakub Jelinek wrote:
> On Tue, Oct 13, 2015 at 01:12:25PM -0700, Cesar Philippidis wrote:
>> Arrays in fortran have a couple of internal variables associated with
>> them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
>> the array was declared, these internal variables may be packed inside an
>> array descriptor represented by a struct or defined individually. The
>> major problem with this is that kernels and parallel regions with
>> default(none) will generate errors if those internal variables are
>> defined individually since the user has no way to add clauses to them. I
>> suspect this is also true for arrays inside omp target regions.
> 
> I believe gfc_omp_predetermined_sharing is supposed to handle this,
> returning predetermined shared for certain DECL_ARTIFICIAL decls.
> If you are not using that hook, perhaps you should have similar one tuned
> for OpenACC purposes?

We do have one for openacc. I thought it's job was to mark variables as
firstprivate or pcopy as necessary. Anyway, it might be too late to call
gfc_omp_predetermined_sharing from the gimplifier from a performance
standpoint. Consider something like this:

  !$acc data copy (array)
  do i = 1,n
!$acc parallel loop
 do j = 1,n
   ...array...
 end do
  end do
  !$acc end data

The problem here is that all of those internal variables would end up
getting marked as firstprivate. And that would cause more data to be
transferred to the accelerator. This patch reinitialized those variables
on the accelerator so they don't have to be transferred at all.

Cesar


Re: [gomp4] privatize internal array variables introduced by the fortran FE

2015-10-13 Thread Jakub Jelinek
On Tue, Oct 13, 2015 at 01:12:25PM -0700, Cesar Philippidis wrote:
> Arrays in fortran have a couple of internal variables associated with
> them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
> the array was declared, these internal variables may be packed inside an
> array descriptor represented by a struct or defined individually. The
> major problem with this is that kernels and parallel regions with
> default(none) will generate errors if those internal variables are
> defined individually since the user has no way to add clauses to them. I
> suspect this is also true for arrays inside omp target regions.

I believe gfc_omp_predetermined_sharing is supposed to handle this,
returning predetermined shared for certain DECL_ARTIFICIAL decls.
If you are not using that hook, perhaps you should have similar one tuned
for OpenACC purposes?

Jakub


Re: using scratchpads to enhance RTL-level if-conversion: revised patch

2015-10-13 Thread Bernd Schmidt

_Potentially_ so, yes.  However, GCC is free to put the allocation into
an otherwise-unused part of the stack frame.


Well, I looked at code generation changes, and it usually seems to come 
with an increase in stack frame size - sometimes causing extra 
instructions to be emitted.



However, why do we need to allocate anything in the first place?

 > If you want to store something that will be thrown away,
 > just pick an address below the stack pointer.

Because allocating a scratchpad should work on all relevant targets.  We
do not have the resources to test on all GCC-supported
CPU ISAs and on all GCC-supported OSes, and we would like to have an
optimization that works on as many targets as makes sense
[those with cmove-like ability and withOUT full-blown conditional
execution].


Yeah, but if you put in a new facility like this, chances are 
maintainers for active targets will pick it up and add the necessary 
hooks. That's certainly what happened with shrink-wrapping. So I don't 
think this is a concern.



I agree that your suggestion of having one global default scratchpad
allocation policy plus per-target
overrides that are more efficient _is_ a good one, but it will have to
wait a while for implementation
if that`s to be done by me.  In the meantime, the existing allocation
policy is compatible with
multiple targets and costs very little space in the stack frame, if and
when any at all.


I'm afraid I'll have to reject the patch then, on these grounds:
 * it may pessimize code
 * it does not even estimate costs to attempt avoiding this
 * a much simpler, more efficient implementation is possible.


+MEM_NOTRAP_P (mem) = true;

So I'm still not entirely sure which cases you are trying to optimize
and which ones not,


The current patch focuses entirely on half-hammock writes with stores to
addresses
about which GCC "feels nervous", i.e. "may trap or fault"; for example:

   if (condition)
 *pointer = 9;
   // no "else" or "else if"



but couldn't this technique allow a trapping store here?


The purpose of the new if-conversion is to take a may-trap-or-fault
store and replace it with a store
that will be OK if the original program was OK with respect to the
current execution`s inputs,
environment, PRNG results, etc.  For example, the only way the
if-converted code would dereference a
null pointer is if/when the original program would have done the same
thing under the same conditions.


Yeah, but it could still trap if the original program had an error. So I 
don't think setting MEM_NOTRAP_P is right.



Bernd


[gomp4] privatize internal array variables introduced by the fortran FE

2015-10-13 Thread Cesar Philippidis
Arrays in fortran have a couple of internal variables associated with
them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
the array was declared, these internal variables may be packed inside an
array descriptor represented by a struct or defined individually. The
major problem with this is that kernels and parallel regions with
default(none) will generate errors if those internal variables are
defined individually since the user has no way to add clauses to them. I
suspect this is also true for arrays inside omp target regions.

My fix for this involves two parts. First, I reinitialize those private
array variables which aren't associated with array descriptors at the
beginning of the parallel/kernels region they are used in. Second, I
added OMP_CLAUSE_PRIVATE for those internal variables.

I'll apply this patch to gomp-4_0-branch shortly.

Is there any reason why only certain arrays have array descriptors? The
arrays with descriptors don't have this problem. It's only the ones
without descriptors that leak new internal variables that cause errors
with default(none).

Cesar
2015-10-13  Cesar Philippidis  

	gcc/fortran/
	* trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument
	to control whether VLAs should be initialized.  Don't mark this
	function as static.
	(gfc_trans_auto_array_allocation): Update call to
	gfc_trans_array_bounds.
	(gfc_trans_g77_array): Likewise.
	* trans-array.h: Declare gfc_trans_array_bounds.
	* trans-openmp.c (gfc_scan_nodesc_arrays): New function.
	(gfc_privatize_nodesc_arrays_1): New function.
	(gfc_privatize_nodesc_arrays): New function.
	(gfc_init_nodesc_arrays): New function.
	(gfc_trans_oacc_construct): Initialize any internal variables for
	arrays without array descriptors inside the offloaded parallel and
	kernels region.
	(gfc_trans_oacc_combined_directive): Likewise.

	gcc/testsuite/
	* gfortran.dg/goacc/default_none.f95: New test.

diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index a6b761b..86f983a 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5709,9 +5709,9 @@ gfc_trans_array_cobounds (tree type, stmtblock_t * pblock,
 /* Generate code to evaluate non-constant array bounds.  Sets *poffset and
returns the size (in elements) of the array.  */
 
-static tree
+tree
 gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
-stmtblock_t * pblock)
+stmtblock_t * pblock, bool init_vla)
 {
   gfc_array_spec *as;
   tree size;
@@ -5788,7 +5788,9 @@ gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
 }
 
   gfc_trans_array_cobounds (type, pblock, sym);
-  gfc_trans_vla_type_sizes (sym, pblock);
+
+  if (init_vla)
+gfc_trans_vla_type_sizes (sym, pblock);
 
   *poffset = offset;
   return size;
@@ -5852,7 +5854,7 @@ gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym,
   && !INTEGER_CST_P (sym->ts.u.cl->backend_decl))
 gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
-  size = gfc_trans_array_bounds (type, sym, &offset, &init);
+  size = gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Don't actually allocate space for Cray Pointees.  */
   if (sym->attr.cray_pointee)
@@ -5947,7 +5949,7 @@ gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block)
 gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
   /* Evaluate the bounds of the array.  */
-  gfc_trans_array_bounds (type, sym, &offset, &init);
+  gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Set the offset.  */
   if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index 52f1c9a..8dbafb9 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -44,6 +44,8 @@ void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *);
 /* Generate code to deallocate an array, if it is allocated.  */
 tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *);
 
+tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool);
+
 tree gfc_full_array_size (stmtblock_t *, tree, int);
 
 tree gfc_duplicate_allocatable (tree, tree, tree, int, tree);
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 8c1e897..f2e9803 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "arith.h"
 #include "omp-low.h"
 #include "gomp-constants.h"
+#include "hash-set.h"
+#include "tree-iterator.h"
 
 int ompws_flags;
 
@@ -2716,22 +2718,157 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty)
   return stmt;
 }
 
+void gfc_debug_expr (gfc_expr *);
+
+/* Add any array that does not have an array descriptor to the hash_set
+   pointed to by DATA.  */
+
+static int
+gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+		void *data)
+{
+  hash_set *arrays = (hash_set *)data;
+
+  if 

Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data

2015-10-13 Thread Jakub Jelinek
On Tue, Oct 13, 2015 at 05:48:11PM +0300, Ilya Verbin wrote:
> On Mon, Jun 15, 2015 at 22:48:50 +0300, Ilya Verbin wrote:
> > @@ -950,50 +997,41 @@ GOMP_target (int device, void (*fn) (void *), const 
> > void *unused,
> > ...
> > +  devicep->run_func (devicep->target_id, fn_addr, (void *) 
> > tgt_vars->tgt_start);
> 
> If mapnum is 0, tgt_vars->tgt_start is uninitialized.  This is not a big bug,
> because in this case the target function doesn't use this pointer, however
> valgrind warns about sending uninitialized data to target.
> OK for gomp-4_1-branch?
> 
> 
> libgomp/
>   * target.c (gomp_map_vars): Zero tgt->tgt_start when mapnum is 0.

gomp-4_1-branch is frozen.  I'd prefer to initialize tgt_start and tgt_end
to 0 just in the
  if (mapnum == 0)
return tgt;
case.  With that change it is ok for trunk.

> diff --git a/libgomp/target.c b/libgomp/target.c
> index 95360d1..c4e3323 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -323,6 +323,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
> mapnum,
>struct splay_tree_key_s cur_node;
>struct target_mem_desc *tgt
>  = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
> +  tgt->tgt_start = 0;
>tgt->list_count = mapnum;
>tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
>tgt->device_descr = devicep;

Jakub


[PATCH, i386]: Use CEIL where applicable.

2015-10-13 Thread Uros Bizjak
No functional changes.

2015-10-13  Uros Bizjak  

* config/i386/i386.c (classify_argument): Use CEIL where applicable.
(ix86_function_arg_advance): Ditto.
(ix86_function_arg): Ditto.
(ix86_gimplify_va_arg): Ditto.
(ix86_class_max_nregs): Ditto.
(inline_memory_move_cost): Ditto.
(ix86_set_reg_reg_cost): Ditto.
* config/i386/i386.h (HARD_REGNO_NREGS): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {-m32},
committed to mainline SVN.

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 228770)
+++ config/i386/i386.c  (working copy)
@@ -7925,8 +7925,7 @@ classify_argument (machine_mode mode, const_tree t
 {
   HOST_WIDE_INT bytes =
 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
-  int words
-= (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
 
   /* Variable sized entities are always passed/returned in memory.  */
   if (bytes < 0)
@@ -8791,7 +8790,7 @@ ix86_function_arg_advance (cumulative_args_t cum_v
 bytes = int_size_in_bytes (type);
   else
 bytes = GET_MODE_SIZE (mode);
-  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  words = CEIL (bytes, UNITS_PER_WORD);
 
   if (type)
 mode = type_natural_mode (type, NULL, false);
@@ -9124,7 +9123,7 @@ ix86_function_arg (cumulative_args_t cum_v, machin
 bytes = int_size_in_bytes (type);
   else
 bytes = GET_MODE_SIZE (mode);
-  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  words = CEIL (bytes, UNITS_PER_WORD);
 
   /* To simplify the code below, represent vector types with a vector mode
  even if MMX/SSE are not active.  */
@@ -10271,7 +10270,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimp
   if (indirect_p)
 type = build_pointer_type (type);
   size = int_size_in_bytes (type);
-  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  rsize = CEIL (size, UNITS_PER_WORD);
 
   nat_mode = type_natural_mode (type, NULL, false);
   switch (nat_mode)
@@ -42971,7 +42970,7 @@ ix86_class_max_nregs (reg_class_t rclass, machine_
   else if (mode == XCmode)
return (TARGET_64BIT ? 4 : 6);
   else
-   return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+   return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
 }
   else
 {
@@ -43130,8 +43129,7 @@ inline_memory_move_cost (machine_mode mode, enum r
  cost = ix86_cost->int_load[2];
else
  cost = ix86_cost->int_store[2];
-   return (cost * (((int) GET_MODE_SIZE (mode)
-   + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+   return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
 }
 }
 
@@ -43417,7 +43415,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
 
   /* Return the cost of moving between two registers of mode MODE,
  assuming that the move will be in pieces of at most UNITS bytes.  */
-  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
+  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
 }
 
 /* Compute a (partial) cost for rtx X.  Return true if the complete
Index: config/i386/i386.h
===
--- config/i386/i386.h  (revision 228770)
+++ config/i386/i386.h  (working copy)
@@ -1084,9 +1084,9 @@ extern const char *host_detect_local_cpu (int argc
? (COMPLEX_MODE_P (MODE) ? 2 : 1)   \
: ((MODE) == XFmode \
   ? (TARGET_64BIT ? 2 : 3) \
-  : (MODE) == XCmode   \
-  ? (TARGET_64BIT ? 4 : 6) \
-  : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
+  : ((MODE) == XCmode  \
+? (TARGET_64BIT ? 4 : 6)   \
+: CEIL (GET_MODE_SIZE (MODE), UNITS_PER_WORD)))
 
 #define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE)  \
   ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT)
\


Re: [PATCH] reduce size penalty for including C++11 on x86 systems

2015-10-13 Thread Jonathan Wakely

On 13/10/15 21:44 -0400, Nathan Froyd wrote:

Including  in C++11 mode (typically done for
std::{min,max,swap}) includes , for
std::uniform_int_distribution.  On x86 platforms,  manages to
drag in  through x86's opt_random.h header, and
 has gotten rather large recently with the addition of AVX
intrinsics.  The comparison between C++03 mode and C++11 mode is not
quite exact, but it gives an idea of the penalty we're talking about
here:

froydnj@thor:~/src$ echo '#include ' | g++ -x c++ - -o - -E 
-std=c++11 | wc
 53460  127553 1401268
froydnj@thor:~/src$ echo '#include ' | g++ -x c++ - -o - -E 
-std=c++03 | wc
  9202   18933  218189

That's approximately a 7x penalty in C++11 mode (granted, C++11 includes
more than just ) with GCC 4.9.2 on a Debian system; current
mainline is somewhat worse:

froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++11 | 
wc
 84851  210475 2369616
froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++03 | 
wc
  9383   19402  239676

 itself clocks in at 1.3MB+ of preprocessed text.


Yep, that's been bothering me for a while.


This patch aims to reduce that size penalty by recognizing that both of
the places that #include  do not need the full set of x86
intrinsics, but can get by with a smaller, more focused header in each
case.   needs only  to declare __m128i, while
x86's opt_random.h must include  for declarations of
various intrinsic functions.

The net result is that the size of mainline's  is significantly 
reduced:

froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++11 | 
wc
 39174   88538 1015281

which seems like a win.


Indeed!


Bootstrapped on x86_64-pc-linux-gnu with --enable-languages=c,c++,
tested with check-target-libstdc++-v3, no regressions.  Also verified
that  and  pass -fsyntax-check with
-march=native (on a recent Haswell chip); if an -march=native bootstrap
is necessary, I am happy to do that if somebody instructs me in getting
everything properly set up.

OK?


OK, thanks.



Re: [Patch] PowerPC IEEE 128-bit patch #7 (revised #2)

2015-10-13 Thread Michael Meissner
On Thu, Oct 08, 2015 at 09:30:45PM +, Joseph Myers wrote:
> Question: what happens if you mix __float128 and __ibm128 in an arithmetic 
> or conditional expression?
> 
> __float128 a;
> __ibm128 b;
> int x;
> /* ... */
> a + b;
> x ? a : b;
> 
> (And likewise if one or both are the corresponding complex types.)  As I 
> suggested in  I think 
> this would best be rejected for both C and C++ (with testcases).  That 
> accords with TS 18661-3 (just published) making floating-point conversions 
> undefined where neither type's set of values is a subset of the other's.
> 
> The invalid_binary_op target hook should be usable to cover the case where 
> a binary operation would cause implicit conversions, but I don't see an 
> existing hook that would deal with ? : expressions.

Good points. I hadn't delved into error conditions, but I will code up a target
hook not allowing inter-mixing different formats.

I believe every non-NaN value that IBM extended double supports is
representable in IEEE 754R 128-bit floating point, since IEEE has 112 bits of
mantissa plus the hidden bit, while IBM extended double has 106 bits (52 bits
of mantissa for each part plus 2 hidden bits). Even with all of the extra bits
that you can hand craft into silent/signalling NaNs, you should be able
represent those values in IEEE 128-bit floating point as similar NaNs.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] v4 of diagnostic_show_locus and rich_location

2015-10-13 Thread David Malcolm
On Mon, 2015-10-12 at 17:36 +0100, Manuel López-Ibáñez wrote:
> On 12 October 2015 at 16:44, David Malcolm  wrote:
> > v4 of the patch does the conversion of Fortran, and eliminates the
> > adaptation layer.  No partial transitions here!
> >
> > Manu: I hope this addresses your concerns.
> 
> Yes, it looks great. I don't understand how this
> 
> -   and for two locations that do not fit in the same locus line:
> -
> -   [name]:[locus]: Error: (1)
> -   [name]:[locus2]: Error: Some error at (1) and (2)
> +   [locus of primary range]: Error: Some error at (1) and (2)
> 
> 
> passes the Fortran regression testsuite since the testcases normally
> try to match the two locus separately, but I guess you figured out a
> way to make it work and I must admit I did not have the time to read
> the patch in deep detail.

The way it works is that the patch kit emulates the behavior of the old
printer for the -fno-diagnostics-show-caret case.

Consider this two-locus error, where the loci are on different lines.
With the patch it prints:

associate_5.f03:33:6:

 ASSOCIATE (y => x) ! { dg-error "variable definition context" }
2
   y = 5 ! { dg-error "variable definition context" }
  1

Error: Associate-name ‘y’ can not appear in a variable definition context 
(assignment) at (1) because its target at (2) can not, either

...using the new implementation of diagnostic-show-locus.


With -fno-diagnostics-show-caret, it prints:

associate_5.f03:33:6: Error: (1)
associate_5.f03:32:20: Error: Associate-name ‘y’ can not appear in a variable 
definition context (assignment) at (1) because its target at (2) can not, either

where the latter is the same behavior as before the patch.

The testsuite passes since it's faithfully emulating the old
-fno-diagnostics-show-caret behavior.

> But it is a bit strange that you also
> deleted this part:
> 
> -   With -fdiagnostic-show-caret (the default) and for valid locations,
> -   it prints for one location:
> +   With -fdiagnostic-show-caret (the default) it prints:
> 
> -   [locus]:
> +   [locus of primary range]:
> 
>some code
>   1
> Error: Some error at (1)
> 
> -   for two locations that fit in the same locus line:
> +  With -fno-diagnostic-show-caret or if the primary range is not
> +  valid, it prints:
> 
> -   [locus]:
> -
> - some code and some more code
> -1   2
> -   Error: Some error at (1) and (2)
> -
> -   and for two locations that do not fit in the same locus line:
> -
> -   [locus]:
> -
> - some code
> -1
> -   [locus2]:
> -
> - some other code
> -   2
> -   Error: Some error at (1) and (2)
> -
> 
> which should work the same before and after your patch. 

But this isn't what the new printer prints, for the
-fdiagnostic-show-caret case.  It doesn't print multiple "[locusN]:"
lines; these are only printed for the no-d-show-caret case.

> Independently
> of whether the actual logic moved into some new mechanism in the new
> rich locations world, this seems like useful info to keep in
> fortran/error.c.

Perhaps it's easiest to approach this from the POV of what the comment
*should* say.  For reference, the comment for gfc_diagnostic_starter
reads like this after the patch:

/* This function prints the locus (file:line:column), the diagnostic kind
   (Error, Warning) and (optionally) the relevant lines of code with
   annotation lines with '1' and/or '2' below them.

   With -fdiagnostic-show-caret (the default) it prints:

   [locus of primary range]:
   
  some code
 1
   Error: Some error at (1)

  With -fno-diagnostic-show-caret or if the primary range is not
  valid, it prints:

   [locus of primary range]: Error: Some error at (1) and (2)
*/

Does this look OK?

Thanks
Dave



[PATCH] reduce size penalty for including C++11 on x86 systems

2015-10-13 Thread Nathan Froyd
From: Nathan Froyd 

Including  in C++11 mode (typically done for
std::{min,max,swap}) includes , for
std::uniform_int_distribution.  On x86 platforms,  manages to
drag in  through x86's opt_random.h header, and
 has gotten rather large recently with the addition of AVX
intrinsics.  The comparison between C++03 mode and C++11 mode is not
quite exact, but it gives an idea of the penalty we're talking about
here:

froydnj@thor:~/src$ echo '#include ' | g++ -x c++ - -o - -E 
-std=c++11 | wc
  53460  127553 1401268
froydnj@thor:~/src$ echo '#include ' | g++ -x c++ - -o - -E 
-std=c++03 | wc
   9202   18933  218189

That's approximately a 7x penalty in C++11 mode (granted, C++11 includes
more than just ) with GCC 4.9.2 on a Debian system; current
mainline is somewhat worse:

froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++11 | 
wc
  84851  210475 2369616
froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++03 | 
wc
   9383   19402  239676

 itself clocks in at 1.3MB+ of preprocessed text.

This patch aims to reduce that size penalty by recognizing that both of
the places that #include  do not need the full set of x86
intrinsics, but can get by with a smaller, more focused header in each
case.   needs only  to declare __m128i, while
x86's opt_random.h must include  for declarations of
various intrinsic functions.

The net result is that the size of mainline's  is significantly 
reduced:

froydnj@thor: gcc-build$ echo '#include ' | xgcc [...] -std=c++11 | 
wc
  39174   88538 1015281

which seems like a win.

Bootstrapped on x86_64-pc-linux-gnu with --enable-languages=c,c++,
tested with check-target-libstdc++-v3, no regressions.  Also verified
that  and  pass -fsyntax-check with
-march=native (on a recent Haswell chip); if an -march=native bootstrap
is necessary, I am happy to do that if somebody instructs me in getting
everything properly set up.

OK?

-Nathan

* config/cpu/i486/opt/bits/opt_random.h: Include pmmintrin.h instead
of x86intrin.h, and only do so when __SSE3__ is defined.
* include/ext/random: Include emmintrin.h instead of x86intrin.h
---
 libstdc++-v3/ChangeLog | 6 ++
 libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h | 4 +++-
 libstdc++-v3/include/ext/random| 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index e3061ef..ff0b048 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,9 @@
+2015-10-13  Nathan Froyd  
+
+   * config/cpu/i486/opt/bits/opt_random.h: Include pmmintrin.h instead
+   of x86intrin.h, and only do so when __SSE3__
+   * include/ext/random: Include emmintrin.h instead of x86intrin.h
+
 2015-10-11  Joseph Myers  
 
* crossconfig.m4 (GLIBCXX_CROSSCONFIG) <*-linux* | *-uclinux* |
diff --git a/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h 
b/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h
index 4495569..a9f6c13 100644
--- a/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h
+++ b/libstdc++-v3/config/cpu/i486/opt/bits/opt_random.h
@@ -30,7 +30,9 @@
 #ifndef _BITS_OPT_RANDOM_H
 #define _BITS_OPT_RANDOM_H 1
 
-#include 
+#ifdef __SSE3__
+#include 
+#endif
 
 
 #pragma GCC system_header
diff --git a/libstdc++-v3/include/ext/random b/libstdc++-v3/include/ext/random
index 0bcfa4a..ba363ce 100644
--- a/libstdc++-v3/include/ext/random
+++ b/libstdc++-v3/include/ext/random
@@ -40,7 +40,7 @@
 #include 
 #include 
 #ifdef __SSE2__
-# include 
+# include 
 #endif
 
 #if defined(_GLIBCXX_USE_C99_STDINT_TR1) && defined(UINT32_C)
-- 
2.1.4



Re: [patch 0/6] scalar-storage-order merge (2)

2015-10-13 Thread Eric Botcazou
> My main question about this series is - how generally useful do you
> expect it to be? I know of some different projects that would like
> bi-endian capability, but it looks like this series implements something
> that is a little too limited to be of use in these cases.

AdaCore has customers who have been using it for a few years.  With the inline 
pragma and either the configuration pragma (Ada) or the switch (C/C++), you 
can use it without much code rewriting.

> It looks like it comes with a nontrivial maintenance cost.

Nontrivial but manageable IMO and the heavily modified parts (mostly the RTL 
expander) are "cold" these days.  I suspect that less "limited" versions would 
be far more intrusive and less manageable.

Of course I would do the maintenance (I have been doing it for a few years at 
AdaCore), except for the C++ front-end that I don't know at all; that's why 
I'm OK to drop the C++ support for now.

-- 
Eric Botcazou


[PATCH] c++/67942 - diagnose placement new buffer overflow

2015-10-13 Thread Martin Sebor

C++ placement new expression is susceptible to buffer overflow flaws
(see [1]).  In many such cases GCC has sufficient information to
detect and diagnose such defects. The attached patch is a starting
point for this feature.  It lets GCC diagnose basic cases of buffer
overflows when both the size of the buffer and the type being
constructed are constant expressions.  A more sophisticated
implementation would try to detect additional cases in a manner
similar to _FORTIFY_SOURCE.

Besides buffer overflow, placement new can also be misused to
construct objects in unaligned storage (also discussed in the paper
below).  I leave diagnosing such cases and improving the detection
of buffer overflows via a mechanism like Object Size Checking for
a future patch.

Tested on x86_64 with no regressions.

Martin

[1] A New Class of Buffer Overflow Attacks, Kundu, A., Bertino, E.,
31st International Conference on Distributed Computing Systems (ICDCS),
2011 http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5961725

gcc ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
* invoke.texi (-Wplacement-new): Document new option.
	* gcc/testsuite/g++.dg/warn/Wplacement-new-size.C: New test.

gcc/c-family ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
* c.opt (-Wplacement-new): New option.

gcc/cp ChangeLog
2015-10-12  Martin Sebor  

	PR c++/67942
	* cp/init.c (warn_placement_new_too_small): New function.
	(build_new_1): Call it.

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 47ba070..5e9d7a3 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -760,6 +760,10 @@ Wprotocol
 ObjC ObjC++ Var(warn_protocol) Init(1) Warning
 Warn if inherited methods are unimplemented

+Wplacement-new
+C++ Var(warn_placement_new) Init(1) Warning
+Warn for placement new expressions with undefined behavior
+
 Wredundant-decls
 C ObjC C++ ObjC++ Var(warn_redundant_decls) Warning
 Warn about multiple declarations of the same object
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 1ed8f6c..9d23fea 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2269,6 +2269,183 @@ throw_bad_array_new_length (void)
   return build_cxx_call (fn, 0, NULL, tf_warning_or_error);
 }

+/* Attempt to verify that the argument, OPER, of a placement new expression
+   refers to an object sufficiently large for an object of TYPE or an array
+   of NELTS of such objects when NELTS is non-null, and issue a warning when
+   it does not.  SIZE specifies the size needed to construct the object or
+   array and captures the result of NELTS * sizeof (TYPE). (SIZE could, in
+   theory, be greater when the array under construction requires a cookie
+   to store NELTS, but GCC's placement new does not store the cookie.)  */
+static void
+warn_placement_new_too_small (tree type, tree nelts, tree size, tree oper)
+{
+  const_tree orig_oper = oper;
+
+  /* The number of bytes to add or subtract from the size of the provided
+ buffer based on an offset into an array or an array element reference.  */
+  HOST_WIDE_INT adjust = 0;
+  bool addr_expr = false;
+  bool use_obj_size = false;
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+oper = TREE_OPERAND (oper, 0);
+
+  /* Using a function argument or a (non-array) variable as an argument
+ to placement new is not checked since it's unknown what it might
+ point to.  */
+  if (TREE_CODE (oper) == PARM_DECL
+  || TREE_CODE (oper) == VAR_DECL
+  || TREE_CODE (oper) == COMPONENT_REF)
+return;
+
+  /* Evaluate any constant expressions.  */
+  size = fold_non_dependent_expr (size);
+
+  /* Handle the common case of array + offset expression when the offset
+ is a constant.  */
+  if (TREE_CODE (oper) == POINTER_PLUS_EXPR)
+{
+  /* If the offset is comple-time constant, use it to compute a more
+	 accurate estimate of the size of the buffer.  Otherwise, use
+	 the size of the entire array as an optimistic estimate (this
+	 may lead to false negatives).  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  if (CONSTANT_CLASS_P (adj))
+	adjust += (HOST_WIDE_INT)tree_to_uhwi (adj);
+  else
+	use_obj_size = true;
+
+  oper = TREE_OPERAND (oper, 0);
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+	oper = TREE_OPERAND (oper, 0);
+}
+
+  if (TREE_CODE (oper) == TARGET_EXPR)
+oper = TREE_OPERAND (oper, 1);
+  else if (TREE_CODE (oper) == ADDR_EXPR) {
+addr_expr = true;
+oper = TREE_OPERAND (oper, 0);
+  }
+
+  while (TREE_CODE (oper) == NOP_EXPR)
+oper = TREE_OPERAND (oper, 0);
+
+  if (TREE_CODE (oper) == ARRAY_REF)
+{
+  // fold_array_ref (oper);
+
+  /* Similar to the offset computed above, see if the array index
+	 is a compile-time constant.  If so, and unless the offset was
+	 not a compile-time constant, use the index to determine the
+	 size of the buffer.  Otherwise, use the entire array as
+	 an optimistic estimate of the size.  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  if (!use_obj_size && CONSTANT_CLASS_P (adj))

Re: [PATCH 0/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Andrew Pinski
On Tue, Oct 13, 2015 at 4:13 AM, Maxim Ostapenko
 wrote:
> Hi,
>
> it's been a while since the last libsanitizer merge from upstream into GCC
> happened and the library has significantly changed since that time. The main
> features to be ported are:
>
> -New common strings interceptors were added.
> -Various allocator improvements were performed.
> -Improvements for ASan deactivated start were performed.
> -TSan and LSan were enabled for Aarch64.
> -Fast unwinding was enabled for Aarch64.
> -New tsan_unaligned_{load, store}_[n] functions were intoduced.
> -asan_stack_malloc_[n] doesn't take a local stack as a second parameter
> anymore.
> -sanitization for std containers is supported now.
> -New interface functions for dynamic allocas and VLA's poisoning/unpoisoning
> were introduced.
>
> Some features are not ported for now, by might be enabled in future:
>
> -Embedded UBSan runtime into ASan and TSan ones. I don't enable this now,
> because of errors during ASan static linkage: GCC uses -whole-archive option
> that would lead to undefined references to C++ stuff.
> -UBSan data descriptors for float-cast conversion support location
> propagation now. But sometimes we have loc == UNKNOWN_LOCATION in
> ubsan_instrument_float_cast, so use old ABI for now. See below for details.
>
> The first patch of the series is the merge itself.
>
> The second one introduces corresponding compiler changes.
>
> Other patches are applied to library and they are GCC-specific:
>
> Patches 3 and 4 are just reapplied David's and Jakub's patches for SPARC and
> disabling ODR violation detection respectively.
>
> Patch 5 removes UBSan stubs from ASan and TSan code since we don't support
> embedded UBSan runtime into ASan and TSan.
>
> Patch 6 changes heuristic for extracting last PC from stack frame for ARM in
> fast unwind routine. More details can be found here
> (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61771).
>
> Patch 7 forces libsanitizer to use an old ABI for ubsan float cast data
> descriptors, because sometimes we can have loc == UNKNOWN_LOCATION in
> ubsan_instrument_float_cast e.g. in a such case:
>
> ..
> volatile double foo; // ubsan_instrument_float_cast is called by convert
> function.
> ..
>
> Since foo is a tcc_declaration, loc is UNKNOWN_LOCATION. I'm actually not
> sure about this, perhaps we can fix this in GCC somehow.
>
> I've regtested and {A, UB}San bootstrapped these patches on
> x86-64-unknown-linux-gnu and aarch64-linux-gnueabi (Juno board, 39 bit VA
> space) and tested for ARM under QEMU-ARM.
> Testing ASan under QEMU-AARCH64 revealed many test failures due to LSan was
> enabled. In particular, it tries to call internal_clone function in LSan
> internals, that in turn calls _NR_clone syscall and than QEMU exits with
> EINTR error code (that might be expected, AFAIK QEMU is not very good with
> threads). So, I wonder, if I should disable LSan for AArch64 now?

We should just disable ASAN and TSAN for AARCH64 until 48bit VA is
supported.  Since the majority of the distros are going to be using
48bit VA as it is required to be used to support dual node ThunderX
(since ThunderX's physical address space uses all 48bits).

Thanks,
Andrew Pinski

>
> I'm also asking community to help me with testing these patches on various
> targets (ARM, PPC, etc) I'm lack of, so could you help me on this please?
>
> -Maxim


[PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-10-13 Thread Andre Vieira
This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
(https://git.linaro.org/toolchain/cortex-strings.git), which was 
contributed by ARM under Free BSD license.


The new aeabi_idiv routine is used to replace the one in 
libgcc/config/arm/lib1funcs.S. This replacement happens within the 
Thumb1 wrapper. The new routine is under LGPLv3 license.


The main advantage of this version is that it can improve the 
performance of the aeabi_idiv function for Thumb1. This solution will 
also increase the code size. So it will only be used if 
__OPTIMIZE_SIZE__ is not defined.


Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
Andre Vieira  

  * config/arm/lib1funcs.S: Add new wrapper.
From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira 
Date: Fri, 21 Aug 2015 14:23:28 +0100
Subject: [PATCH] new wrapper idivmod

---
 libgcc/config/arm/lib1funcs.S | 250 --
 1 file changed, 217 insertions(+), 33 deletions(-)

diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -306,34 +306,12 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #if defined(__ARM_ARCH_6M__)
-	.ifc \signed, unsigned
-	cmp	r0, #0
-	beq	1f
-	mov	r0, #0
-	mvn	r0, r0		@ 0x
-1:
-	.else
-	cmp	r0, #0
-	beq	2f
-	blt	3f
+
+	push	{r0, lr}
 	mov	r0, #0
-	mvn	r0, r0
-	lsr	r0, r0, #1	@ 0x7fff
-	b	2f
-3:	mov	r0, #0x80
-	lsl	r0, r0, #24	@ 0x8000
-2:
-	.endif
-	push	{r0, r1, r2}
-	ldr	r0, 4f
-	adr	r1, 4f
-	add	r0, r1
-	str	r0, [sp, #8]
-	@ We know we are not on armv4t, so pop pc is safe.
-	pop	{r0, r1, pc}
-	.align	2
-4:
-	.word	__aeabi_idiv0 - 4b
+	bl	SYM(__aeabi_idiv0)
+	pop	{r1, pc}
+
 #elif defined(__thumb2__)
 	.syntax unified
 	.ifc \signed, unsigned
@@ -945,7 +923,170 @@ LSYM(Lover7):
 	add	dividend, work
   .endif
 LSYM(Lgot_result):
-.endm	
+.endm
+
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+/* If performance is preferred, the following functions are provided.  */
+
+/* Branch to div(n), and jump to label if curbit is lower than the divisor.  */
+.macro BranchToDiv n, label
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	blo	\label
+.endm
+
+/* Body of div(n).  Shift the divisor left by n bits and compare it with
+   the dividend.  Update the dividend with the subtraction result.  */
+.macro DoDiv n
+	lsr	curbit, dividend, \n
+	cmp	curbit, divisor
+	bcc	1f
+	lsl	curbit, divisor, \n
+	sub	dividend, dividend, curbit
+
+1:	adc	result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+	mov	result, #0
+	BranchToDiv #1, LSYM(Lthumb1_div1)
+	BranchToDiv #4, LSYM(Lthumb1_div4)
+	BranchToDiv #8, LSYM(Lthumb1_div8)
+	BranchToDiv #12, LSYM(Lthumb1_div12)
+	BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+	mov	result, #0xff
+	lsl	divisor, divisor, #8
+	rev	result, result
+	lsr	curbit, dividend, #16
+	cmp	curbit, divisor
+	blo	1f
+	asr	result, #8
+	lsl	divisor, divisor, #8
+	beq	LSYM(Ldivbyzero_waypoint)
+
+1:	lsr	curbit, dividend, #12
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div12)
+	b	LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+	lsr	divisor, divisor, #8
+LSYM(Lthumb1_div16):
+	Dodiv	#15
+	Dodiv	#14
+	Dodiv	#13
+	Dodiv	#12
+LSYM(Lthumb1_div12):
+	Dodiv	#11
+	Dodiv	#10
+	Dodiv	#9
+	Dodiv	#8
+	bcs	LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+	Dodiv	#7
+	Dodiv	#6
+	Dodiv	#5
+LSYM(Lthumb1_div5):
+	Dodiv	#4
+LSYM(Lthumb1_div4):
+	Dodiv	#3
+LSYM(Lthumb1_div3):
+	Dodiv	#2
+LSYM(Lthumb1_div2):
+	Dodiv	#1
+LSYM(Lthumb1_div1):
+	sub	divisor, dividend, divisor
+	bcs	1f
+	cpy	divisor, dividend
+
+1:	adc	result, result
+	cpy	dividend, result
+	RET
+
+LSYM(Ldivbyzero_waypoint):
+	b	LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar to
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+	lsr	result, divisor, #31
+	beq	1f
+	neg	divisor, divisor
+
+1:	asr	curbit, dividend, #32
+	bcc	2f
+	neg	dividend, dividend
+
+2:	eor	curbit, result
+	mov	result, #0
+	cpy	ip, curbit
+	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+	mov	result, #0xfc
+	lsl	divisor, divisor, #6
+	rev	result, result
+	lsr	curbit, dividend, #8
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb1_div_negative8)
+
+	lsl	divisor, divisor, #6
+	asr	result, result, #6
+	cmp	curbit, divisor
+	blo	LSYM(Lthumb

Re: [patch 5/6] scalar-storage-order merge: rest

2015-10-13 Thread Jeff Law

On 10/06/2015 05:05 AM, Eric Botcazou wrote:

This is the rest of the implementation.

* asan.c (instrument_derefs): Adjust call to get_inner_reference.
* builtins.c (get_object_alignment_2): Likewise.
* cfgexpand.c (expand_debug_expr): Adjust call to get_inner_reference
and get_ref_base_and_extent.
* dbxout.c (dbxout_expand_expr): Likewise.
* dwarf2out.c (add_var_loc_to_decl): Likewise.
(loc_list_for_address_of_addr_expr_of_indirect_ref): Likewise.
(loc_list_from_tree): Likewise.
(fortran_common): Likewise.
* gimple-fold.c (gimple_fold_builtin_memory_op): Adjust calls to
get_ref_base_and_extent.
(get_base_constructor): Likewise.
(fold_const_aggregate_ref_1): Likewise.
* gimple-laddress.c (pass_laddress::execute): Adjust call to
get_inner_reference.
* gimple-ssa-strength-reduction.c (slsr_process_ref): Adjust call to
get_inner_reference and bail out on reverse storage order.
* ifcvt.c (noce_emit_move_insn): Adjust calls to store_bit_field.
* ipa-cp.c (ipa_get_jf_ancestor_result): Adjust call to
build_ref_for_offset.
* ipa-polymorphic-call.c (set_by_invariant): Adjust call to
get_ref_base_and_extent.
(ipa_polymorphic_call_context): Likewise.
(extr_type_from_vtbl_ptr_store): Likewise.
(check_stmt_for_type_change): Likewise.
(get_dynamic_type): Likewise.
* ipa-prop.c (ipa_load_from_parm_agg_1): Adjust call to
get_ref_base_and_extent.
(compute_complex_assign_jump_func): Likewise.
(get_ancestor_addr_info): Likewise.
(compute_known_type_jump_func): Likewise.
(determine_known_aggregate_parts): Likewise.
(ipa_get_adjustment_candidate): Likewise.
(ipa_modify_call_arguments): Set REF_REVERSE_STORAGE_ORDER on
MEM_REF.
* ipa-prop.h (ipa_parm_adjustment): Add REVERSE field.
(build_ref_for_offset): Adjust prototype.
* simplify-rtx.c (delegitimize_mem_from_attrs): Adjust call to
get_inner_reference.
* tree-affine.c (tree_to_aff_combination): Adjust call to
get_inner_reference.
(get_inner_reference_aff): Likewise.
* tree-data-ref.c (split_constant_offset_1): Likewise.
(dr_analyze_innermost): Likewise.  Bail out if reverse storage order.
* tree-scalar-evolution.c (interpret_rhs_expr): Adjust call to
get_inner_reference.
* tree-sra.c (struct access): Add REVERSE and move WRITE around.
(dump_access): Print new fields.
(create_access): Adjust call to get_ref_base_and_extent and set the
REVERSE flag according to the result.
(completely_scalarize_record): Set the REVERSE flag.
(scalarize_elem): Add REVERSE parameter.
(build_access_from_expr_1): Preserve storage order barriers.
(build_accesses_from_assign): Likewise.
(build_ref_for_offset): Add REVERSE parameter and set the
REF_REVERSE_STORAGE_ORDER flag accordingly.
(build_ref_for_model): Adjust call to build_ref_for_offset and clear
the REF_REVERSE_STORAGE_ORDER flag if there are components.
(analyze_access_subtree): Likewise.
(create_artificial_child_access): Set the REVERSE flag.
(get_access_for_expr): Adjust call to get_ref_base_and_extent.
(turn_representatives_into_adjustments): Propagate REVERSE flag.
(ipa_sra_check_caller): Adjust call to get_inner_reference.
* tree-ssa-alias.c (ao_ref_base): Adjust call to
get_ref_base_and_extent.
(aliasing_component_refs_p): Likewise.
(stmt_kills_ref_p_1): Likewise.
* tree-ssa-dce.c (mark_aliased_reaching_defs_necessary_1): Likewise.
* tree-ssa-loop-ivopts.c (may_be_nonaddressable_p) : New.
Return true if reverse storage order.
: Likewise.
: Likewise.
: Likewise.
: Likewise.
(split_address_cost): Likewise.  Bail out if reverse storage order.
* tree-ssa-math-opts.c (find_bswap_or_nop_load): Adjust call to
get_inner_reference.  Bail out if reverse storage order.
(bswap_replace): Adjust call to get_inner_reference.
* tree-ssa-pre.c (create_component_ref_by_pieces_1) : Set
the REF_REVERSE_STORAGE_ORDER flag.
: Likewise.
* tree-ssa-sccvn.c (vn_reference_eq): Return false on storage order
barriers.
(copy_reference_ops_from_ref) : Set REVERSE field according
to the REF_REVERSE_STORAGE_ORDER flag.
: Likewise.
: Set it for storage order barriers.
(contains_storage_order_barrier_p): New predicate.
(vn_reference_lookup_3): Adjust calls to get_ref_base_and_extent.
Punt on storage order barriers if necessary.
* tree-ssa-sccvn.h (struct vn_reference_op_struct): Add REVERSE.
* tree-ssa-structalias.c (get_constraint_for_compone

Re: Do not use TYPE_CANONICAL in useless_type_conversion

2015-10-13 Thread Alexandre Oliva
On Oct 13, 2015, Eric Botcazou  wrote:

> Note that this is PR middle-end/67912.

Thanks.  I added this piece of information to the ChangeLog entry, and
checked the patch in.

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)

2015-10-13 Thread Marek Polacek
This patch implements the copysign optimization for reassoc I promised
I'd look into.  I.e.,

CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0

After getting familiar with reassoc a bit this wasn't that hard.  But
I'm hopeless when it comes to floating-point stuff, so I'd appreciate
if you could glance over the tests.  The reassoc-40.c should address
Joseph's comment in the audit trail (with -fno-rounding-math the
optimization would take place).

For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
reassoc, so we probably don't have to pay attention to this case.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  

PR tree-optimization/67815
* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
(reassociate_bb): Call it.

* gcc.dg/tree-ssa/reassoc-39.c: New test.
* gcc.dg/tree-ssa/reassoc-40.c: New test.

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c 
gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c 
gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec 
*ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+{
+  if (TREE_CODE (oe->op) == SSA_NAME)
+   {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+ if (is_gimple_call (def_stmt))
+   {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ tree cst2;
+ switch (DECL_FUNCTION_CODE (fndecl))
+   {
+   CASE_FLT_FN (BUILT_IN_COPYSIGN):
+ cst2 = gimple_call_arg (def_stmt, 0);
+ /* The first argument of copysign must be a constant,
+otherwise there's nothing to do.  */
+ if (TREE_CODE (cst2) == REAL_CST)
+   {
+ tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+ cst1, cst2);
+ /* If we couldn't fold to a single constant, skip it.  */
+ if (mul == NULL_TREE)
+   break;
+ /* We're going to replace the copysign argument with
+the multiplication product.  Remove the constant.  */
+ ops->pop ();
+ gimple_call_set_arg (def_stmt, 0, mul);
+ bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+ /* Handle the CST1 < 0 case -- negate the result.  */
+ if (cst1_neg)
+   {
+ tree lhs = gimple_call_lhs (def_stmt);
+ tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+  

Re: [gomp4, committed] Move kernels pass group before pass_fre

2015-10-13 Thread Tom de Vries

On 13/10/15 17:59, Tom de Vries wrote:

Hi,

this patch moves the kernels pass group to before pass_fre. Instead we
use pass_dominator_oacc_kernels in the pass group.



And that means we can get rid of the .omp_data_i init handling in 
tree-ssa-sccvn.c.


Committed to gomp-4_0-branch.

Thanks,
- Tom


Revert .omp_data_i init handling in tree-ssa-sccvn.c

2015-10-13  Tom de Vries  

	Revert:
	2015-04-21  Tom de Vries  

	* tree-ssa-sccvn.c: Include omp-low.h.
	(visit_use): Handle .omp_data_i init conservatively.
---
 gcc/tree-ssa-sccvn.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index d5964b4..5b06d29 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -58,7 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "domwalk.h"
 #include "cgraph.h"
 #include "gimple-iterator.h"
-#include "omp-low.h"
 
 /* This algorithm is based on the SCC algorithm presented by Keith
Cooper and L. Taylor Simpson in "SCC-Based Value numbering"
@@ -3623,8 +3622,7 @@ visit_use (tree use)
 {
   if (gimple_code (stmt) == GIMPLE_PHI)
 	changed = visit_phi (stmt);
-  else if (gimple_has_volatile_ops (stmt)
-	   || gimple_stmt_omp_data_i_init_p (stmt))
+  else if (gimple_has_volatile_ops (stmt))
 	changed = defs_to_varying (stmt);
   else if (is_gimple_assign (stmt))
 	{
-- 
1.9.1



Re: [patch 4/6] scalar-storage-order merge: bulk

2015-10-13 Thread Jeff Law

On 10/06/2015 05:04 AM, Eric Botcazou wrote:

This is the bulk of the implementation.

* calls.c (store_unaligned_arguments_into_pseudos): Adjust calls to
extract_bit_field and store_bit_field.
(initialize_argument_information): Adjust call to store_expr.
(load_register_parameters): Adjust call to extract_bit_field.
* expmed.c (check_reverse_storage_order_support): New function.
(check_reverse_float_storage_order_support): Likewise.
(flip_storage_order): Likewise.
(store_bit_field_1): Add REVERSE parameter.  Flip the storage order
of the value if it is true.  Pass REVERSE to recursive call after
adjusting the target offset.
Do not use extraction or movstrict instruction if REVERSE is true.
Pass REVERSE to store_fixed_bit_field.
(store_bit_field): Add REVERSE parameter and pass to it to above.
(store_fixed_bit_field): Add REVERSE parameter and pass to it to
store_split_bit_field and store_fixed_bit_field_1.
(store_fixed_bit_field_1):  Add REVERSE parameter.  Flip the storage
order of the value if it is true and adjust the target offset.
(store_split_bit_field): Add REVERSE parameter and pass it to
store_fixed_bit_field.  Adjust the target offset if it is true.
(extract_bit_field_1): Add REVERSE parameter.  Flip the storage order
of the value if it is true.  Pass REVERSE to recursive call after
adjusting the target offset.
Do not use extraction or subreg instruction if REVERSE is true.
Pass REVERSE to extract_fixed_bit_field.
(extract_bit_field): Add REVERSE parameter and pass to it to above.
(extract_fixed_bit_field): Add REVERSE parameter and pass to it to
extract_split_bit_field and extract_fixed_bit_field_1.
(extract_fixed_bit_field_1): Add REVERSE parameter.  Flip the storage
order of the value if it is true and adjust the target offset.
(extract_split_bit_field): Add REVERSE parameter and pass it to
extract_fixed_bit_field.  Adjust the target offset if it is true.
* expmed.h (flip_storage_order): Declare.
(store_bit_field): Adjust prototype.
(extract_bit_field): Likewise.
* expr.c (emit_group_load_1): Adjust calls to extract_bit_field.
(emit_group_store): Adjust call to store_bit_field.
(copy_blkmode_from_reg): Likewise.
(copy_blkmode_to_reg): Likewise.
(write_complex_part): Likewise.
(read_complex_part): Likewise.
(optimize_bitfield_assignment_op): Add REVERSE parameter.  Assert
that it isn't true if the target is a register.
: If it is, do not optimize unless bitsize is equal to 1,
and flip the storage order of the value.
: Flip the storage order of the value.
(get_bit_range): Adjust call to get_inner_reference.
(expand_assignment): Adjust calls to get_inner_reference, store_expr,
optimize_bitfield_assignment_op and store_field.  Handle MEM_EXPRs
with reverse storage order.
(store_expr_with_bounds): Add REVERSE parameter and pass it to
recursive calls and call to store_bit_field.  Force the value into a
register if it is true and then flip the storage order of the value.
(store_expr): Add REVERSE parameter and pass it to above.
(categorize_ctor_elements_1): Adjust call to
initializer_constant_valid_p.
(store_constructor_field): Add REVERSE parameter and pass it to
recursive calls and call to store_field.
(store_constructor): Add REVERSE parameter and pass it to calls to
store_constructor_field and store_expr.  Set it to true for an
aggregate type with TYPE_REVERSE_STORAGE_ORDER.
(store_field): Add REVERSE parameter and pass it to recursive calls
and calls to store_expr and store_bit_field.  Temporarily flip the
storage order of the value with record type and integral mode and
adjust the shift if it is true.
(get_inner_reference): Add PREVERSEP parameter and set it to true
upon encoutering a reference with reverse storage order.
(expand_expr_addr_expr_1): Adjust call to get_inner_reference.
(expand_constructor): Adjust call to store_constructor.
(expand_expr_real_2) : Pass TYPE_REVERSE_STORAGE_ORDER
of the union type to store_expr in the MEM case and assert that it
isn't set in the REG case.  Adjust call to store_field.
(expand_expr_real_1) : Handle reverse storage order.
: Add REVERSEP variable and adjust calls to
get_inner_reference and extract_bit_field. Temporarily flip the
storage order of the value with record type and integral mode and
adjust the shift if it is true.  Flip the storage order of the value
at the end if it is true.
: Add REVERSEP variable and adjust call to
ge

[gomp4, committed] Move kernels pass group before pass_fre

2015-10-13 Thread Tom de Vries

Hi,

this patch moves the kernels pass group to before pass_fre. Instead we 
use pass_dominator_oacc_kernels in the pass group.


This fixes an ICE while compiling the test-case included in the patch.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Move kernels pass group before pass_fre

2015-10-13  Tom de Vries  

	* tree-ssa-dom.c (pass_dominator_oacc_kernels::clone): New function.
	* passes.def: Move pass group pass_oacc_kernels to before pass_fre. Add
	pass_dominator_oacc_kernels twice in the pass_oacc_kernels pass group.

	* c-c++-common/goacc/kernels-acc-on-device-2.c: New test.
	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Update.
---
 gcc/passes.def |  4 ++-
 .../c-c++-common/goacc/kernels-acc-on-device-2.c   | 37 ++
 .../goacc/kernels-counter-var-redundant-load.c | 10 +++---
 gcc/tree-ssa-dom.c |  1 +
 4 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c

diff --git a/gcc/passes.def b/gcc/passes.def
index bc454c0..4ed4ccd 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,12 +86,13 @@ along with GCC; see the file COPYING3.  If not see
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	 execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
-	  NEXT_PASS (pass_fre);
 	  /* Pass group that runs when there are oacc kernels in the
 	 function.  */
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	  NEXT_PASS (pass_dominator_oacc_kernels);
 	  NEXT_PASS (pass_ch_oacc_kernels);
+	  NEXT_PASS (pass_dominator_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_copy_prop);
@@ -105,6 +106,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
+	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_merge_phi);
   NEXT_PASS (pass_dse);
 	  NEXT_PASS (pass_cd_dce);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c
new file mode 100644
index 000..2c7297b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c
@@ -0,0 +1,37 @@
+/* { dg-additional-options "-O2" } */
+
+#include "openacc.h"
+
+#define N 32
+
+void
+foo (float *a, float *b)
+{
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+  {
+int ii;
+int on_host = acc_on_device (acc_device_X);
+
+for (ii = 0; ii < N; ii++)
+  {
+	if (on_host)
+	  b[ii] = a[ii] + 1;
+	else
+	  b[ii] = a[ii];
+  }
+  }
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+  {
+int ii;
+int on_host = acc_on_device (acc_device_X);
+
+for (ii = 0; ii < N; ii++)
+  {
+	if (on_host)
+	  b[ii] = a[ii] + 2;
+	else
+	  b[ii] = a[ii];
+  }
+  }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
index 84dee69..c4ffc1d 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
@@ -1,5 +1,5 @@
 /* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-fdump-tree-dom_oacc_kernels" } */
+/* { dg-additional-options "-fdump-tree-dom_oacc_kernels3" } */
 
 #include 
 
@@ -28,7 +28,9 @@ foo (unsigned int *c)
_15 = .omp_data_i_10->c;
c.1_16 = *_15;
 
-   Check that there's only one load from anonymous ssa-name (which we assume to
-   be the one to read c), and that there's no such load for ii.  */
+   Check that there are two loads from anonymous ssa-names, which we assume to
+   be:
+   - the one to read c
+   - the one to read ii after the kernels region.  */
 
-/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 1 "dom_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 2 "dom_oacc_kernels3" } } */
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index c7dc7b0..87f9daa 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -788,6 +788,7 @@ public:
   {}
 
   /* opt_pass methods: */
+  opt_pass * clone () { return new pass_dominator_oacc_kernels (m_ctxt); }
   virtual bool gate (function *) { return true; }
 
  private:
-- 
1.9.1



Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Ilya Enkovich
2015-10-13 18:42 GMT+03:00 Jeff Law :
> On 10/13/2015 08:14 AM, Ilya Enkovich wrote:

 +
 +static tree
 +build_vec_cmp (tree_code code, tree type,
 +  tree arg0, tree arg1)
 +{
 +  tree zero_vec = build_zero_cst (type);
 +  tree minus_one_vec = build_minus_one_cst (type);
 +  tree cmp_type = build_same_sized_truth_vector_type (type);
 +  tree cmp = build2 (code, cmp_type, arg0, arg1);
 +  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
 +}
>>>
>>> Isn't this implementation the same for C & C++?  Does it make sense to
>>> put
>>> it in c-family/c-common.c?
>>
>>
>> C++ version calls fold_if_not_in_template for generated comparison.  It is
>> required there to successfully recognize vector MIN, MAX and ABS templates
>> for vector ?: conditional operator.  Vector form of ?: conditional operator
>> is supported for C++ only.
>
> Ah, nevermind then.
>
>
>>>
>>> However, more generally, do we need to do anything for the other
>>> languages?
>>
>>
>> Looking into that I got an impression vector modes are used by C/C++
>> vector extensions only.  And I think regression testing would reveal some
>> failures otherwise.
>
> Maybe this stuff hasn't bled into the Fortran front-end, but the gfortran
> front-end certainly has OpenMP support which presumably has vector
> extensions.

OpenMP extension doesn't produce any vector code in front-end. Code
will be produced by vectorizer anyway.

>
> The fact that nothing's failing in the testsuite is encouraging, but it'd be
> worth spending a few minutes taking a look to see if there's something that
> might need updating.

I also grepped for VEC_COND_EXPR and it never occurs in front-ends
other than C/C++.

Thanks,
Ilya

>
> Jeff
>


Re: [gomp4, committed] Add goacc/kernels-acc-on-device.c

2015-10-13 Thread Tom de Vries

On 12/10/15 14:52, Tom de Vries wrote:

On 12/10/15 12:49, Thomas Schwinge wrote:

Hi Tom!

On Sat, 10 Oct 2015 12:49:01 +0200, Tom de
Vries  wrote:

>--- /dev/null
>+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
>@@ -0,0 +1,39 @@
>+/* { dg-additional-options "-O2" } */
>+
>+#include 


Hi Thomas,


That doesn't work (at least in build-tree testing), as gcc/testsuite/ is
not set up to look for header files in [target]/libgomp/:

[...]/source-gcc/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c:3:21:
fatal error: openacc.h: No such file or directory
 compilation terminated.
 compiler exited with status 1



Ah, I see. I was doing 'make' followed by 'make install', and then
build-tree testing. The build-tree testing seems to pick up the header
file from the install directory. So for me test passed.


>+
>+#define N 32
>+
>+void
>+foo (float *a, float *b)
>+{
>+  float exp;
>+  int i;
>+  int n;
>+
>+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
>+  {
>+int ii;
>+
>+for (ii = 0; ii < N; ii++)
>+  {
>+if (acc_on_device (acc_device_host))

Your two options are: if that's applicable/sufficient for what you intend
to test here, use __builtin_acc_on_device with a hard-coded acc_device_*,
or duplicate part of  as done for example in
gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c.



Went with second option, committed as attached.


As a follow-up patch, I've factored the code into a mockup openacc.h, 
now shared by several test-cases.


Committed to gomp-4_0-branch.

Thanks,
- Tom
Factor out goacc/openacc.h

2015-10-13  Tom de Vries  

	* c-c++-common/goacc/openacc.h: New header file, factored out of ...
	* c-c++-common/goacc/kernels-acc-on-device.c: ... here.
	* c-c++-common/goacc/acc_on_device-2-off.c: Use openacc.h.
	* c-c++-common/goacc/acc_on_device-2.c: Same.
---
 .../c-c++-common/goacc/acc_on_device-2-off.c  | 11 +--
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c| 13 +
 .../c-c++-common/goacc/kernels-acc-on-device.c| 19 +--
 gcc/testsuite/c-c++-common/goacc/openacc.h| 18 ++
 4 files changed, 21 insertions(+), 40 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/openacc.h

diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
index 71abe11..cce58de 100644
--- a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
+++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2-off.c
@@ -3,16 +3,7 @@
 
 /* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
 
-#if __cplusplus
-extern "C" {
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (int);
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 int
 f (void)
diff --git a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index 243e562..19a5bd3 100644
--- a/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -1,18 +1,7 @@
 /* Have to enable optimizations, as otherwise builtins won't be expanded.  */
 /* { dg-additional-options "-O -fdump-rtl-expand" } */
 
-/* Duplicate parts of libgomp/openacc.h, because we can't include it here.  */
-
-#if __cplusplus
-extern "C" {
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-extern int acc_on_device (int);
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 int
 f (void)
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
index 784c66a..958b65b 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device.c
@@ -1,23 +1,6 @@
 /* { dg-additional-options "-O2" } */
 
-#if __cplusplus
-extern "C" {
-#endif
-
-#if __cplusplus >= 201103
-# define __GOACC_NOTHROW noexcept
-#elif __cplusplus
-# define __GOACC_NOTHROW throw ()
-#else /* Not C++ */
-# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
-#endif
-
-typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
-int acc_on_device (int) __GOACC_NOTHROW;
-
-#if __cplusplus
-}
-#endif
+#include "openacc.h"
 
 #define N 32
 
diff --git a/gcc/testsuite/c-c++-common/goacc/openacc.h b/gcc/testsuite/c-c++-common/goacc/openacc.h
new file mode 100644
index 000..a74a482
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/openacc.h
@@ -0,0 +1,18 @@
+#if __cplusplus
+extern "C" {
+#endif
+
+#if __cplusplus >= 201103
+# define __GOACC_NOTHROW noexcept
+#elif __cplusplus
+# define __GOACC_NOTHROW throw ()
+#else /* Not C++ */
+# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
+#endif
+
+typedef enum acc_device_t { acc_device_X = 123 } acc_device_t;
+int acc_on_device (int) __GOACC_NOTHROW;
+
+#if __cplusplus
+}
+#endif
-- 
1.9.1



Re: [PATCH 8/9] Add TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID

2015-10-13 Thread Jeff Law

On 10/13/2015 04:13 AM, Richard Biener wrote:

On Tue, Oct 13, 2015 at 1:27 AM, Richard Henderson  wrote:

On 10/12/2015 09:10 PM, Richard Biener wrote:


The check_loadstore change should instead have adjusted the
flag_delete_null_pointer_checks guard in
infer_nonnull_range_by_dereference.




Nope, that doesn't work.  You have to wait until you see the actual MEM
being dereferenced before you can look at it's address space.



Well, as we are explicitely looking for the pointer 'op' we know the
address-space
beforehand, no?  TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op)))?



No.  We don't even know what type we're looking for; we're merely looking
for any use of NULL within any memory reference within STMT.

Specifically, when we're not looking for a specific SSA_NAME (which would be
properly typed), we always pass in a plain (void *)0:

   bool by_dereference
 = infer_nonnull_range_by_dereference (stmt, null_pointer_node);


Ick.
It's just looking to see if there's an explicit *0 in stmt.  That can 
occur due to cprop & friends obviously.  It was an easy way to avoid 
having to write a special walker.


The problem here is we don't know what address space the *0 is going to 
hit, right?   Isn't that also an issue for code generation as well?


Jeff


Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Ilya Enkovich
2015-10-13 18:35 GMT+03:00 Jeff Law :
> On 10/13/2015 08:56 AM, Ilya Enkovich wrote:
>>
>> 2015-10-12 13:37 GMT+03:00 Alan Lawrence :
>>>
>>> On 09/10/15 22:01, Jeff Law wrote:
>>>
 So my question for the series as a whole is whether or not we need to do
 something for the other languages, particularly Fortran.  I was a bit
 surprised to see this stuff bleed into the C/C++ front-ends and
 obviously wonder if it's bled into Fortran, Ada, Java, etc.
>>>
>>>
>>>
>>> Isn't that just because, we have GNU extensions to C/C++, for vectors? I
>>> admit I don't know enough Ada/Fortran to know whether we've added GNU
>>> extensions to those languages as well...
>>>
>>> A.
>>
>>
>> I also got an impression only GNU vector extensions should be
>> affected. And those are for C/C++ only.
>
> I'd be surprised if Fortran doesn't have vector capabilities.  I think some
> sanity checking in there would be wise.

A vector type in a language doesn't mean SIMD. AFAIK, OpenMP is used in
Fortran for the SIMD features. Also, I would have seen a lot of Fortran
regressions if such a feature existed, because of the fixed IL checker.

Thanks,
Ilya

>
> jeff


Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Jeff Law

On 10/13/2015 08:14 AM, Ilya Enkovich wrote:

+
+static tree
+build_vec_cmp (tree_code code, tree type,
+  tree arg0, tree arg1)
+{
+  tree zero_vec = build_zero_cst (type);
+  tree minus_one_vec = build_minus_one_cst (type);
+  tree cmp_type = build_same_sized_truth_vector_type (type);
+  tree cmp = build2 (code, cmp_type, arg0, arg1);
+  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
+}

Isn't this implementation the same for C & C++?  Does it make sense to put
it in c-family/c-common.c?


C++ version calls fold_if_not_in_template for generated comparison.  It is 
required there to successfully recognize vector MIN, MAX and ABS templates for 
vector ?: conditional operator.  Vector form of ?: conditional operator is 
supported for C++ only.

Ah, nevermind then.




However, more generally, do we need to do anything for the other languages?


Looking into that, I got the impression that vector modes are used only by
the C/C++ vector extensions. And I think regression testing would have
revealed some failures otherwise.
Maybe this stuff hasn't bled into the Fortran front-end, but the 
gfortran front-end certainly has OpenMP support which presumably has 
vector extensions.


The fact that nothing's failing in the testsuite is encouraging, but 
it'd be worth spending a few minutes taking a look to see if there's 
something that might need updating.


Jeff



Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD cpu model detection.

2015-10-13 Thread Uros Bizjak
On Tue, Oct 13, 2015 at 5:16 PM, Kumar, Venkataramanan
 wrote:
> Hi Uros,
>
> I realized both GCC 4.9 and GCC 5 branches includes processor subtype 
> AMDFAM15H_BDVER4.
> So I need to back port not only model selection fix but also the detection of 
> model for bdver4.
>
> Is that fine?

OK, but to avoid ABI mismatches, please double check that enum values
passed between library and compiled code are always the same in all
gcc releases.

Uros.

> Regards,
> Venkat.
>
>> -Original Message-
>> From: Kumar, Venkataramanan
>> Sent: Friday, October 09, 2015 3:31 PM
>> To: 'Uros Bizjak'
>> Cc: gcc-patches@gcc.gnu.org
>> Subject: RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD
>> cpu model detection.
>>
>> Thank you Uros,
>>
>> I will  test and commit model selection change in all release branches as 
>> well.
>>
>> Regards,
>> Venkat.
>>
>> > -Original Message-
>> > From: Uros Bizjak [mailto:ubiz...@gmail.com]
>> > Sent: Friday, October 09, 2015 3:25 PM
>> > To: Kumar, Venkataramanan
>> > Cc: gcc-patches@gcc.gnu.org
>> > Subject: Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix
>> > AMD cpu model detection.
>> >
>> > On Fri, Oct 9, 2015 at 11:50 AM, Kumar, Venkataramanan
>> >  wrote:
>> > > Hi Uros,
>> > >
>> > > Please find below patch that adds bdver4 target for multi versioning.
>> > > Also I while computing model, the extended_model is incorrectly left
>> > shifted  by 4. I have removed it now.
>> > >
>> > > Is below patch Ok for trunk ?
>> > > GCC bootstrap and regressions passed.
>> >
>> > OK for trunk and release branches, where applicable. IMO, model
>> > selection fix should be applied to all release branches.
>> >
>> > Thanks,
>> > Uros.
>> >
>> > > diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index
>> > > bb3a722..8676747 100644
>> > > --- a/libgcc/ChangeLog
>> > > +++ b/libgcc/ChangeLog
>> > > @@ -1,3 +1,8 @@
>> > > +2015-10-09  Venkataramanan Kumar
>> > 
>> > > +
>> > > +   * config/i386/cpuinfo.c (get_amd_cpu): Detect bdver4.
>> > > +   (__cpu_indicator_init): Fix model selection for AMD CPUs.
>> > > +
>> > >  2015-10-05  Kirill Yukhin  
>> > >
>> > > * config/i386/cpuinfo.c (get_intel_cpu): Detect "skylake-avx512".
>> > > diff --git a/libgcc/config/i386/cpuinfo.c
>> > > b/libgcc/config/i386/cpuinfo.c index 0cbbc85..1313ca3 100644
>> > > --- a/libgcc/config/i386/cpuinfo.c
>> > > +++ b/libgcc/config/i386/cpuinfo.c
>> > > @@ -169,6 +169,9 @@ get_amd_cpu (unsigned int family, unsigned int
>> > model)
>> > >/* Bulldozer version 3 "Steamroller"  */
>> > >if (model >= 0x30 && model <= 0x4f)
>> > > __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
>> > > +  /* Bulldozer version 4 "Excavator"   */
>> > > +  if (model >= 0x60 && model <= 0x7f)
>> > > +   __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
>> > >break;
>> > >  /* AMD Family 16h "btver2" */
>> > >  case 0x16:
>> > > @@ -455,7 +458,7 @@ __cpu_indicator_init (void)
>> > >if (family == 0x0f)
>> > > {
>> > >   family += extended_family;
>> > > - model += (extended_model << 4);
>> > > + model += extended_model;
>> > > }
>> > >
>> > >/* Get CPU type.  */
>> > >
>> > > Regards,
>> > > Venkat.
>> > >
>> > >
>> > >
>> > >
>> > >
>> > >


Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Jeff Law

On 10/13/2015 08:56 AM, Ilya Enkovich wrote:

2015-10-12 13:37 GMT+03:00 Alan Lawrence :

On 09/10/15 22:01, Jeff Law wrote:


So my question for the series as a whole is whether or not we need to do
something for the other languages, particularly Fortran.  I was a bit
surprised to see this stuff bleed into the C/C++ front-ends and
obviously wonder if it's bled into Fortran, Ada, Java, etc.



Isn't that just because, we have GNU extensions to C/C++, for vectors? I
admit I don't know enough Ada/Fortran to know whether we've added GNU
extensions to those languages as well...

A.


I also got an impression only GNU vector extensions should be
affected. And those are for C/C++ only.
I'd be surprised if Fortran doesn't have vector capabilities.  I think 
some sanity checking in there would be wise.


jeff


Benchmarks of v2 (was Re: [PATCH 0/5] RFC: Overhaul of diagnostics (v2))

2015-10-13 Thread David Malcolm
On Thu, 2015-09-24 at 10:15 +0200, Richard Biener wrote:
> On Thu, Sep 24, 2015 at 2:25 AM, David Malcolm  wrote:
> > On Wed, 2015-09-23 at 15:36 +0200, Richard Biener wrote:
> >> On Wed, Sep 23, 2015 at 3:19 PM, Michael Matz  wrote:
> >> > Hi,
> >> >
> >> > On Tue, 22 Sep 2015, David Malcolm wrote:
> >> >
> >> >> The drawback is that it could bloat the ad-hoc table.  Can the ad-hoc
> >> >> table ever get smaller, or does it only ever get inserted into?
> >> >
> >> > It only ever grows.
> >> >
> >> >> An idea I had is that we could stash short ranges directly into the 32
> >> >> bits of location_t, by offsetting the per-column-bits somewhat.
> >> >
> >> > It's certainly worth an experiment: let's say you restrict yourself to
> >> > tokens less than 8 characters, you need an additional 3 bits (using one
> >> > value, e.g. zero, as the escape value).  That leaves 20 bits for the line
> >> > numbers (for the normal 8 bit columns), which might be enough for most
> >> > single-file compilations.  For LTO compilation this often won't be 
> >> > enough.
> >> >
> >> >> My plan is to investigate the impact these patches have on the time and
> >> >> memory consumption of the compiler,
> >> >
> >> > When you do so, make sure you're also measuring an LTO compilation with
> >> > debug info of something big (firefox).  I know that we already had issues
> >> > with the size of the linemap data in the past for these cases (probably
> >> > when we added columns).
> >>
> >> The issue we have with LTO is that the linemap gets populated in quite
> >> random order and thus we repeatedly switch files (we've mitigated this
> >> somewhat for GCC 5).  We also considered dropping column info
> >> (and would drop range info) as diagnostics are from optimizers only
> >> with LTO and we keep locations merely for debug info.
> >
> > Thanks.  Presumably the mitigation you're referring to is the
> > lto_location_cache class in lto-streamer-in.c?
> >
> > Am I right in thinking that, right now, the LTO code doesn't support
> > ad-hoc locations? (presumably the block pointers only need to exist
> > during optimization, which happens after the serialization)
> 
> LTO code does support ad-hoc locations but they are "restored" only
> when reading function bodies and stmts (by means of COMBINE_LOCATION_DATA).
> 
> > The obvious simplification would be, as you suggest, to not bother
> > storing range information with LTO, falling back to just the existing
> > representation.  Then there's no need to extend LTO to serialize ad-hoc
> > data; simply store the underlying locus into the bit stream.  I think
> > that this happens already: lto-streamer-out.c calls expand_location and
> > stores the result, so presumably any ad-hoc location_t values made by
> > the v2 patches would have dropped their range data there when I ran the
> > test suite.
> 
> Yep.  We only preserve BLOCKs, so if you don't add extra code to
> preserve ranges they'll be "dropped".
> 
> > If it's acceptable to not bother with ranges for LTO, one way to do the
> > "stashing short ranges into the location_t" idea might be for the
> > bits-per-range of location_t values to be a property of the line_table
> > (or possibly the line map), set up when the struct line_maps is created.
> > For non-LTO it could be some tuned value (maybe from a param?); for LTO
> > it could be zero, so that we have as many bits as before for line/column
> > data.
> 
> That could be a possibility (likewise for column info?)
> 
> Richard.
> 
> > Hope this sounds sane
> > Dave

I did some crude benchmarking of the patchkit, using these scripts:
  https://github.com/davidmalcolm/gcc-benchmarking
(specifically, bb0222b455df8cefb53bfc1246eb0a8038256f30),
using the "big-code.c" and "kdecore.cc" files Michael posted as:
  https://gcc.gnu.org/ml/gcc-patches/2013-09/msg00062.html
and "influence.i", a preprocessed version of SPEC2006's 445.gobmk
engine/influence.c (as an example of a moderate-sized pure C source
file).

This doesn't yet cover very large autogenerated C files, and the .cc
file is only being measured to see the effect on the ad-hoc table (and
tokenization).

"control" was r227977.
"experiment" was the same revision with the v2 patchkit applied.

Recall that this patchkit captures ranges for tokens as an extra field
within tokens within libcpp and the C FE, and adds ranges to the ad-hoc
location lookaside, storing them for all tree nodes within the C FE that
have a location_t, and passing them around within c_expr for all C
expressions (including those that don't have a location_t).

Both control and experiment were built with
  --enable-checking=release \
  --disable-bootstrap \
  --disable-multilib \
  --enable-languages=c,ada,c++,fortran,go,java,lto,objc,obj-c++

The script measures:

(a) wallclock time for "xgcc -S" so it's measuring the driver, parsing,
optimimation, etc, rather than attempting to directly measure parsing.
This is without -ftime-report, since Mikhail indicated it's sufficiently
exp

Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

2015-10-13  Nikolai Bozhenov

 * gcc/rtl.h (print_insn): fix prototype

Installed on the trunk after bootstrap & regression test.

jeff



[PATCH v2] PR rtl-optimization/66790: uninitialized registers handling in REE

2015-10-13 Thread Pierre-Marie de Rodat

Hello,

The first attached patch is the second attempt to fix PR 
rtl-optimization/66790 (see 
).


The second one is a fix for some inconsistency noticed while working on 
the original bug. This specific patch fixes no known bug, but anyway…


Both were bootstrapped and regtested on x86_64-linux. Ok to commit? 
Thank you in advance!


[PATCH 1/2] REE: fix uninitialized registers handling

gcc/ChangeLog:

PR rtl-optimization/66790
* df.h (DF_MIR): New macro.
(DF_LAST_PROBLEM_PLUS1): Update to be past DF_MIR
(DF_MIR_INFO_BB): New macro.
(DF_MIR_IN, DF_MIR_OUT): New macros.
(struct df_mir_bb_info): New.
(df_mir): New macro.
(df_mir_add_problem, df_mir_simulate_one_insn): New forward
declarations.
(df_mir_get_bb_info): New.
* df-problems.c (struct df_mir_problem_data): New.
(df_mir_free_bb_info, df_mir_alloc, df_mir_reset,
df_mir_bb_local_compute, df_mir_local_compute, df_mir_init,
df_mir_confluence_0, df_mir_confluence_n,
df_mir_transfer_function, df_mir_free, df_mir_top_dump,
df_mir_bottom_dump, df_mir_verify_solution_start,
df_mir_verify_solution_end): New.
(problem_MIR): New.
(df_mir_add_problem, df_mir_simulate_one_insn): New.
* timevar.def (TV_DF_MIR): New.
* ree.c: Include bitmap.h
(add_removable_extension): Add an INIT_REGS parameter.  Use it
to skip zero-extensions that may get an uninitialized register.
(find_removable_extensions): Compute must-initialized registers
using the MIR dataflow problem. Update the call to
add_removable_extension.
(find_and_remove_re): Call df_mir_add_problem.

gcc/testsuite/ChangeLog:

* gnat.dg/opt50.adb: New test.
* gnat.dg/opt50_pkg.adb: New helper.
* gnat.dg/opt50_pkg.ads: New helper.

[PATCH 2/2] DF_LIVE: make clobbers cancel effect of previous GENs in
 the same BBs

gcc/ChangeLog:

* df-problems.c (df_live_bb_local_compute): Clear GEN bits for
DF_REF_MUST_CLOBBER references.

--
Pierre-Marie de Rodat
>From d7bf6e8c194f66e6b7e1823ad3d118115e4406bc Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat 
Date: Sat, 18 Jul 2015 13:10:45 +0200
Subject: [PATCH 1/2] REE: fix uninitialized registers handling

gcc/ChangeLog:

	PR rtl-optimization/66790
	* df.h (DF_MIR): New macro.
	(DF_LAST_PROBLEM_PLUS1): Update to be past DF_MIR
	(DF_MIR_INFO_BB): New macro.
	(DF_MIR_IN, DF_MIR_OUT): New macros.
	(struct df_mir_bb_info): New.
	(df_mir): New macro.
	(df_mir_add_problem, df_mir_simulate_one_insn): New forward
	declarations.
	(df_mir_get_bb_info): New.
	* df-problems.c (struct df_mir_problem_data): New.
	(df_mir_free_bb_info, df_mir_alloc, df_mir_reset,
	df_mir_bb_local_compute, df_mir_local_compute, df_mir_init,
	df_mir_confluence_0, df_mir_confluence_n,
	df_mir_transfer_function, df_mir_free, df_mir_top_dump,
	df_mir_bottom_dump, df_mir_verify_solution_start,
	df_mir_verify_solution_end): New.
	(problem_MIR): New.
	(df_mir_add_problem, df_mir_simulate_one_insn): New.
	* timevar.def (TV_DF_MIR): New.
	* ree.c: Include bitmap.h
	(add_removable_extension): Add an INIT_REGS parameter.  Use it
	to skip zero-extensions that may get an uninitialized register.
	(find_removable_extensions): Compute must-initialized registers
	using the MIR dataflow problem. Update the call to
	add_removable_extension.
	(find_and_remove_re): Call df_mir_add_problem.

gcc/testsuite/ChangeLog:

	* gnat.dg/opt50.adb: New test.
	* gnat.dg/opt50_pkg.adb: New helper.
	* gnat.dg/opt50_pkg.ads: New helper.
---
 gcc/df-problems.c   | 406 
 gcc/df.h|  34 ++-
 gcc/ree.c   |  62 --
 gcc/testsuite/gnat.dg/opt50.adb |  23 ++
 gcc/testsuite/gnat.dg/opt50_pkg.adb |  48 +
 gcc/testsuite/gnat.dg/opt50_pkg.ads |  12 ++
 gcc/timevar.def |   1 +
 7 files changed, 572 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gnat.dg/opt50.adb
 create mode 100644 gcc/testsuite/gnat.dg/opt50_pkg.adb
 create mode 100644 gcc/testsuite/gnat.dg/opt50_pkg.ads

diff --git a/gcc/df-problems.c b/gcc/df-problems.c
index 153732a..c08ae36 100644
--- a/gcc/df-problems.c
+++ b/gcc/df-problems.c
@@ -1849,6 +1849,412 @@ df_live_verify_transfer_functions (void)
 }
 
 /*
+   MUST-INITIALIZED REGISTERS.
+*/
+
+/* Private data used to verify the solution for this problem.  */
+struct df_mir_problem_data
+{
+  bitmap_head *in;
+  bitmap_head *out;
+  /* An obstack for the bitmaps we need for this problem.  */
+  bitmap_obstack mir_bitmaps;
+};
+
+
+/* Free basic block info.  */
+
+static void
+df_mir_free_bb_info (basic_block bb ATTRIBUTE_UNUSED,
+		  

RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD cpu model detection.

2015-10-13 Thread Kumar, Venkataramanan
Hi Uros,

I realized that both the GCC 4.9 and GCC 5 branches include the processor
subtype AMDFAM15H_BDVER4.
So I need to back-port not only the model selection fix but also the
detection of the model for bdver4.

Is that fine?

Regards,
Venkat.   

> -Original Message-
> From: Kumar, Venkataramanan
> Sent: Friday, October 09, 2015 3:31 PM
> To: 'Uros Bizjak'
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [Patch] [x86_64]: Add bdver4 for multi versioning and fix AMD
> cpu model detection.
> 
> Thank you Uros,
> 
> I will  test and commit model selection change in all release branches as 
> well.
> 
> Regards,
> Venkat.
> 
> > -Original Message-
> > From: Uros Bizjak [mailto:ubiz...@gmail.com]
> > Sent: Friday, October 09, 2015 3:25 PM
> > To: Kumar, Venkataramanan
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [Patch] [x86_64]: Add bdver4 for multi versioning and fix
> > AMD cpu model detection.
> >
> > On Fri, Oct 9, 2015 at 11:50 AM, Kumar, Venkataramanan
> >  wrote:
> > > Hi Uros,
> > >
> > > Please find below patch that adds bdver4 target for multi versioning.
> > > Also I while computing model, the extended_model is incorrectly left
> > shifted  by 4. I have removed it now.
> > >
> > > Is below patch Ok for trunk ?
> > > GCC bootstrap and regressions passed.
> >
> > OK for trunk and release branches, where applicable. IMO, model
> > selection fix should be applied to all release branches.
> >
> > Thanks,
> > Uros.
> >
> > > diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index
> > > bb3a722..8676747 100644
> > > --- a/libgcc/ChangeLog
> > > +++ b/libgcc/ChangeLog
> > > @@ -1,3 +1,8 @@
> > > +2015-10-09  Venkataramanan Kumar
> > 
> > > +
> > > +   * config/i386/cpuinfo.c (get_amd_cpu): Detect bdver4.
> > > +   (__cpu_indicator_init): Fix model selection for AMD CPUs.
> > > +
> > >  2015-10-05  Kirill Yukhin  
> > >
> > > * config/i386/cpuinfo.c (get_intel_cpu): Detect "skylake-avx512".
> > > diff --git a/libgcc/config/i386/cpuinfo.c
> > > b/libgcc/config/i386/cpuinfo.c index 0cbbc85..1313ca3 100644
> > > --- a/libgcc/config/i386/cpuinfo.c
> > > +++ b/libgcc/config/i386/cpuinfo.c
> > > @@ -169,6 +169,9 @@ get_amd_cpu (unsigned int family, unsigned int
> > model)
> > >/* Bulldozer version 3 "Steamroller"  */
> > >if (model >= 0x30 && model <= 0x4f)
> > > __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
> > > +  /* Bulldozer version 4 "Excavator"   */
> > > +  if (model >= 0x60 && model <= 0x7f)
> > > +   __cpu_model.__cpu_subtype = AMDFAM15H_BDVER4;
> > >break;
> > >  /* AMD Family 16h "btver2" */
> > >  case 0x16:
> > > @@ -455,7 +458,7 @@ __cpu_indicator_init (void)
> > >if (family == 0x0f)
> > > {
> > >   family += extended_family;
> > > - model += (extended_model << 4);
> > > + model += extended_model;
> > > }
> > >
> > >/* Get CPU type.  */
> > >
> > > Regards,
> > > Venkat.
> > >
> > >
> > >
> > >
> > >
> > >


Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Chen Gang

On 10/13/15 22:56, Bernd Schmidt wrote:
> On 10/13/2015 04:50 PM, Chen Gang wrote:
>> OK, under the bugzilla, the maintainer treated it as expected behavior
>> (not a bug). For me, we need more explanation for it (why we treat it
>> as expected behavior).
> 
> A global register is under control of the user. If the compiler uses it as a 
> frame pointer, it will get clobbered outside the user's control, which is 
> unexpected behaviour. Therefore, the code Mike quoted detects that case and 
> issues an error, indicating that you must use -fomit-frame-pointer if you 
> expect to use the frame pointer register for other purposes.
> 

OK, thanks.

> If you want an address on the stack there's __builtin_frame_address which may 
> or may not do what was intended. The code quoted in the bugzilla is just 
> invalid.
> 

OK, thank you very much, I shall send related kernel fix patch to kernel
mailing list.

Thanks.
-- 
Chen Gang (陈刚)

Open, share, and attitude like air, water, and life which God blessed


Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 05:03 PM, Dominik Vogt wrote:

On Tue, Oct 13, 2015 at 04:33:42PM +0200, Bernd Schmidt wrote:

Looks like
ix86_pragma_target_parse has a "! args" test to determine if it has
a pop, maybe the default function could do the same.


All right, this solution is way better.  New patch attached.


This is ok, thanks!


Bernd



Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
On Tue, Oct 13, 2015 at 04:33:42PM +0200, Bernd Schmidt wrote:
> Looks like
> ix86_pragma_target_parse has a "! args" test to determine if it has
> a pop, maybe the default function could do the same.

All right, this solution is way better.  New patch attached.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
gcc/ChangeLog

* targhooks.c (default_target_option_pragma_parse): Do not warn if
called on behalf of "#pragma GCC pop_options".

gcc/testsuite/ChangeLog

* gcc.dg/pragma-pop_options-1.c: New test.
>From 4bb0068875e005b2f0e33bec0bd5a70b798af6e3 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 13 Oct 2015 15:54:15 +0100
Subject: [PATCH] Remove "#pragma GCC pop_options" warning for "#pragma GCC
 pop_options".

---
 gcc/targhooks.c | 8 ++--
 gcc/testsuite/gcc.dg/pragma-pop_options-1.c | 7 +++
 2 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pragma-pop_options-1.c

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 7238c8f..5077ec9 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1305,8 +1305,12 @@ bool
 default_target_option_pragma_parse (tree ARG_UNUSED (args),
 tree ARG_UNUSED (pop_target))
 {
-  warning (OPT_Wpragmas,
-	   "#pragma GCC target is not supported for this machine");
+  /* If args is NULL the caller is handle_pragma_pop_options ().  In that case,
+ emit no warning because "#pragma GCC pop_target" is valid on targets that
+ do not have the "target" pragma.  */
+  if (args)
+warning (OPT_Wpragmas,
+	 "#pragma GCC target is not supported for this machine");
 
   return false;
 }
diff --git a/gcc/testsuite/gcc.dg/pragma-pop_options-1.c b/gcc/testsuite/gcc.dg/pragma-pop_options-1.c
new file mode 100644
index 000..4e969de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pragma-pop_options-1.c
@@ -0,0 +1,7 @@
+/* Check warnings produced by #pragma GCC push/pop/reset_options.  */
+/* { dg-do assemble } */
+
+#pragma push_options
+#pragma pop_options
+
+int foo;
-- 
2.3.0



Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Chen Gang

On 10/13/15 07:02, Mike Stump wrote:
> On Oct 12, 2015, at 3:32 PM, Chen Gang  wrote:
>>
>> OK, thanks. If we really need to fix it, which target hook should I use?
>> (or do we need a new target hook?)
> 
> So, the first discussion would be if it is, or is not a bug.  If it isn’t, 
> then there is no fix.  No fix, no target hook.  So far, Bernd said not a bug.
> 

OK, under the bugzilla, the maintainer treated it as expected behavior
(not a bug). For me, we need more explanation for it (why we treat it
as expected behavior).


> So, I’ll note that one _can_ do this with the stack pointer, as a fixed 
> register.
> When the frame pointer is fixed, one cannot do this.
> 

Excuse me, I do not quite understand, could you please provide more
details?

> The code that does this is:
> 
>   /* Diagnose uses of the hard frame pointer when it is used as a global  
>   
>
>  register.  Often we can get away with letting the user appropriate   
>   
> 
>  the frame pointer, but we should let them know when code generation  
>   
> 
>  makes that impossible.  */
>   if (global_regs[HARD_FRAME_POINTER_REGNUM] && frame_pointer_needed)
> {
>   tree decl = global_regs_decl[HARD_FRAME_POINTER_REGNUM];
>   error_at (DECL_SOURCE_LOCATION (current_function_decl),
> "frame pointer required, but reserved");
>   inform (DECL_SOURCE_LOCATION (decl), "for %qD", decl);
> }
> 
> to `fix it’, one would simple remove this chunk as misguided and fix up any 
> code gen issues exposed.
> 

If there were more issues related to this than just the one, then what you
said would sound reasonable to me.


Thanks.
-- 
Chen Gang (陈刚)

Open, share, and attitude like air, water, and life which God blessed


Re: [PATCH] gcc/ira.c: Check !HAVE_FP_INSTEAD_INSNS when frame pointer is needed and as global register

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 04:50 PM, Chen Gang wrote:

OK, under the bugzilla, the maintainer treated it as expected behavior
(not a bug). For me, we need more explanation for it (why we treat it
as expected behavior).


A global register is under control of the user. If the compiler uses it 
as a frame pointer, it will get clobbered outside the user's control, 
which is unexpected behaviour. Therefore, the code Mike quoted detects 
that case and issues an error, indicating that you must use 
-fomit-frame-pointer if you expect to use the frame pointer register for 
other purposes.


If you want an address on the stack there's __builtin_frame_address 
which may or may not do what was intended. The code quoted in the 
bugzilla is just invalid.



to `fix it’, one would simple remove this chunk as misguided and fix up any 
code gen issues exposed.



If there were not only one issues related with it, for me, what you said
sounds reasonable to me.


That's totally the wrong thing to do as the issue is not compiler code 
generation, it's the danger of clobbering a user variable.



Bernd


Re: [[Boolean Vector, patch 5/5] Support boolean vectors in vector lowering

2015-10-13 Thread Ilya Enkovich
2015-10-12 13:37 GMT+03:00 Alan Lawrence :
> On 09/10/15 22:01, Jeff Law wrote:
>
>> So my question for the series as a whole is whether or not we need to do
>> something for the other languages, particularly Fortran.  I was a bit
>> surprised to see this stuff bleed into the C/C++ front-ends and
>> obviously wonder if it's bled into Fortran, Ada, Java, etc.
>
>
> Isn't that just because, we have GNU extensions to C/C++, for vectors? I
> admit I don't know enough Ada/Fortran to know whether we've added GNU
> extensions to those languages as well...
>
> A.

I also got an impression only GNU vector extensions should be
affected. And those are for C/C++ only.

Thanks,
Ilya


Re: [vec-cmp, patch 4/6] Support vector mask invariants

2015-10-13 Thread Ilya Enkovich
2015-10-13 16:54 GMT+03:00 Richard Biener :
> On Thu, Oct 8, 2015 at 5:11 PM, Ilya Enkovich  wrote:
>> Hi,
>>
>> This patch adds a special handling of boolean vector invariants.  We need 
>> additional code to determine type of generated invariant.  For VEC_COND_EXPR 
>> case we even provide this type directly because statement vectype doesn't 
>> allow us to compute it.  Separate code is used to generate and expand such 
>> vectors.
>>
>> Thanks,
>> Ilya
>> --
>> gcc/
>>
>> 2015-10-08  Ilya Enkovich  
>>
>> * expr.c (const_vector_mask_from_tree): New.
>> (const_vector_from_tree): Use const_vector_mask_from_tree
>> for boolean vectors.
>> * tree-vect-stmts.c (vect_init_vector): Support boolean vector
>> invariants.
>> (vect_get_vec_def_for_operand): Add VECTYPE arg.
>> (vectorizable_condition): Directly provide vectype for invariants
>> used in comparison.
>> * tree-vectorizer.h (vect_get_vec_def_for_operand): Add VECTYPE
>> arg.
>>
>>
>> diff --git a/gcc/expr.c b/gcc/expr.c
>> index 88da8cb..a624a34 100644
>> --- a/gcc/expr.c
>> +++ b/gcc/expr.c
>> @@ -11320,6 +11320,40 @@ try_tablejump (tree index_type, tree index_expr, 
>> tree minval, tree range,
>>return 1;
>>  }
>>
>> +/* Return a CONST_VECTOR rtx representing vector mask for
>> +   a VECTOR_CST of booleans.  */
>> +static rtx
>> +const_vector_mask_from_tree (tree exp)
>> +{
>> +  rtvec v;
>> +  unsigned i;
>> +  int units;
>> +  tree elt;
>> +  machine_mode inner, mode;
>> +
>> +  mode = TYPE_MODE (TREE_TYPE (exp));
>> +  units = GET_MODE_NUNITS (mode);
>> +  inner = GET_MODE_INNER (mode);
>> +
>> +  v = rtvec_alloc (units);
>> +
>> +  for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
>> +{
>> +  elt = VECTOR_CST_ELT (exp, i);
>> +
>> +  gcc_assert (TREE_CODE (elt) == INTEGER_CST);
>> +  if (integer_zerop (elt))
>> +   RTVEC_ELT (v, i) = CONST0_RTX (inner);
>> +  else if (integer_onep (elt)
>> +  || integer_minus_onep (elt))
>> +   RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
>> +  else
>> +   gcc_unreachable ();
>> +}
>> +
>> +  return gen_rtx_CONST_VECTOR (mode, v);
>> +}
>> +
>>  /* Return a CONST_VECTOR rtx for a VECTOR_CST tree.  */
>>  static rtx
>>  const_vector_from_tree (tree exp)
>> @@ -11335,6 +11369,9 @@ const_vector_from_tree (tree exp)
>>if (initializer_zerop (exp))
>>  return CONST0_RTX (mode);
>>
>> +  if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
>> +  return const_vector_mask_from_tree (exp);
>> +
>>units = GET_MODE_NUNITS (mode);
>>inner = GET_MODE_INNER (mode);
>>
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 6949c71..337ea7b 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -1308,27 +1308,61 @@ vect_init_vector_1 (gimple *stmt, gimple *new_stmt, 
>> gimple_stmt_iterator *gsi)
>>  tree
>>  vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator 
>> *gsi)
>>  {
>> +  tree val_type = TREE_TYPE (val);
>> +  machine_mode mode = TYPE_MODE (type);
>> +  machine_mode val_mode = TYPE_MODE(val_type);
>>tree new_var;
>>gimple *init_stmt;
>>tree vec_oprnd;
>>tree new_temp;
>>
>>if (TREE_CODE (type) == VECTOR_TYPE
>> -  && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
>> -{
>> -  if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
>> +  && TREE_CODE (val_type) != VECTOR_TYPE)
>> +{
>> +  /* Handle vector of bool represented as a vector of
>> +integers here rather than on expand because it is
>> +a default mask type for targets.  Vector mask is
>> +built in a following way:
>> +
>> +tmp = (int)val
>> +vec_tmp = {tmp, ..., tmp}
>> +vec_cst = VIEW_CONVERT_EXPR(vec_tmp);  */
>> +  if (TREE_CODE (val_type) == BOOLEAN_TYPE
>> + && VECTOR_MODE_P (mode)
>> + && SCALAR_INT_MODE_P (GET_MODE_INNER (mode))
>> + && GET_MODE_INNER (mode) != val_mode)
>> {
>> - if (CONSTANT_CLASS_P (val))
>> -   val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
>> - else
>> + unsigned size = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
>> + tree stype = build_nonstandard_integer_type (size, 1);
>> + tree vectype = get_vectype_for_scalar_type (stype);
>> +
>> + new_temp = make_ssa_name (stype);
>> + init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
>> + vect_init_vector_1 (stmt, init_stmt, gsi);
>> +
>> + val = make_ssa_name (vectype);
>> + new_temp = build_vector_from_val (vectype, new_temp);
>> + init_stmt = gimple_build_assign (val, new_temp);
>> + vect_init_vector_1 (stmt, init_stmt, gsi);
>> +
>> + val = build1 (VIEW_CONVERT_EXPR, type, val);
>l
> So I don't quite understand - why don't we want to build
>
>tmp = (bool-element-type)val;
>vec_cst = {tmp, tmp, tmp ... };
>
> ?

This code was writ

Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data

2015-10-13 Thread Ilya Verbin
On Mon, Jun 15, 2015 at 22:48:50 +0300, Ilya Verbin wrote:
> @@ -950,50 +997,41 @@ GOMP_target (int device, void (*fn) (void *), const 
> void *unused,
> ...
> +  devicep->run_func (devicep->target_id, fn_addr, (void *) 
> tgt_vars->tgt_start);

If mapnum is 0, tgt_vars->tgt_start is uninitialized.  This is not a big bug,
because in this case the target function doesn't use this pointer; however,
valgrind warns about sending uninitialized data to the target.
OK for gomp-4_1-branch?


libgomp/
* target.c (gomp_map_vars): Zero tgt->tgt_start when mapnum is 0.


diff --git a/libgomp/target.c b/libgomp/target.c
index 95360d1..c4e3323 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -323,6 +323,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
   struct splay_tree_key_s cur_node;
   struct target_mem_desc *tgt
 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
+  tgt->tgt_start = 0;
   tgt->list_count = mapnum;
   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
   tgt->device_descr = devicep;


  -- Ilya


[HSA] Fix emission of hsa_num_threads

2015-10-13 Thread Martin Liška
Hello.

Following pair of patches changes behavior of omp_{get,set}_num_threads and
provides more clever way how these values are passed to a another kernel.

Martin
>From 1d2732a0e33259e73a2d8059fb5f68e359144ef6 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 8 Oct 2015 11:21:16 +0200
Subject: [PATCH 1/2] HSA: encapsulate type conversion constructs

gcc/ChangeLog:

2015-10-08  Martin Liska  

	* hsa-gen.c (hsa_op_with_type::get_in_type): New function.
	(gen_hsa_insns_for_switch_stmt): Use it.
	(gen_set_num_threads): Dtto.
	(gen_hsa_insns_for_known_library_call): Dtto.
	* hsa.h (hsa_op_with_type::get_in_type): Declarate the function.
---
 gcc/hsa-gen.c | 64 +--
 gcc/hsa.h |  4 
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 8f707b5..ab4917b 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -795,6 +795,34 @@ hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t)
   type = t;
 }
 
+hsa_op_with_type *
+hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb)
+{
+  if (type == dtype)
+return this;
+
+  hsa_op_reg *dest;
+
+  if (hsa_needs_cvt (dtype, type))
+{
+  dest = new hsa_op_reg (dtype);
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
+	dest->type, dest, this));
+}
+  else
+{
+  dest = new hsa_op_reg (type);
+  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+	dest->type, dest, this));
+
+  /* We cannot simply for instance: 'mov_u32 $_3, 48 (s32)' because
+	 type of the operand must be same as type of the instruction.  */
+  dest->type = dtype;
+}
+
+  return dest;
+}
+
 /* Constructor of class representing HSA immediate values.  TREE_VAL is the
tree representation of the immediate value.  If min32int is true,
always expand integer types to one that has at least 32 bits.  */
@@ -3016,16 +3044,8 @@ gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb,
 	sub_index, index,
 	new hsa_op_immed (lowest)));
 
-  if (hsa_needs_cvt (BRIG_TYPE_U64, sub_index->type))
-{
-  hsa_op_reg *sub_index_cvt = new hsa_op_reg (BRIG_TYPE_U64);
-  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
-	sub_index_cvt->type,
-	sub_index_cvt, sub_index));
-
-  sub_index = sub_index_cvt;
-}
-
+  hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb);
+  sub_index = as_a <hsa_op_reg *> (tmp);
   unsigned labels = gimple_switch_num_labels (s);
   unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s));
 
@@ -3251,17 +3271,7 @@ gen_set_num_threads (tree value, hsa_bb *hbb, vec  *ssa_map)
   hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb,
 			  ssa_map);
 
-  BrigType16_t dtype = hsa_num_threads->type;
-  if (hsa_needs_cvt (dtype, src->type))
-{
-  hsa_op_reg *tmp = new hsa_op_reg (dtype);
-  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT, tmp->type,
-	tmp, src));
-  src = tmp;
-}
-  else
-src->type = dtype;
-
+  src = src->get_in_type (hsa_num_threads->type, hbb);
   hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
 
   hsa_op_immed *limit = new hsa_op_immed (64, BRIG_TYPE_U32);
@@ -3394,17 +3404,7 @@ gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb,
 	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb,
   ssa_map);
 
-	  BrigType16_t dtype = BRIG_TYPE_U64;
-	  if (hsa_needs_cvt (dtype, src->type))
-	{
-	  hsa_op_reg *tmp = new hsa_op_reg (dtype);
-	  hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_CVT,
-		tmp->type, tmp, src));
-	  src = tmp;
-	}
-	  else
-	src->type = dtype;
-
+	  src = src->get_in_type (BRIG_TYPE_U64, hbb);
 	  set_debug_value (hbb, src);
 	  return true;
 	}
diff --git a/gcc/hsa.h b/gcc/hsa.h
index 86adaa5..89d339f 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -120,6 +120,10 @@ public:
   /* The type.  */
   BrigType16_t type;
 
+  /* Convert an operand to a destination type DTYPE and attach insns
+ to HBB if needed.  */
+  hsa_op_with_type *get_in_type (BrigType16_t dtype, hsa_bb *hbb);
+
 protected:
   hsa_op_with_type (BrigKind16_t k, BrigType16_t t);
 private:
-- 
2.6.0

>From 7f10daa1f37ee47091a3956a13bb610464e8e279 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 12 Oct 2015 15:49:50 +0200
Subject: [PATCH 2/2] HSA: handle properly number of threads in a kernel

gcc/ChangeLog:

2015-10-13  Martin Liska  

	* hsa-gen.c (hsa_insn_basic::set_output_in_type): New function.
	(query_hsa_grid): Likewise.
	(gen_set_num_threads): Save the value without any value range
	checking.
	(gen_num_threads_for_dispatch): New function.
	(gen_hsa_insns_for_known_library_call): Use the newly added
	function query_hsa_grid.
	(gen_hsa_insns_for_call): Likewise.
	(gen_hsa_insns_for_kernel_call): Use the newly added function
	gen_num_threads_for_dispatch.
	(init_omp_in_prologue): Initialize hsa_num_thr

Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 03:31 PM, Dominik Vogt wrote:

On Tue, Oct 13, 2015 at 02:28:37PM +0200, Bernd Schmidt wrote:

On 10/13/2015 02:02 PM, Dominik Vogt wrote:

When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.


Ok, I had to go look at the code to figure out what's going on. A 
suggestion for a possibly less clumsy way - recognize which pragma we're 
looking at from the arguments. Looks like ix86_pragma_target_parse has a 
"! args" test to determine if it has a pop, maybe the default function 
could do the same. If that's insufficient, pass another argument to 
identify clearly in what situation the hook is being parsed.



Bernd


Re: [Boolean Vector, patch 3/5] Use boolean vector in C/C++ FE

2015-10-13 Thread Ilya Enkovich
On 09 Oct 14:51, Jeff Law wrote:
> On 10/02/2015 08:04 AM, Ilya Enkovich wrote:
> >Hi,
> >
> >This patch makes C/C++ FE to use boolean vector as a resulting type for 
> >vector comparison.  As a result vector comparison in source code now parsed 
> >into VEC_COND_EXPR, it required a testcase fix-up.
> >
> >Thanks,
> >Ilya
> >--
> >gcc/c
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * c-typeck.c (build_conditional_expr): Use boolean vector
> > type for vector comparison.
> > (build_vec_cmp): New.
> > (build_binary_op): Use build_vec_cmp for comparison.
> >
> >gcc/cp
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * call.c (build_conditional_expr_1): Use boolean vector
> > type for vector comparison.
> > * typeck.c (build_vec_cmp): New.
> > (cp_build_binary_op): Use build_vec_cmp for comparison.
> >
> >gcc/testsuite/
> >
> >2015-10-02  Ilya Enkovich  
> >
> > * g++.dg/ext/vector22.C: Allow VEC_COND_EXPR.
> >
> >
> >diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
> >index 3b26231..3f64d76 100644
> >--- a/gcc/c/c-typeck.c
> >+++ b/gcc/c/c-typeck.c
> >@@ -10220,6 +10232,19 @@ push_cleanup (tree decl, tree cleanup, bool eh_only)
> >STATEMENT_LIST_STMT_EXPR (list) = stmt_expr;
> >  }
> >  
> >+/* Build a vector comparison using VEC_COND_EXPR.  */
> Please make sure your function comments include descriptions of all the
> arguments and return values.

Fixed.

> 
> 
> >+
> >+static tree
> >+build_vec_cmp (tree_code code, tree type,
> >+   tree arg0, tree arg1)
> >+{
> >+  tree zero_vec = build_zero_cst (type);
> >+  tree minus_one_vec = build_minus_one_cst (type);
> >+  tree cmp_type = build_same_sized_truth_vector_type (type);
> >+  tree cmp = build2 (code, cmp_type, arg0, arg1);
> >+  return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
> >+}
> Isn't this implementation the same for C & C++?  Does it make sense to put
> it in c-family/c-common.c?

C++ version calls fold_if_not_in_template for generated comparison.  It is 
required there to successfully recognize vector MIN, MAX and ABS templates for 
vector ?: conditional operator.  Vector form of ?: conditional operator is 
supported for C++ only.

> 
> 
> >+
> >  /* Build a binary-operation expression without default conversions.
> > CODE is the kind of expression to build.
> > LOCATION is the operator's location.
> >@@ -10786,7 +10811,8 @@ build_binary_op (location_t location, enum tree_code 
> >code,
> >result_type = build_opaque_vector_type (intt,
> >   TYPE_VECTOR_SUBPARTS (type0));
> >converted = 1;
> >-  break;
> >+  ret = build_vec_cmp (resultcode, result_type, op0, op1);
> >+  goto return_build_binary_op;
> I suspect there's some kind of whitespace/tab problem.  Those two lines
> should be indented the same, right?

Fixed.

> 
> 
> >  }
> >if (FLOAT_TYPE_P (type0) || FLOAT_TYPE_P (type1))
> > warning_at (location,
> >@@ -10938,7 +10964,8 @@ build_binary_op (location_t location, enum tree_code 
> >code,
> >result_type = build_opaque_vector_type (intt,
> >   TYPE_VECTOR_SUBPARTS (type0));
> >converted = 1;
> >-  break;
> >+  ret = build_vec_cmp (resultcode, result_type, op0, op1);
> >+  goto return_build_binary_op;
> Similarly here.
> 
> With the items above fixed, this is OK.
> 
> However, more generally, do we need to do anything for the other languages?

Looking into that I got an impression vector modes are used by C/C++ vector 
extensions only.  And I think regression testing would reveal some failures 
otherwise.

> 
> Jeff

Here is an updated version.

Thanks,
Ilya
--
gcc/c

2015-10-02  Ilya Enkovich  

* c-typeck.c (build_conditional_expr): Use boolean vector
type for vector comparison.
(build_vec_cmp): New.
(build_binary_op): Use build_vec_cmp for comparison.

gcc/cp

2015-10-02  Ilya Enkovich  

* call.c (build_conditional_expr_1): Use boolean vector
type for vector comparison.
* typeck.c (build_vec_cmp): New.
(cp_build_binary_op): Use build_vec_cmp for comparison.

gcc/testsuite/

2015-10-02  Ilya Enkovich  

* g++.dg/ext/vector22.C: Allow VEC_COND_EXPR.


diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index df3245a..8fe6a74 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -4771,6 +4771,18 @@ build_conditional_expr (location_t colon_loc, tree 
ifexp, bool ifexp_bcp,
   && TREE_CODE (orig_op2) == INTEGER_CST
   && !TREE_OVERFLOW (orig_op2)));
 }
+
+  /* Need to convert condition operand into a vector mask.  */
+  if (VECTOR_TYPE_P (TREE_TYPE (ifexp)))
+{
+  tree vectype = TREE_TYPE (ifexp);
+  tree elem_type = TREE_TYPE (vectype);
+  tree zero = build_int_cst (elem_type, 0);
+  tree zero_vec = build_vector_from_val (vectype, zero);
+  tree cmp_

[hsa] Fix bitfield alignment ICEs

2015-10-13 Thread Martin Jambor
Hi,

yesterday I did not notice that I introduced an ICE on testcases with
bit-field memory accesses.  The following fixes the issue.  A better
solution would be to expand the bit-field parts of memory expressions
separately, which would often allow us to use better aligned accesses,
but that is not a priority at the moment.

Committed to the branch.
Thanks,

Martin


2015-10-13  Martin Jambor  

* hsa-gen.c (hsa_bitref_alignment): New function
(gen_hsa_insns_for_load): Use it.
(gen_hsa_insns_for_store): Likewise.

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 8f707b5..7f713f6 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -2028,6 +2028,38 @@ gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, 
hsa_op_address *addr,
   gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb);
 }
 
+/* Return the alignment of base memory accesses we issue to perform bit-field
+   memory access REF.  */
+
+static BrigAlignment8_t
+hsa_bitmemref_alignment (tree ref)
+{
+  unsigned HOST_WIDE_INT bit_offset = 0;
+
+  while (true)
+{
+  if (TREE_CODE (ref) == BIT_FIELD_REF)
+   {
+ if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2)))
+   return BRIG_ALIGNMENT_1;
+ bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2));
+   }
+  else if (TREE_CODE (ref) == COMPONENT_REF
+  && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+   bit_offset += int_bit_position (TREE_OPERAND (ref, 1));
+  else
+   break;
+  ref = TREE_OPERAND (ref, 0);
+}
+
+  unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT;
+  unsigned HOST_WIDE_INT byte_bits = bit_offset - bits;
+  BrigAlignment8_t base = hsa_alignment_encoding (get_object_alignment (ref));
+  if (byte_bits == 0)
+return base;
+  return MIN (base, hsa_alignment_encoding (byte_bits & -byte_bits));
+}
+
 /* Generate HSAIL instructions loading something into register DEST.  RHS is
tree representation of the loaded data, which are loaded as type TYPE.  Add
instructions to HBB, use SSA_MAP for HSA SSA lookup.  */
@@ -2145,11 +2177,9 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree 
type, hsa_bb *hbb,
  return;
}
 
-  BrigAlignment8_t req_align;
-  req_align = hsa_alignment_encoding (get_object_alignment (rhs));
   if (bitsize || bitpos)
gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb,
-req_align);
+hsa_bitmemref_alignment (rhs));
   else
{
  BrigType16_t mtype;
@@ -2158,7 +2188,7 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree 
type, hsa_bb *hbb,
false));
  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest,
addr);
- mem->set_align (req_align);
+ mem->set_align (hsa_alignment_encoding (get_object_alignment (rhs)));
  hbb->append_insn (mem);
}
 }
@@ -2194,6 +2224,7 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
 vec  *ssa_map)
 {
   HOST_WIDE_INT bitsize = 0, bitpos = 0;
+  BrigAlignment8_t req_align;
   BrigType16_t mtype;
   mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
false));
@@ -2227,10 +2258,11 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
 
   hsa_op_reg *value_reg = new hsa_op_reg (mem_type);
 
+  req_align = hsa_bitmemref_alignment (lhs);
   /* Load value from memory.  */
   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type,
value_reg, addr);
-  mem->set_align (hsa_alignment_encoding (get_object_alignment (lhs)));
+  mem->set_align (req_align);
   hbb->append_insn (mem);
 
   /* AND the loaded value with prepared mask.  */
@@ -2271,9 +2303,11 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, 
hsa_bb *hbb,
   src = prepared_reg;
   mtype = mem_type;
 }
+  else
+req_align = hsa_alignment_encoding (get_object_alignment (lhs));
 
   hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr);
-  mem->set_align (hsa_alignment_encoding (get_object_alignment (lhs)));
+  mem->set_align (req_align);
 
   /* XXX The HSAIL disasm has another constraint: if the source
  is an immediate then it must match the destination type.  If


Re: [vec-cmp, patch 4/6] Support vector mask invariants

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 5:11 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch adds a special handling of boolean vector invariants.  We need 
> additional code to determine type of generated invariant.  For VEC_COND_EXPR 
> case we even provide this type directly because statement vectype doesn't 
> allow us to compute it.  Separate code is used to generate and expand such 
> vectors.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * expr.c (const_vector_mask_from_tree): New.
> (const_vector_from_tree): Use const_vector_mask_from_tree
> for boolean vectors.
> * tree-vect-stmts.c (vect_init_vector): Support boolean vector
> invariants.
> (vect_get_vec_def_for_operand): Add VECTYPE arg.
> (vectorizable_condition): Directly provide vectype for invariants
> used in comparison.
> * tree-vectorizer.h (vect_get_vec_def_for_operand): Add VECTYPE
> arg.
>
>
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 88da8cb..a624a34 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -11320,6 +11320,40 @@ try_tablejump (tree index_type, tree index_expr, 
> tree minval, tree range,
>return 1;
>  }
>
> +/* Return a CONST_VECTOR rtx representing vector mask for
> +   a VECTOR_CST of booleans.  */
> +static rtx
> +const_vector_mask_from_tree (tree exp)
> +{
> +  rtvec v;
> +  unsigned i;
> +  int units;
> +  tree elt;
> +  machine_mode inner, mode;
> +
> +  mode = TYPE_MODE (TREE_TYPE (exp));
> +  units = GET_MODE_NUNITS (mode);
> +  inner = GET_MODE_INNER (mode);
> +
> +  v = rtvec_alloc (units);
> +
> +  for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
> +{
> +  elt = VECTOR_CST_ELT (exp, i);
> +
> +  gcc_assert (TREE_CODE (elt) == INTEGER_CST);
> +  if (integer_zerop (elt))
> +   RTVEC_ELT (v, i) = CONST0_RTX (inner);
> +  else if (integer_onep (elt)
> +  || integer_minus_onep (elt))
> +   RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
> +  else
> +   gcc_unreachable ();
> +}
> +
> +  return gen_rtx_CONST_VECTOR (mode, v);
> +}
> +
>  /* Return a CONST_VECTOR rtx for a VECTOR_CST tree.  */
>  static rtx
>  const_vector_from_tree (tree exp)
> @@ -11335,6 +11369,9 @@ const_vector_from_tree (tree exp)
>if (initializer_zerop (exp))
>  return CONST0_RTX (mode);
>
> +  if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
> +  return const_vector_mask_from_tree (exp);
> +
>units = GET_MODE_NUNITS (mode);
>inner = GET_MODE_INNER (mode);
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 6949c71..337ea7b 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1308,27 +1308,61 @@ vect_init_vector_1 (gimple *stmt, gimple *new_stmt, 
> gimple_stmt_iterator *gsi)
>  tree
>  vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator 
> *gsi)
>  {
> +  tree val_type = TREE_TYPE (val);
> +  machine_mode mode = TYPE_MODE (type);
> +  machine_mode val_mode = TYPE_MODE(val_type);
>tree new_var;
>gimple *init_stmt;
>tree vec_oprnd;
>tree new_temp;
>
>if (TREE_CODE (type) == VECTOR_TYPE
> -  && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
> -{
> -  if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
> +  && TREE_CODE (val_type) != VECTOR_TYPE)
> +{
> +  /* Handle vector of bool represented as a vector of
> +integers here rather than on expand because it is
> +a default mask type for targets.  Vector mask is
> +built in a following way:
> +
> +tmp = (int)val
> +vec_tmp = {tmp, ..., tmp}
> +vec_cst = VIEW_CONVERT_EXPR(vec_tmp);  */
> +  if (TREE_CODE (val_type) == BOOLEAN_TYPE
> + && VECTOR_MODE_P (mode)
> + && SCALAR_INT_MODE_P (GET_MODE_INNER (mode))
> + && GET_MODE_INNER (mode) != val_mode)
> {
> - if (CONSTANT_CLASS_P (val))
> -   val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
> - else
> + unsigned size = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
> + tree stype = build_nonstandard_integer_type (size, 1);
> + tree vectype = get_vectype_for_scalar_type (stype);
> +
> + new_temp = make_ssa_name (stype);
> + init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
> + vect_init_vector_1 (stmt, init_stmt, gsi);
> +
> + val = make_ssa_name (vectype);
> + new_temp = build_vector_from_val (vectype, new_temp);
> + init_stmt = gimple_build_assign (val, new_temp);
> + vect_init_vector_1 (stmt, init_stmt, gsi);
> +
> + val = build1 (VIEW_CONVERT_EXPR, type, val);

So I don't quite understand - why don't we want to build

   tmp = (bool-element-type)val;
   vec_cst = {tmp, tmp, tmp ... };

?

> +   }
> +  else
> +   {
> + if (!types_compatible_p (TREE_TYPE (type), val_type))
> {
> - new_temp = make_ssa_name (TREE_TYPE (type));
> - init_stmt = gimp

Re: [vec-cmp, patch 3/6] Vectorize comparison

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 5:03 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch supports comparison statements vectrization basing on introduced 
> optabs.
>
> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * tree-vect-data-refs.c (vect_get_new_vect_var): Support 
> vect_mask_var.
> (vect_create_destination_var): Likewise.
> * tree-vect-stmts.c (vectorizable_comparison): New.
> (vect_analyze_stmt): Add vectorizable_comparison.
> (vect_transform_stmt): Likewise.
> * tree-vectorizer.h (enum vect_var_kind): Add vect_mask_var.
> (enum stmt_vec_info_type): Add comparison_vec_info_type.
> (vectorizable_comparison): New.
>
>
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 3befa38..9edc663 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -3849,6 +3849,9 @@ vect_get_new_vect_var (tree type, enum vect_var_kind 
> var_kind, const char *name)
>case vect_scalar_var:
>  prefix = "stmp";
>  break;
> +  case vect_mask_var:
> +prefix = "mask";
> +break;
>case vect_pointer_var:
>  prefix = "vectp";
>  break;
> @@ -4403,7 +4406,11 @@ vect_create_destination_var (tree scalar_dest, tree 
> vectype)
>tree type;
>enum vect_var_kind kind;
>
> -  kind = vectype ? vect_simple_var : vect_scalar_var;
> +  kind = vectype
> +? VECTOR_BOOLEAN_TYPE_P (vectype)
> +? vect_mask_var
> +: vect_simple_var
> +: vect_scalar_var;
>type = vectype ? vectype : TREE_TYPE (scalar_dest);
>
>gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 8eda8e9..6949c71 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -7525,6 +7525,211 @@ vectorizable_condition (gimple *stmt, 
> gimple_stmt_iterator *gsi,
>return true;
>  }
>
> +/* vectorizable_comparison.
> +
> +   Check if STMT is comparison expression that can be vectorized.
> +   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
> +   comparison, put it in VEC_STMT, and insert it at GSI.
> +
> +   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
> +
> +bool
> +vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
> +gimple **vec_stmt, tree reduc_def,
> +slp_tree slp_node)
> +{
> +  tree lhs, rhs1, rhs2;
> +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> +  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
> +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
> +  tree vec_compare;
> +  tree new_temp;
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> +  tree def;
> +  enum vect_def_type dt, dts[4];
> +  unsigned nunits;
> +  int ncopies;
> +  enum tree_code code;
> +  stmt_vec_info prev_stmt_info = NULL;
> +  int i, j;
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
> +  vec <tree> vec_oprnds0 = vNULL;
> +  vec <tree> vec_oprnds1 = vNULL;
> +  tree mask_type;
> +  tree mask;
> +
> +  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
> +return false;
> +
> +  mask_type = vectype;
> +  nunits = TYPE_VECTOR_SUBPARTS (vectype);
> +
> +  if (slp_node || PURE_SLP_STMT (stmt_info))
> +ncopies = 1;
> +  else
> +ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
> +
> +  gcc_assert (ncopies >= 1);
> +  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
> +return false;
> +
> +  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
> +  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
> +  && reduc_def))
> +return false;
> +
> +  if (STMT_VINFO_LIVE_P (stmt_info))
> +{
> +  if (dump_enabled_p ())
> +   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +"value used after loop.\n");
> +  return false;
> +}
> +
> +  if (!is_gimple_assign (stmt))
> +return false;
> +
> +  code = gimple_assign_rhs_code (stmt);
> +
> +  if (TREE_CODE_CLASS (code) != tcc_comparison)
> +return false;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +
> +  if (TREE_CODE (rhs1) == SSA_NAME)
> +{
> +  gimple *rhs1_def_stmt = SSA_NAME_DEF_STMT (rhs1);
> +  if (!vect_is_simple_use_1 (rhs1, stmt, loop_vinfo, bb_vinfo,
> +&rhs1_def_stmt, &def, &dt, &vectype1))
> +   return false;
> +}
> +  else if (TREE_CODE (rhs1) != INTEGER_CST && TREE_CODE (rhs1) != REAL_CST
> +  && TREE_CODE (rhs1) != FIXED_CST)
> +return false;

I think vect_is_simple_use_1 handles constants just fine an def_stmt
is an output,
you don't need to initialize it.

> +
> +  if (TREE_CODE (rhs2) == SSA_NAME)
> +{
> +  gimple *rhs2_def_stmt = SSA_NAME_DEF_STMT (rhs2);
> +  if (!vect_is_simple_use_1 (rhs2, stmt, loop_vinfo, bb_vinfo,
> +&rhs2_def_stmt, &def, &dt, &vectype2))
> +   return false;
> +

Re: [vec-cmp, patch 2/6] Vectorization factor computation

2015-10-13 Thread Richard Biener
On Thu, Oct 8, 2015 at 4:59 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch handles statements with boolean result in vectorization factor 
> computation.  For comparison its operands type is used instead of restult 
> type to compute VF.  Other boolean statements are ignored for VF.
>
> Vectype for comparison is computed using type of compared values.  Computed 
> type is propagated into other boolean operations.

This feels rather ad-hoc, mixing up the existing way of computing
vector type and VF.  I'd rather have turned the whole
vector type computation around to the scheme working on the operands
rather than on the lhs and then searching
for smaller/larger types on the rhs'.

I know this is a tricky function (heh, but you make it even worse...).
And it needs a helper with knowledge about operations
so one can compute the result vector type for an operation on its
operands.  The seeds should be PHIs (handled like now)
and loads, and yes, externals need special handling.

Ideally we'd do things in two stages, first compute vector types in a
less constrained manner (not forcing a single vector size)
and then in a 2nd run promote to a common size also computing the VF to do that.

Btw, I think you "mishandle" bool b = boolvar != 0;

Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-08  Ilya Enkovich  
>
> * tree-vect-loop.c (vect_determine_vectorization_factor):  Ignore mask
> operations for VF.  Add mask type computation.
> * tree-vect-stmts.c (get_mask_type_for_scalar_type): New.
> * tree-vectorizer.h (get_mask_type_for_scalar_type): New.
>
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 63e29aa..c7e8067 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -183,19 +183,21 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>  {
>struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
> -  int nbbs = loop->num_nodes;
> +  unsigned nbbs = loop->num_nodes;
>unsigned int vectorization_factor = 0;
>tree scalar_type;
>gphi *phi;
>tree vectype;
>unsigned int nunits;
>stmt_vec_info stmt_info;
> -  int i;
> +  unsigned i;
>HOST_WIDE_INT dummy;
>gimple *stmt, *pattern_stmt = NULL;
>gimple_seq pattern_def_seq = NULL;
>gimple_stmt_iterator pattern_def_si = gsi_none ();
>bool analyze_pattern_stmt = false;
> +  bool bool_result;
> +  auto_vec <gimple *> mask_producers;
>
>if (dump_enabled_p ())
>  dump_printf_loc (MSG_NOTE, vect_location,
> @@ -414,6 +416,8 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   return false;
> }
>
> + bool_result = false;
> +
>   if (STMT_VINFO_VECTYPE (stmt_info))
> {
>   /* The only case when a vectype had been already set is for 
> stmts
> @@ -434,6 +438,32 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
> scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
>   else
> scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
> +
> + /* Bool ops don't participate in vectorization factor
> +computation.  For comparison use compared types to
> +compute a factor.  */
> + if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +   {
> + mask_producers.safe_push (stmt_info);
> + bool_result = true;
> +
> + if (gimple_code (stmt) == GIMPLE_ASSIGN
> + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
> +== tcc_comparison
> + && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
> +!= BOOLEAN_TYPE)
> +   scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> + else
> +   {
> + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
> +   {
> + pattern_def_seq = NULL;
> + gsi_next (&si);
> +   }
> + continue;
> +   }
> +   }
> +
>   if (dump_enabled_p ())
> {
>   dump_printf_loc (MSG_NOTE, vect_location,
> @@ -456,7 +486,8 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   return false;
> }
>
> - STMT_VINFO_VECTYPE (stmt_info) = vectype;
> + if (!bool_result)
> +   STMT_VINFO_VECTYPE (stmt_info) = vectype;
>
>   if (dump_enabled_p ())
> {
> @@ -469,8 +500,9 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
>   /* The vectorization factor is according to the smallest
>  scalar type (or the largest vector size, but we only
>  support one vector size per loop).  */
> - scal

Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Ilya Enkovich
2015-10-13 16:17 GMT+03:00 Richard Biener :
> On Fri, Oct 9, 2015 at 10:43 PM, Jeff Law  wrote:
>> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
>>>
>>> 2015-10-02  Ilya Enkovich  
>>>
>>> * doc/tm.texi: Regenerated.
>>> * doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
>>> * stor-layout.c (layout_type): Use mode to get vector mask size.
>>> * target.def (get_mask_mode): New.
>>> * targhooks.c (default_get_mask_mode): New.
>>> * targhooks.h (default_get_mask_mode): New.
>>> * gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
>>> for boolean vector.
>>> * tree.c (MAX_BOOL_CACHED_PREC): New.
>>> (nonstandard_boolean_type_cache): New.
>>> (build_nonstandard_boolean_type): New.
>>> (make_vector_type): Vector mask has no canonical type.
>>> (build_truth_vector_type): New.
>>> (build_same_sized_truth_vector_type): New.
>>> (truth_type_for): Support vector masks.
>>> * tree.h (VECTOR_BOOLEAN_TYPE_P): New.
>>> (build_truth_vector_type): New.
>>> (build_same_sized_truth_vector_type): New.
>>> (build_nonstandard_boolean_type): New.
>>>
>>>
>>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>>> index eb495a8..098213e 100644
>>> --- a/gcc/doc/tm.texi
>>> +++ b/gcc/doc/tm.texi
>>> @@ -5688,6 +5688,11 @@ mode returned by
>>> @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
>>>   The default is zero which means to not iterate over other vector sizes.
>>>   @end deftypefn
>>>
>>> +@deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_GET_MASK_MODE
>>> (unsigned @var{nunits}, unsigned @var{length})
>>> +This hook returns mode to be used for a mask to be used for a vector
>>> +of specified @var{length} with @var{nunits} elements.
>>> +@end deftypefn
>>
>> Does it make sense to indicate the default used if the target does not
>> provide a definition for this hook?
>>
>>
>>
>>
>>> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
>>> index 938e54b..58ecd7b 100644
>>> --- a/gcc/stor-layout.c
>>> +++ b/gcc/stor-layout.c
>>> @@ -2184,10 +2184,16 @@ layout_type (tree type)
>>>
>>> TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
>>>   TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
>>> -   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
>>> -TYPE_SIZE_UNIT
>>> (innertype),
>>> -size_int (nunits));
>>> -   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE
>>> (innertype),
>>> +   /* Several boolean vector elements may fit in a single unit.  */
>>> +   if (VECTOR_BOOLEAN_TYPE_P (type))
>>> + TYPE_SIZE_UNIT (type)
>>> +   = size_int (GET_MODE_SIZE (type->type_common.mode));
>>
>> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
>> node directly?
>
> Probably not because of TYPE_MODE interfering for vector types.

Seems I need to roll it back then. I don't think I want scalar mode to
be used for cases when proper integer vector mode is unsupported by
target but returned by default get_mask_mode hook. Such cases just
should be lowered into scalars.

>
> But...
>
> +/* Builds a boolean type of precision PRECISION.
> +   Used for boolean vectors to choose proper vector element size.  */
> +tree
> +build_nonstandard_boolean_type (unsigned HOST_WIDE_INT precision)
> +{
> +  tree type;
> +
> +  if (precision <= MAX_BOOL_CACHED_PREC)
> +{
> +  type = nonstandard_boolean_type_cache[precision];
> +  if (type)
> +   return type;
> +}
> +
> +  type = make_node (BOOLEAN_TYPE);
> +  TYPE_PRECISION (type) = precision;
> +  fixup_unsigned_type (type);
>
> do we really need differing _precision_ boolean types?  I think we only
> need differing size (aka mode) boolean types, no?  Thus, keep precision == 1
> but "only" adjust the mode (possibly by simply setting precision to 1 after
> fixup_unsigned_type ...)?

The reason for that was -1 value of a proper size which may be used as
vector element value. I'm not sure if something breaks in the compiler
if I set 1 precision for all created boolean types, but I assumed it's
reasonable to match precision and actually stored values.

Ilya

>
> Richard.
>
>>
>>> diff --git a/gcc/tree.c b/gcc/tree.c
>>> index 84fd34d..0cb8361 100644
>>> --- a/gcc/tree.c
>>> +++ b/gcc/tree.c
>>> @@ -11067,9 +11130,10 @@ truth_type_for (tree type)
>>>   {
>>> if (TREE_CODE (type) == VECTOR_TYPE)
>>>   {
>>> -  tree elem = lang_hooks.types.type_for_size
>>> -(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
>>> -  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS
>>> (type));
>>> +  if (VECTOR_BOOLEAN_TYPE_P (type))
>>> +   return type;
>>> +  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
>>> + GET_MODE_SIZE (TYPE_MODE (type)));
>>
>> Presumably

Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
On Tue, Oct 13, 2015 at 02:28:37PM +0200, Bernd Schmidt wrote:
> On 10/13/2015 02:02 PM, Dominik Vogt wrote:
> >When "#pragma GCC pop_options" is used on a platform without
> >support for "#pragma GCC target", Gcc emits a warning.  As
> >pop_options is useful on targets without the target pragma to
> >restore optimizations flags, the warning should be removed.
> >
> >The attached patch does that rather inelegantly by checking if the
> >pragma_parse hook points to the default implementation.  I couldn't
> >think of a similarly terse but less clumsy way.  Suggestions for a
> >better test are very welcome.
> 
> Why not just remove the code that emits the warning message? Are
> there situations where the warning is justified?

Removing the warning would also affect "#pragma GCC target("foo")
But then, "#pragma GCC asdfg" doesn't produce a warning either, so
what's the point warning about an undefined "target" pragma, but
not about other undefined pragmas.  For me, either way to do this
is good.

By the way, the background is that Glibc used pop_options and the
warning broke building with -Werror (they have solved that in a
different way now).

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany



Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Ilya Enkovich
On 09 Oct 14:43, Jeff Law wrote:
> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
> >+This hook returns mode to be used for a mask to be used for a vector
> >+of specified @var{length} with @var{nunits} elements.
> >+@end deftypefn
> Does it make sense to indicate the default used if the target does not
> provide a definition for this hook?
> 
> 

Sure

> 
> 
> >diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
> >index 938e54b..58ecd7b 100644
> >--- a/gcc/stor-layout.c
> >+++ b/gcc/stor-layout.c
> >@@ -2184,10 +2184,16 @@ layout_type (tree type)
> >
> > TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
> >  TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
> >-TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
> >- TYPE_SIZE_UNIT (innertype),
> >- size_int (nunits));
> >-TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
> >+/* Several boolean vector elements may fit in a single unit.  */
> >+if (VECTOR_BOOLEAN_TYPE_P (type))
> >+  TYPE_SIZE_UNIT (type)
> >+= size_int (GET_MODE_SIZE (type->type_common.mode));
> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
> node directly?

Previous version of this patch had changes in vector_type_mode and seems I 
copy-pasted this field access from there.
Will fix it here.

> 
> 
> >diff --git a/gcc/tree.c b/gcc/tree.c
> >index 84fd34d..0cb8361 100644
> >--- a/gcc/tree.c
> >+++ b/gcc/tree.c
> >@@ -11067,9 +11130,10 @@ truth_type_for (tree type)
> >  {
> >if (TREE_CODE (type) == VECTOR_TYPE)
> >  {
> >-  tree elem = lang_hooks.types.type_for_size
> >-(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
> >-  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS (type));
> >+  if (VECTOR_BOOLEAN_TYPE_P (type))
> >+return type;
> >+  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
> >+  GET_MODE_SIZE (TYPE_MODE (type)));
> Presumably you're not building an opaque type anymore because you want
> warnings if somethings tries to do a conversion?  I'm going to assume this
> was intentional.

Right.  I don't expect front-end to cast boolean vector to anything.  Its usage 
should be limited by VEC_COND_EXPR.

> 
> 
> With the doc update and the fix to use TYPE_MODE (assuming there's not a
> good reason to be looking at the underlying type directly) this is OK.
> 
> jeff

Here is an updated version.

Thanks,
Ilya
--
2015-10-13  Ilya Enkovich  

* doc/tm.texi: Regenerated.
* doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
* stor-layout.c (layout_type): Use mode to get vector mask size.
* target.def (get_mask_mode): New.
* targhooks.c (default_get_mask_mode): New.
* targhooks.h (default_get_mask_mode): New.
* gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
for boolean vector.
* tree.c (MAX_BOOL_CACHED_PREC): New.
(nonstandard_boolean_type_cache): New.
(build_nonstandard_boolean_type): New.
(make_vector_type): Vector mask has no canonical type.
(build_truth_vector_type): New.
(build_same_sized_truth_vector_type): New.
(truth_type_for): Support vector masks.
* tree.h (VECTOR_BOOLEAN_TYPE_P): New.
(build_truth_vector_type): New.
(build_same_sized_truth_vector_type): New.
(build_nonstandard_boolean_type): New.


diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 33939ec..914cfea 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4225,6 +4225,8 @@ address;  but often a machine-dependent strategy can 
generate better code.
 
 @hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
 
+@hook TARGET_VECTORIZE_GET_MASK_MODE
+
 @hook TARGET_VECTORIZE_INIT_COST
 
 @hook TARGET_VECTORIZE_ADD_STMT_COST
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index 938e54b..d2289d9 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -2184,10 +2184,16 @@ layout_type (tree type)
 
TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
 TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
-   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
-TYPE_SIZE_UNIT (innertype),
-size_int (nunits));
-   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE (innertype),
+   /* Several boolean vector elements may fit in a single unit.  */
+   if (VECTOR_BOOLEAN_TYPE_P (type))
+ TYPE_SIZE_UNIT (type)
+   = size_int (GET_MODE_SIZE (TYPE_MODE (type)));
+   else
+ TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
+  TYPE_SIZE_UNIT (innertype),
+  size_int (nunits));
+   TYPE_SIZE (type)

Re: [Boolean Vector, patch 1/5] Introduce boolean vector to be used as a vector comparison type

2015-10-13 Thread Richard Biener
On Fri, Oct 9, 2015 at 10:43 PM, Jeff Law  wrote:
> On 10/02/2015 07:59 AM, Ilya Enkovich wrote:
>>
>> 2015-10-02  Ilya Enkovich  
>>
>> * doc/tm.texi: Regenerated.
>> * doc/tm.texi.in (TARGET_VECTORIZE_GET_MASK_MODE): New.
>> * stor-layout.c (layout_type): Use mode to get vector mask size.
>> * target.def (get_mask_mode): New.
>> * targhooks.c (default_get_mask_mode): New.
>> * targhooks.h (default_get_mask_mode): New.
>> * gcc/tree-vect-stmts.c (get_same_sized_vectype): Add special case
>> for boolean vector.
>> * tree.c (MAX_BOOL_CACHED_PREC): New.
>> (nonstandard_boolean_type_cache): New.
>> (build_nonstandard_boolean_type): New.
>> (make_vector_type): Vector mask has no canonical type.
>> (build_truth_vector_type): New.
>> (build_same_sized_truth_vector_type): New.
>> (truth_type_for): Support vector masks.
>> * tree.h (VECTOR_BOOLEAN_TYPE_P): New.
>> (build_truth_vector_type): New.
>> (build_same_sized_truth_vector_type): New.
>> (build_nonstandard_boolean_type): New.
>>
>>
>> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
>> index eb495a8..098213e 100644
>> --- a/gcc/doc/tm.texi
>> +++ b/gcc/doc/tm.texi
>> @@ -5688,6 +5688,11 @@ mode returned by
>> @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
>>   The default is zero which means to not iterate over other vector sizes.
>>   @end deftypefn
>>
>> +@deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_GET_MASK_MODE
>> (unsigned @var{nunits}, unsigned @var{length})
>> +This hook returns mode to be used for a mask to be used for a vector
>> +of specified @var{length} with @var{nunits} elements.
>> +@end deftypefn
>
> Does it make sense to indicate the default used if the target does not
> provide a definition for this hook?
>
>
>
>
>> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
>> index 938e54b..58ecd7b 100644
>> --- a/gcc/stor-layout.c
>> +++ b/gcc/stor-layout.c
>> @@ -2184,10 +2184,16 @@ layout_type (tree type)
>>
>> TYPE_SATURATING (type) = TYPE_SATURATING (TREE_TYPE (type));
>>   TYPE_UNSIGNED (type) = TYPE_UNSIGNED (TREE_TYPE (type));
>> -   TYPE_SIZE_UNIT (type) = int_const_binop (MULT_EXPR,
>> -TYPE_SIZE_UNIT
>> (innertype),
>> -size_int (nunits));
>> -   TYPE_SIZE (type) = int_const_binop (MULT_EXPR, TYPE_SIZE
>> (innertype),
>> +   /* Several boolean vector elements may fit in a single unit.  */
>> +   if (VECTOR_BOOLEAN_TYPE_P (type))
>> + TYPE_SIZE_UNIT (type)
>> +   = size_int (GET_MODE_SIZE (type->type_common.mode));
>
> Shouldn't this be TYPE_MODE rather than accessing the internals of the tree
> node directly?

Probably not because of TYPE_MODE interfering for vector types.

But...

+/* Builds a boolean type of precision PRECISION.
+   Used for boolean vectors to choose proper vector element size.  */
+tree
+build_nonstandard_boolean_type (unsigned HOST_WIDE_INT precision)
+{
+  tree type;
+
+  if (precision <= MAX_BOOL_CACHED_PREC)
+{
+  type = nonstandard_boolean_type_cache[precision];
+  if (type)
+   return type;
+}
+
+  type = make_node (BOOLEAN_TYPE);
+  TYPE_PRECISION (type) = precision;
+  fixup_unsigned_type (type);

do we really need differing _precision_ boolean types?  I think we only
need differing size (aka mode) boolean types, no?  Thus, keep precision == 1
but "only" adjust the mode (possibly by simply setting precision to 1 after
fixup_unsigned_type ...)?

Richard.

>
>> diff --git a/gcc/tree.c b/gcc/tree.c
>> index 84fd34d..0cb8361 100644
>> --- a/gcc/tree.c
>> +++ b/gcc/tree.c
>> @@ -11067,9 +11130,10 @@ truth_type_for (tree type)
>>   {
>> if (TREE_CODE (type) == VECTOR_TYPE)
>>   {
>> -  tree elem = lang_hooks.types.type_for_size
>> -(GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))), 0);
>> -  return build_opaque_vector_type (elem, TYPE_VECTOR_SUBPARTS
>> (type));
>> +  if (VECTOR_BOOLEAN_TYPE_P (type))
>> +   return type;
>> +  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
>> + GET_MODE_SIZE (TYPE_MODE (type)));
>
> Presumably you're not building an opaque type anymore because you want
> warnings if somethings tries to do a conversion?  I'm going to assume this
> was intentional.
>
>
> With the doc update and the fix to use TYPE_MODE (assuming there's not a
> good reason to be looking at the underlying type directly) this is OK.
>
> jeff


[gomp4.1] Testsuite tweaks

2015-10-13 Thread Jakub Jelinek
Hi!

This patch fixes issues in tests I'm not running all the time during
gomp branch development (basically I do just RUNTESTFLAGS=gomp.exp
in gcc subdir and check-target-libgomp).

2015-10-13  Jakub Jelinek  

* c-c++-common/cpp/openmp-define-3.c: Adjust for the new
value of _OPENMP macro.
* c-c++-common/cilk-plus/PS/body.c (foo): Adjust expected diagnostics.
* c-c++-common/goacc-gomp/nesting-fail-1.c (f_acc_parallel,
f_acc_kernels, f_acc_data, f_acc_loop): Add map clause to target data.

--- gcc/testsuite/c-c++-common/cpp/openmp-define-3.c.jj 2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/cpp/openmp-define-3.c2015-10-13 
14:58:40.968654734 +0200
@@ -6,6 +6,6 @@
 # error _OPENMP not defined
 #endif
 
-#if _OPENMP != 201307
+#if _OPENMP != 201511
 # error _OPENMP defined to wrong value
 #endif
--- gcc/testsuite/c-c++-common/cilk-plus/PS/body.c.jj   2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/cilk-plus/PS/body.c  2015-10-13 
15:00:00.946495358 +0200
@@ -27,7 +27,7 @@ void foo()
 #pragma simd
   for (int i=0; i < 1000; ++i)
 {
-#pragma omp for /* { dg-error "OpenMP constructs may not" } */
+#pragma omp for /* { dg-error "OpenMP constructs other than" } */
   for (j=0; j < 1000; ++j)
a[i] = b[i];
 }
--- gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c.jj   2015-04-24 
12:32:01.0 +0200
+++ gcc/testsuite/c-c++-common/goacc-gomp/nesting-fail-1.c  2015-10-13 
15:02:18.549500635 +0200
@@ -230,7 +230,7 @@ f_acc_parallel (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -300,7 +300,7 @@ f_acc_kernels (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -370,7 +370,7 @@ f_acc_data (void)
   {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
 ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   }
@@ -450,7 +450,7 @@ f_acc_loop (void)
 {
 #pragma omp target /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
   ;
-#pragma omp target data /* { dg-error "non-OpenACC construct inside of OpenACC 
region" } */
+#pragma omp target data map(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
   ;
 #pragma omp target update to(i) /* { dg-error "non-OpenACC construct inside of 
OpenACC region" } */
 }

Jakub


Re: [AArch64_be] Fix vtbl[34] and vtbx4

2015-10-13 Thread James Greenhalgh
On Tue, Oct 13, 2015 at 02:05:01PM +0100, Christophe Lyon wrote:
> I commited this as r228716, and noticed later that
> gcc.target/aarch64/table-intrinsics.c failed because of this patch.
> 
> This is because that testcase scans the assembly for 'tbl v' or 'tbx
> v', but since I replaced some asm statements,
> the space is now a tab.
> 
> I plan to commit this (probably obvious?):

> 2015-10-13  Christophe Lyon  
> 
>   * gcc/testsuite/gcc.target/aarch64/table-intrinsics.c: Fix regexp
>   after r228716 (Fix vtbl[34] and vtbx4).

Bad luck. This is fine (and yes, obvious).

Thanks,
James

> Index: gcc/testsuite/gcc.target/aarch64/table-intrinsics.c
> ===
> --- gcc/testsuite/gcc.target/aarch64/table-intrinsics.c   (revision 
> 228759)
> +++ gcc/testsuite/gcc.target/aarch64/table-intrinsics.c   (working copy)
> @@ -435,5 +435,5 @@
>return vqtbx4q_p8 (r, tab, idx);
>  }
>  
> -/* { dg-final { scan-assembler-times "tbl v" 42} }  */
> -/* { dg-final { scan-assembler-times "tbx v" 30} }  */
> +/* { dg-final { scan-assembler-times "tbl\[ |\t\]*v" 42} }  */
> +/* { dg-final { scan-assembler-times "tbx\[ |\t\]*v" 30} }  */



Re: [AArch64_be] Fix vtbl[34] and vtbx4

2015-10-13 Thread Christophe Lyon
On 12 October 2015 at 15:30, James Greenhalgh  wrote:
> On Fri, Oct 09, 2015 at 05:16:05PM +0100, Christophe Lyon wrote:
>> On 8 October 2015 at 11:12, James Greenhalgh  
>> wrote:
>> > On Wed, Oct 07, 2015 at 09:07:30PM +0100, Christophe Lyon wrote:
>> >> On 7 October 2015 at 17:09, James Greenhalgh  
>> >> wrote:
>> >> > On Tue, Sep 15, 2015 at 05:25:25PM +0100, Christophe Lyon wrote:
>> >> >
>> >> > Why do we want this for vtbx4 rather than putting out a VTBX instruction
>> >> > directly (as in the inline asm versions you replace)?
>> >> >
>> >> I just followed the pattern used for vtbx3.
>> >>
>> >> > This sequence does make sense for vtbx3.
>> >> In fact, I don't see why vtbx3 and vtbx4 should be different?
>> >
>> > The difference between TBL and TBX is in their handling of a request to
>> > select an out-of-range value. For TBL this returns zero, for TBX this
>> > returns the value which was already in the destination register.
>> >
>> > Because the byte-vectors used by the TBX instruction in aarch64 are 128-bit
>> > (so two of them togather allow selecting elements in the range 0-31), and
>> > vtbx3 needs to emulate the AArch32 behaviour of picking elements from 
>> > 3x64-bit
>> > vectors (allowing elements in the range 0-23), we need to manually check 
>> > for
>> > values which would have been out-of-range on AArch32, but are not out
>> > of range for AArch64 and handle them appropriately. For vtbx4 on the other
>> > hand, 2x128-bit registers give the range 0..31 and 4x64-bit registers give
>> > the range 0..31, so we don't need the special masked handling.
>> >
>> > You can find the suggested instruction sequences for the Neon intrinsics
>> > in this document:
>> >
>> >   
>> > http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
>> >
>>
>> Hi James,
>>
>> Please find attached an updated version which hopefully addresses your 
>> comments.
>> Tested on aarch64-none-elf and aarch64_be-none-elf using the Foundation 
>> Model.
>>
>> OK?
>
> Looks good to me,
>
> Thanks,
> James
>

I commited this as r228716, and noticed later that
gcc.target/aarch64/table-intrinsics.c failed because of this patch.

This is because that testcase scans the assembly for 'tbl v' or 'tbx
v', but since I replaced some asm statements,
the space is now a tab.

I plan to commit this (probably obvious?):
2015-10-13  Christophe Lyon  

* gcc/testsuite/gcc.target/aarch64/table-intrinsics.c: Fix regexp
after r228716 (Fix vtbl[34] and vtbx4).

Index: gcc/testsuite/gcc.target/aarch64/table-intrinsics.c
===
--- gcc/testsuite/gcc.target/aarch64/table-intrinsics.c	(revision 228759)
+++ gcc/testsuite/gcc.target/aarch64/table-intrinsics.c	(working copy)
@@ -435,5 +435,5 @@
   return vqtbx4q_p8 (r, tab, idx);
 }
 
-/* { dg-final { scan-assembler-times "tbl v" 42} }  */
-/* { dg-final { scan-assembler-times "tbx v" 30} }  */
+/* { dg-final { scan-assembler-times "tbl\[ |\t\]*v" 42} }  */
+/* { dg-final { scan-assembler-times "tbx\[ |\t\]*v" 30} }  */


Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 06:41 AM, Nikolai Bozhenov wrote:

On 10/13/2015 03:22 PM, Jeff Law wrote:

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently prototype for print_insn in rtl.h doesn't match it's
definition in sched-vis.c The patch fixes this mismatch.

I'll run this through the usual bootstrap & regression testing before
installing later today.
jeff


I've bootstrapped it on x86_64, but I don't see much sense in regression
testing this patch cause it's so small. Though, if you think it's
necessary,
I can test it myself and write to you when I get the results.
It's standard procedure.  While I agree that a bootstrap is almost 
certainly sufficient here, it's not a big deal to add this to the 
regression run I set up to run while getting the kids ready for school :-)


jeff



Re: [PATCH] Random shuffle moveable: container size

2015-10-13 Thread Aurelio Remonda
On Tue, Oct 13, 2015 at 6:26 AM, Jonathan Wakely  wrote:
> On 08/10/15 10:35 -0300, Aurelio Remonda wrote:
>>
>> This patch reduces the size of the array A (the array that contains
>> the values being shuffled) so the test can pass while running the
>> stdlibc++ testsuite.
>
>
> Ahem! The project's name is libstdc++ !!! :-)

:) My bad! Sorry about that!

>> It also make some minor changes such as:
>> *Deleting a useless call to fill_ascending function on test02.
>> *Changing N from const int to const unsigned int.
>> I have a company-wide copyright assignment, but I don't have commit
>> access.
>
>
> OK, I will commit this (without the unnecessary whitespace changes).
>
> Thanks.

Thank you!
Regards



-- 
Aurelio Remonda

Software Engineer

San Lorenzo 47, 3rd Floor, Office 5
Córdoba, Argentina
Phone: +54-351-4217888 / 4218211


[gomp4.1] jit and ada fixes

2015-10-13 Thread Jakub Jelinek
Hi!

When bootstrapping/regtesting, I found small omissions, fixed thusly:

2015-10-13  Jakub Jelinek  

gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_10,
DEF_FUNCTION_TYPE_11): Define.
gcc/jit/
* jit-builtins.h (DEF_FUNCTION_TYPE_9, DEF_FUNCTION_TYPE_11): Define.

--- gcc/ada/gcc-interface/utils.c.jj2015-10-13 10:34:05.0 +0200
+++ gcc/ada/gcc-interface/utils.c   2015-10-13 13:27:49.780598533 +0200
@@ -5369,6 +5369,12 @@ enum c_builtin_type
ARG6, ARG7) NAME,
 #define DEF_FUNCTION_TYPE_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8) NAME,
+#define DEF_FUNCTION_TYPE_9(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) NAME,
+#define DEF_FUNCTION_TYPE_10(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10) NAME,
+#define DEF_FUNCTION_TYPE_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
 #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
 #define DEF_FUNCTION_TYPE_VAR_1(NAME, RETURN, ARG1) NAME,
 #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
@@ -5392,6 +5398,9 @@ enum c_builtin_type
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
+#undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2
@@ -5493,6 +5502,18 @@ install_builtin_function_types (void)
ARG6, ARG7, ARG8)   \
   def_fn_type (ENUM, RETURN, 0, 8, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
   ARG7, ARG8);
+#define DEF_FUNCTION_TYPE_9(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) \
+  def_fn_type (ENUM, RETURN, 0, 9, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6, \
+  ARG7, ARG8, ARG9);
+#define DEF_FUNCTION_TYPE_10(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5,\
+ARG6, ARG7, ARG8, ARG9, ARG10) \
+  def_fn_type (ENUM, RETURN, 0, 10, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6,
\
+  ARG7, ARG8, ARG9, ARG10);
+#define DEF_FUNCTION_TYPE_11(ENUM, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5,\
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11)  \
+  def_fn_type (ENUM, RETURN, 0, 11, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6,
\
+  ARG7, ARG8, ARG9, ARG10, ARG11);
 #define DEF_FUNCTION_TYPE_VAR_0(ENUM, RETURN) \
   def_fn_type (ENUM, RETURN, 1, 0);
 #define DEF_FUNCTION_TYPE_VAR_1(ENUM, RETURN, ARG1) \
@@ -5526,6 +5547,9 @@ install_builtin_function_types (void)
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
+#undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2
--- gcc/jit/jit-builtins.h.jj   2015-10-13 13:13:02.0 +0200
+++ gcc/jit/jit-builtins.h  2015-10-13 13:31:09.150670112 +0200
@@ -43,8 +43,12 @@ enum jit_builtin_type
ARG6, ARG7) NAME,
 #define DEF_FUNCTION_TYPE_8(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
ARG6, ARG7, ARG8) NAME,
+#define DEF_FUNCTION_TYPE_9(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+   ARG6, ARG7, ARG8, ARG9) NAME,
 #define DEF_FUNCTION_TYPE_10(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
 ARG6, ARG7, ARG8, ARG9, ARG10) NAME,
+#define DEF_FUNCTION_TYPE_11(NAME, RETURN, ARG1, ARG2, ARG3, ARG4, ARG5, \
+ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) NAME,
 #define DEF_FUNCTION_TYPE_VAR_0(NAME, RETURN) NAME,
 #define DEF_FUNCTION_TYPE_VAR_1(NAME, RETURN, ARG1) NAME,
 #define DEF_FUNCTION_TYPE_VAR_2(NAME, RETURN, ARG1, ARG2) NAME,
@@ -68,7 +72,9 @@ enum jit_builtin_type
 #undef DEF_FUNCTION_TYPE_6
 #undef DEF_FUNCTION_TYPE_7
 #undef DEF_FUNCTION_TYPE_8
+#undef DEF_FUNCTION_TYPE_9
 #undef DEF_FUNCTION_TYPE_10
+#undef DEF_FUNCTION_TYPE_11
 #undef DEF_FUNCTION_TYPE_VAR_0
 #undef DEF_FUNCTION_TYPE_VAR_1
 #undef DEF_FUNCTION_TYPE_VAR_2

Jakub


Re: [PATCH PR67909 PR67947]

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:49 PM, Yuri Rumyantsev  wrote:
> Here is updated patch with splitting long line.
> The patch is attached.

Ok with aligning the guard_edge == ... line properly

Thanks,
Richard.

> Yuri.
>
> 2015-10-13 15:38 GMT+03:00 H.J. Lu :
>> On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
>>> Hi All,
>>>
>>> Here is a simple patch for unswitching outer loop through guard-edge
>>> hoisting. The check that guard-edge is around the inner loop was
>>> missed.
>>>
>>> Bootstrapping and regression testing did not show new failures.
>>>
>>> Is it OK for trunk?
>>>
>>> ChangeLog:
>>> 2014-10-13  Yuri Rumyantsev  
>>>
>>> PR tree-optimization/67909, 67947
>>> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
>>> really skip the inner loop.
>>>
>>> gcc/testsuite/ChangeLog
>>> * gcc.dg/torture/pr67947.c: New test.
>>
>> +  /* Guard edge must skip inner loop.  */
>> +  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
>> +  guard_edge == fe ? te->dest : fe->dest))
>>   It should line up with "CDI_DOMINATORS".
>>
>> + fprintf (dump_file, "Guard edge %d --> %d is not around the
>> loop!\n",guard_edge->src->index,guard_edge->dest->index);
>>
>> Please break lone line.
>>
>> --
>> H.J.


Re: [RFC VTV] Fix VTV for targets that have section anchors.

2015-10-13 Thread Ramana Radhakrishnan



On 12/10/15 21:44, Jeff Law wrote:
> On 10/09/2015 03:17 AM, Ramana Radhakrishnan wrote:
>> This started as a Friday afternoon project ...
>>
>> It turned out enabling VTV for AArch64 and ARM was a matter of fixing
>> PR67868 which essentially comes from building libvtv with section
>> anchors turned on. The problem was that the flow of control from
>> output_object_block through to switch_section did not have the same
>> special casing for the vtable section that exists in
>> assemble_variable.
> That's some ugly code.  You might consider factoring that code into a 
> function and just calling it from both places.  Your version doesn't seem to 
> handle PECOFF, so I'd probably refactor from assemble_variable.
> 

I was a bit lazy as I couldn't immediately think of a target that would want 
PECOFF, section anchors and VTV. That combination seems to be quite rare, 
anyway point taken on the refactor.

Ok if no regressions ?

>>
>> However both these failures also occur on x86_64 - so I'm content to
>> declare victory on AArch64 as far as basic enablement goes.
> Cool.
> 
>>
>> 1. Are the generic changes to varasm.c ok ? 2. Can we take the
>> AArch64 support in now, given this amount of testing ? Marcus /
>> Caroline ? 3. Any suggestions / helpful debug hints for VTV debugging
>> (other than turning VTV_DEBUG on and inspecting trace) ?
> I think that with refactoring they'd be good to go.  No opinions on the 
> AArch64 specific question -- call for the AArch64 maintainers.
> 
> Good to see someone hacking on vtv.  It's in my queue to look at as well.

Yeah figuring out more about vtv is also in my background queue.

regards
Ramana

PR other/67868

* varasm.c (assemble_variable): Move special vtv handling to..
(handle_vtv_comdat_sections): .. here. New function.
(output_object_block): Handle vtv sections.

libvtv/Changelog

* configure.tgt: Support aarch64 and arm.
diff --git a/gcc/varasm.c b/gcc/varasm.c
index f1564bc..62ad863 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -127,6 +127,7 @@ static void asm_output_aligned_bss (FILE *, tree, const 
char *,
 #endif /* BSS_SECTION_ASM_OP */
 static void mark_weak (tree);
 static void output_constant_pool (const char *, tree);
+static void handle_vtv_comdat_section (section *, const_tree);
 
 /* Well-known sections, each one associated with some sort of *_ASM_OP.  */
 section *text_section;
@@ -2230,56 +2231,10 @@ assemble_variable (tree decl, int top_level 
ATTRIBUTE_UNUSED,
 assemble_noswitch_variable (decl, name, sect, align);
   else
 {
-  /* The following bit of code ensures that vtable_map 
- variables are not only in the comdat section, but that
- each variable has its own unique comdat name.  If this
- code is removed, the variables end up in the same section
- with a single comdat name.
-
- FIXME:  resolve_unique_section needs to deal better with
- decls with both DECL_SECTION_NAME and DECL_ONE_ONLY.  Once
- that is fixed, this if-else statement can be replaced with
- a single call to "switch_to_section (sect)".  */
+  /* Special-case handling of vtv comdat sections.  */
   if (sect->named.name
  && (strcmp (sect->named.name, ".vtable_map_vars") == 0))
-   {
-#if defined (OBJECT_FORMAT_ELF)
-  targetm.asm_out.named_section (sect->named.name,
-sect->named.common.flags
-| SECTION_LINKONCE,
-DECL_NAME (decl));
-  in_section = sect;
-#elif defined (TARGET_PECOFF)
-  /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here.
- Therefore the following check is used.
- In case a the target is PE or COFF a comdat group section
- is created, e.g. .vtable_map_vars$foo. The linker places
- everything in .vtable_map_vars at the end.
-
- A fix could be made in
- gcc/config/i386/winnt.c: i386_pe_unique_section. */
-  if (TARGET_PECOFF)
-  {
-char *name;
-
-if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE)
-  name = ACONCAT ((sect->named.name, "$",
-   IDENTIFIER_POINTER (DECL_NAME (decl)), NULL));
-else
-  name = ACONCAT ((sect->named.name, "$",
-IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))),
-NULL));
-
-targetm.asm_out.named_section (name,
-   sect->named.common.flags
-   | SECTION_LINKONCE,
-   DECL_NAME (decl));
-in_section = sect;
-}
-#else
-  switch_to_section (sect);
-#endif
-}
+   handle_vtv_comdat_section (sect, decl);
   else
switch_to_section (sect);
   if (align > BITS_PER_UNIT)
@@ -7329,7

Re: [PATCH] Allow FSM to thread single block cases too

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law  wrote:
>
> One of the cases that was missing in the FSM support is threading when the
> path is a single block.  ie, a control statement's output can be statically
> determined just by looking at PHIs in the control statement's block for one
> or more incoming edges.
>
> This is necessary to fix a regression if I turn off the old jump threader's
> backedge support.  Just as important, Jan has in the past asked about a
> trivial jump threader to be run during early optimizations.  Limiting the
> FSM bits to this case would likely satisfy that need in the future.

I think he asked for trivial forward threads though due to repeated tests.
I hacked FRE to do this (I think), but maybe some trivial cleanup opportunities
are still left here.  Honza?

Richard.

> Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on the
> trunk.
>
> Jeff
>
> commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
> Author: Jeff Law 
> Date:   Tue Oct 13 06:19:20 2015 -0600
>
> [PATCH] Allow FSM to thread single block cases too
>
> * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> Allow single block jump threading paths.
>
> * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index d71bcd2..caab533 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-10-13  Jeff Law  
> +
> +   * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> +   Allow single block jump threading paths.
> +
>  2015-10-13  Tom de Vries  
>
> PR tree-optimization/67476
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 4a08f0f..acf6df5 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,7 @@
> +2015-10-13  Jeff Law  
> +
> +   * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
> +
>  2015-10-12  Jeff Law  
>
> * gcc.dg/tree-ssa/ssa-thread-12.c: New test.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> new file mode 100644
> index 000..5051d11
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
> +
> +typedef struct rtx_def *rtx;
> +typedef const struct rtx_def *const_rtx;
> +enum rtx_code
> +{
> +  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
> +DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
> +COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
> +ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
> +EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
> +CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
> +CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
> +MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV,
> UMOD,
> +AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
> +SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
> +POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
> +ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
> +TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
> +UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
> +UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
> +SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
> +VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
> +SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
> +VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
> +};
> +union rtunion_def
> +{
> +  rtx rt_rtx;
> +};
> +typedef union rtunion_def rtunion;
> +struct rtx_def
> +{
> +  __extension__ enum rtx_code code:16;
> +  union u
> +  {
> +rtunion fld[1];
> +  }
> +  u;
> +};
> +
> +unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
> +rtx single_set_2 (const_rtx, rtx);
> +
> +unsigned
> +seq_cost (const_rtx seq, unsigned char speed)
> +{
> +  unsigned cost = 0;
> +  rtx set;
> +  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
> +{
> +  set =
> +   (enum rtx_code) (seq)->code) == INSN)
> + || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
> + || (((enum rtx_code) (seq)->code) == JUMP_INSN)
> + || (((enum rtx_code) (seq)->code) ==
> + CALL_INSN)) ? (((enum rtx_code) seq)->u.fld[4]).rt_rtx))->
> + code) ==
> +SET ? (((seq)->u.fld[4]).
> +   rt_rtx) : single_set_2 (seq,
> +   (((seq)->u.
> + 

Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 2:18 PM, Marc Glisse  wrote:
> On Tue, 13 Oct 2015, Richard Biener wrote:
>
>> +/* Simplify ~X & X as zero.  */
>> +(simplify
>> + (bit_and:c (convert? @0) (convert? (bit_not @0)))
>> +  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>
>
> The test seems unnecessary for this specific transformation.
>
>> +  { build_zero_cst (TREE_TYPE (@0)); }))
>
>
> I'd rather build_zero_cst (type) directly.
>
>> +/* (-A) * (-B) -> A * B  */
>> +(simplify
>> + (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
>> +  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
>> +   (mult (convert @0) (convert (negate @1)
>>
>> this one is ok with using convert1? and convert2?
>
>
> Is it? Maybe if it also checked tree_nop_conversion_p for @1...

Sorry, your comments are of course correct.  Naveen, please adjust
also according
to these comments.

Richard.

> --
> Marc Glisse


Re: [PATCH PR67909 PR67947]

2015-10-13 Thread Yuri Rumyantsev
Here is updated patch with splitting long line.
The patch is attached.

Yuri.

2015-10-13 15:38 GMT+03:00 H.J. Lu :
> On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
>> Hi All,
>>
>> Here is a simple patch for unswitching outer loop through guard-edge
>> hoisting. The check that guard-edge is around the inner loop was
>> missed.
>>
>> Bootstrapping and regression testing did not show new failures.
>>
>> Is it OK for trunk?
>>
>> ChangeLog:
>> 2014-10-13  Yuri Rumyantsev  
>>
>> PR tree-optimization/67909, 67947
>> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
>> really skip the inner loop.
>>
>> gcc/testsuite/ChangeLog
>> * gcc.dg/torture/pr67947.c: New test.
>
> +  /* Guard edge must skip inner loop.  */
> +  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
> +  guard_edge == fe ? te->dest : fe->dest))
>   It should line up with "CDI_DOMINATORS".
>
> + fprintf (dump_file, "Guard edge %d --> %d is not around the
> loop!\n",guard_edge->src->index,guard_edge->dest->index);
>
> Please break the long line.
>
> --
> H.J.


patch1
Description: Binary data


[gomp4] More openacc loop indirection

2015-10-13 Thread Nathan Sidwell
I've committed this next patch  in my series to move loop partitioning decisions 
to the target compiler.


It introduces 2 more IFN_UNIQUE cases, marking the head and tail sequences of an 
openACC loop.  These are added around the reduction and fork/join regions.  In 
the oacc_device_lower pass we use these markers to reconstruct the openacc 
partitioning regions (their  unique property permits this,  in the same way the 
ptx backend uses the fork/join markers themselves).  Then we scan over the head 
and tail sequences setting the partitioning level.


This patch still doesn't actually defer the partitioning decision -- it's putting 
in place machinery to allow such deferral.  I expect the next patch to complete 
the transition.


nathan
Index: gcc/internal-fn.def
===
--- gcc/internal-fn.def	(revision 228713)
+++ gcc/internal-fn.def	(working copy)
@@ -78,6 +78,10 @@ DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | E
indicating the axis of forking or joining and return nothing.  */
 #define IFN_UNIQUE_OACC_FORK 1
 #define IFN_UNIQUE_OACC_JOIN 2
+/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering or
+   leaving partitioned execution.  */
+#define IFN_UNIQUE_OACC_HEAD_MARK 3
+#define IFN_UNIQUE_OACC_TAIL_MARK 4
 
 /* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
Index: gcc/omp-low.c
===
--- gcc/omp-low.c	(revision 228713)
+++ gcc/omp-low.c	(working copy)
@@ -236,6 +236,26 @@ struct omp_for_data
   struct omp_for_data_loop *loops;
 };
 
+/* Describe the OpenACC looping structure of a function.  The entire
+   function is held in a 'NULL' loop.  */
+
+struct oacc_loop
+{
+  oacc_loop *parent; /* Containing loop.  */
+
+  oacc_loop *child; /* First inner loop.  */
+
+  oacc_loop *sibling; /* Next loop within same parent.  */
+
+  location_t loc; /* Location of the loop start.  */
+
+  /* Start of head and tail.  */
+  gcall *head;  /* Head marker function. */
+  gcall *tail;  /* Tail marker function.  */
+
+  /* Partitioning level.  */
+  unsigned level;
+};
 
 static splay_tree all_contexts;
 static int taskreg_nesting_level;
@@ -4737,11 +4757,12 @@ expand_oacc_get_thread_num (gimple_seq *
   return res;
 }
 
-/* Lower the OpenACC reductions of CLAUSES for compute axis DIM.  INNER
-   is true if this is an inner axis of a multi-axis loop.  FORK and
-   JOIN are (optional) fork and join markers.  Generate the
-   before-loop forking sequence in FORK_SEQ and the after-loop joining
-   sequence to JOIN_SEQ.  The general form of these sequences is
+/* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
+   (which might be a placeholder).  INNER is true if this is an inner
+   axis of a multi-axis loop.  FORK and JOIN are (optional) fork and
+   join markers.  Generate the before-loop forking sequence in
+   FORK_SEQ and the after-loop joining sequence to JOIN_SEQ.  The
+   general form of these sequences is
 
  GOACC_REDUCTION_SETUP
  GOACC_FORK
@@ -4752,7 +4773,7 @@ expand_oacc_get_thread_num (gimple_seq *
  GOACC_REDUCTION_TEARDOWN.  */
 
 static void
-lower_oacc_reductions (location_t loc, tree clauses, unsigned dim, bool inner,
+lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 		   gcall *fork, gcall *join, gimple_seq *fork_seq,
 		   gimple_seq *join_seq, omp_context *ctx)
 {
@@ -4764,7 +4785,6 @@ lower_oacc_reductions (location_t loc, t
   gimple_seq after_join = NULL;
   unsigned count = 0;
   tree lid = build_int_cst (unsigned_type_node, oacc_lid++);
-  tree level = build_int_cst (unsigned_type_node, dim);
 
   for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -4866,6 +4886,22 @@ lower_oacc_reductions (location_t loc, t
   gimple_seq_add_seq (join_seq, after_join);
 }
 
+/* Emit an OpenACC lopp head or tail marker to SEQ.  LEVEL is the
+   partitioning level of the enclosed region.  */ 
+
+static void
+lower_oacc_loop_marker (location_t loc, bool head, tree level,
+			gimple_seq *seq)
+{
+  tree marker = build_int_cst
+(integer_type_node, (head ? IFN_UNIQUE_OACC_HEAD_MARK
+			 : IFN_UNIQUE_OACC_TAIL_MARK));
+  gcall *call = gimple_build_call_internal
+(IFN_UNIQUE, 1 + (level != NULL_TREE), marker, level);
+  gimple_set_location (call, loc);
+  gimple_seq_add_stmt (seq, call);
+}
+
 /* Generate the before and after OpenACC loop sequences.  CLAUSES are
the loop clauses, from which we extract reductions.  Initialize
HEAD and TAIL.  */
@@ -4884,19 +4920,25 @@ lower_oacc_head_tail (location_t loc, tr
   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
 if (mask & GOMP_DIM_MASK (ix))
   {
-	tree level = build_int_cst (unsigned_type_node, ix);
+	tree place = build_int_cst (integer_type_node, -1);
+	tree level = build_int_cst (integer_type_node, ix);
 	gcall *

Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Nikolai Bozhenov

On 10/13/2015 03:22 PM, Jeff Law wrote:

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c.  The patch fixes this mismatch.
I'll run this through the usual bootstrap & regression testing before 
installing later today.

jeff


I've bootstrapped it on x86_64, but I don't see much sense in regression
testing this patch cause it's so small. Though, if you think it's necessary,
I can test it myself and write to you when I get the results.

Thanks,
Nikolai


Re: [PATCH PR67909 PR67947]

2015-10-13 Thread H.J. Lu
On Tue, Oct 13, 2015 at 4:57 AM, Yuri Rumyantsev  wrote:
> Hi All,
>
> Here is a simple patch for unswitching outer loop through guard-edge
> hoisting. The check that guard-edge is around the inner loop was
> missed.
>
> Bootstrapping and regression testing did not show new failures.
>
> Is it OK for trunk?
>
> ChangeLog:
> 2014-10-13  Yuri Rumyantsev  
>
> PR tree-optimization/67909, 67947
> * tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
> really skip the inner loop.
>
> gcc/testsuite/ChangeLog
> * gcc.dg/torture/pr67947.c: New test.

+  /* Guard edge must skip inner loop.  */
+  if (!dominated_by_p (CDI_DOMINATORS, loop->inner->header,
+  guard_edge == fe ? te->dest : fe->dest))
  It should line up with "CDI_DOMINATORS".

+ fprintf (dump_file, "Guard edge %d --> %d is not around the
loop!\n",guard_edge->src->index,guard_edge->dest->index);

Please break the long line.

-- 
H.J.


Re: [PR debug/67192] Fix C loops' back-jump location

2015-10-13 Thread Bernd Schmidt

On 10/12/2015 04:04 PM, Andreas Arnez wrote:

Since r223098 ("Implement -Wmisleading-indentation") the backward-jump
generated for a C while- or for-loop can get the wrong line number.
This is because the check for misleading indentation peeks ahead one
token, advancing input_location to after the loop, and then
c_finish_loop() creates the back-jump and calls add_stmt(), which
assigns input_location to the statement by default.

This patch swaps the check for misleading indentation with the finishing
of the loop, such that input_location still has the right value at the
time of any invocations of add_stmt().


One could argue that peek_token should not have an effect on 
input_location, and in fact cpp_peek_token seems to take steps that this 
does not happen, but it looks like c_parser_peek_token does not use that 
mechanism. Still,


gcc/testsuite/ChangeLog:

PR debug/67192
* gcc.dg/guality/pr67192.c: New test.

gcc/c/ChangeLog:

PR debug/67192
* c-parser.c (c_parser_while_statement): Finish the loop before
parsing ahead for misleading indentation.
(c_parser_for_statement): Likewise.


This fix looks simple enough. Ok. (Might want to add noclone to the 
testcase attributes).



Bernd


Re: [PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Bernd Schmidt

On 10/13/2015 02:02 PM, Dominik Vogt wrote:

When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.

gcc/ChangeLog:

* c-pragma.c: Include targhooks.h.
(handle_pragma_pop_options): Do not call
default_target_option_pragma_parse to prevent its warning when using
"#pragma GCC pop_options" on platforms that do not support
"#pragma GCC target".


Why not just remove the code that emits the warning message? Are there 
situations where the warning is justified?


A testcase would be good.


Bernd



Re: Fix prototype for print_insn in rtl.h

2015-10-13 Thread Jeff Law

On 10/13/2015 02:21 AM, Nikolai Bozhenov wrote:

Currently the prototype for print_insn in rtl.h doesn't match its
definition in sched-vis.c.  The patch fixes this mismatch.
I'll run this through the usual bootstrap & regression testing before 
installing later today.

jeff


[PATCH] Allow FSM to thread single block cases too

2015-10-13 Thread Jeff Law


One of the cases that was missing in the FSM support is threading when 
the path is a single block.  ie, a control statement's output can be 
statically determined just by looking at PHIs in the control statement's 
block for one or more incoming edges.


This is necessary to fix a regression if I turn off the old jump 
threader's backedge support.  Just as important, Jan has in the past 
asked about a trivial jump threader to be run during early 
optimizations.  Limiting the FSM bits to this case would likely satisfy 
that need in the future.


Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on 
the trunk.


Jeff
commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
Author: Jeff Law 
Date:   Tue Oct 13 06:19:20 2015 -0600

[PATCH] Allow FSM to thread single block cases too

* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
Allow single block jump threading paths.

* gcc.dg/tree-ssa/ssa-thread-13.c: New test.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d71bcd2..caab533 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2015-10-13  Jeff Law  
+
+   * tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+   Allow single block jump threading paths.
+
 2015-10-13  Tom de Vries  
 
PR tree-optimization/67476
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4a08f0f..acf6df5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-10-13  Jeff Law  
+
+   * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
+
 2015-10-12  Jeff Law  
 
* gcc.dg/tree-ssa/ssa-thread-12.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
new file mode 100644
index 000..5051d11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
+
+typedef struct rtx_def *rtx;
+typedef const struct rtx_def *const_rtx;
+enum rtx_code
+{
+  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
+DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
+COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
+ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
+EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
+CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
+CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
+MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV, UMOD,
+AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
+SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
+POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
+ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
+TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
+UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
+UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
+SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
+VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
+SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
+VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
+};
+union rtunion_def
+{
+  rtx rt_rtx;
+};
+typedef union rtunion_def rtunion;
+struct rtx_def
+{
+  __extension__ enum rtx_code code:16;
+  union u
+  {
+rtunion fld[1];
+  }
+  u;
+};
+
+unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
+rtx single_set_2 (const_rtx, rtx);
+
+unsigned
+seq_cost (const_rtx seq, unsigned char speed)
+{
+  unsigned cost = 0;
+  rtx set;
+  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
+{
+  set =
+   (enum rtx_code) (seq)->code) == INSN)
+ || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
+ || (((enum rtx_code) (seq)->code) == JUMP_INSN)
+ || (((enum rtx_code) (seq)->code) ==
+ CALL_INSN)) ? (((enum rtx_code) seq)->u.fld[4]).rt_rtx))->
+ code) ==
+SET ? (((seq)->u.fld[4]).
+   rt_rtx) : single_set_2 (seq,
+   (((seq)->u.
+ fld[4]).
+rt_rtx))) : (rtx)
+0);
+  if (set)
+   cost += rtx_cost (set, SET, speed);
+}
+}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 5be6ee4..9128094 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -211,10 +211,6 @@ fsm_find_control_statement_thread_paths (tree 

Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse

On Tue, 13 Oct 2015, Richard Biener wrote:


+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))


The test seems unnecessary for this specific transformation.


+  { build_zero_cst (TREE_TYPE (@0)); }))


I'd rather build_zero_cst (type) directly.


+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (mult (convert @0) (convert (negate @1)

this one is ok with using convert1? and convert2?


Is it? Maybe if it also checked tree_nop_conversion_p for @1...

--
Marc Glisse


Re: [PATCH] x86 interrupt attribute

2015-10-13 Thread Yulia Koval
Here is the current version of the patch with all the fixes.
Regtested/bootstrapped it on 64-bit.

We need a pointer since interrupt handler will update data pointing
to by frame.  Since error_code isn't at the normal location where the
parameter is passed on stack and frame isn't in a hard register, we
changed ix86_function_arg:

+  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
+{
+  /* The first argument of interrupt handler is a pointer and
+points to the return address slot on stack.  The optional
+second argument is an integer for error code on stack.  */
+  gcc_assert (type != NULL_TREE);
+  if (POINTER_TYPE_P (type))
+   {
+ if (cfun->machine->func_type == TYPE_EXCEPTION)
+   /* (AP) in the current frame in exception handler.  */
+   arg = arg_pointer_rtx;
+ else
+   /* -WORD(AP) in the current frame in interrupt handler.  */
+   arg = force_reg (Pmode,
+plus_constant (Pmode, arg_pointer_rtx,
+   -UNITS_PER_WORD));
+ if (mode != Pmode)
+   arg = convert_to_mode (mode, arg, 1);
+   }
+  else
+   {
+ gcc_assert (TREE_CODE (type) == INTEGER_TYPE
+ && cfun->machine->func_type == TYPE_EXCEPTION
+ && mode == word_mode);
+ /* The error code is at -WORD(AP) in the current frame in
+exception handler.  */
+ arg = gen_rtx_MEM (word_mode,
+plus_constant (Pmode, arg_pointer_rtx,
+   -UNITS_PER_WORD));
+   }
+
+  return arg;
+}
+

to return a pseudo register.  It violates

   Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

Register allocator has no problem with parameters in pseudo registers.
But GCC crashes when it tries to access DECL_INCOMING_RTL as a hard
register when generating debug information.  We worked around it by
doing

+
+  if (cfun->machine->func_type != TYPE_NORMAL)
+{
+  /* Since the pointer argument of interrupt handler isn't a real
+ argument, adjust DECL_INCOMING_RTL for debug output.  */
+  tree arg = DECL_ARGUMENTS (current_function_decl);
+  gcc_assert (arg != NULL_TREE
+ && POINTER_TYPE_P (TREE_TYPE (arg)));
+  if (cfun->machine->func_type == TYPE_EXCEPTION)
+   /* (AP) in the current frame in exception handler.  */
+   DECL_INCOMING_RTL (arg) = arg_pointer_rtx;
+  else
+   /* -WORD(AP) in the current frame in interrupt handler.  */
+   DECL_INCOMING_RTL (arg) = plus_constant (Pmode,
+arg_pointer_rtx,
+-UNITS_PER_WORD);
+}


On Mon, Oct 5, 2015 at 12:29 PM, Uros Bizjak  wrote:
> On Mon, Oct 5, 2015 at 1:17 AM, H.J. Lu  wrote:
>
>>> Looking a bit deeper into the code, it looks that we want to realign
>>> the stack in the interrupt handler. Let's assume that interrupt
>>> handler is calling some other function that saves SSE vector regs to
>>> the stack. According to the x86 ABI, incoming stack of the called
>>> function is assumed to be aligned to 16 bytes. But, interrupt handler
>>> violates this assumption, since the stack could be aligned to only 4
>>> bytes for 32bit and 8 bytes for 64bit targets. Entering the called
>>> function with stack, aligned to less than 16 bytes will certainly
>>> violate ABI.
>>>
>>> So, it looks to me that we need to realign the stack in the interrupt
>>> handler unconditionally to 16bytes. In this case, we also won't need
>>> the following changes:
>>>
>>
>> Current stack alignment implementation requires at least
>> one, maybe two, scratch registers:
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67841
>>
>> Extend it to the interrupt handler, which doesn't have any scratch
>> registers may require significant changes in backend as well as
>> register allocator.
>
> But without realignment, the handler is unusable for anything but
> simple functions. The handler will crash when called function will try
> to save vector reg to stack.
>

 We can use unaligned load and store to avoid crash.
>>>
>>> Oh, sorry, I meant "called function will crash", like:
>>>
>>> -> interrupt when %rsp = 0x...8 ->
>>> -> interrupt handler ->
>>> -> calls some function that tries to save xmm reg to stack
>>> -> crash in the called function
>>>
>>
>> It should be fixed by this patch.   But we need to fix stack
>> alignment in interrupt handler to avoid scratch register.
>>
>>
>> --
>> H.J.
>> ---
>> commit 15f48be1dc7ff48207927d0b835e593d058f695b
>> Author: H.J. Lu 
>> Date:   Sun Oct 4 16:14:03 2015 -0700
>>
>> Correctly set incoming stack bounda

[PATCH] Fix "#pragma GCC pop_options" warning.

2015-10-13 Thread Dominik Vogt
When "#pragma GCC pop_options" is used on a platform without
support for "#pragma GCC target", Gcc emits a warning.  As
pop_options is useful on targets without the target pragma to
restore optimizations flags, the warning should be removed.

The attached patch does that rather inelegantly by checking if the
pragma_parse hook points to the default implementation.  I couldn't
think of a similarly terse but less clumsy way.  Suggestions for a
better test are very welcome.

gcc/ChangeLog:

* c-pragma.c: Include targhooks.h.
(handle_pragma_pop_options): Do not call
default_target_option_pragma_parse to prevent its warning when using
"#pragma GCC pop_options" on platforms that do not support
"#pragma GCC target".

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
>From d149dd8b9d6c9f720809de3839f2ad5a6825f7e5 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 13 Oct 2015 12:55:21 +0100
Subject: [PATCH] Fix "#pragma GCC pop_options" warning.

---
 gcc/c-family/c-pragma.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 3c34800..b209b7b 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"		/* For REGISTER_TARGET_PRAGMAS (why is
    this not a target hook?).  */
 #include "target.h"
+#include "targhooks.h"
 #include "diagnostic.h"
 #include "opts.h"
 #include "plugin.h"
@@ -997,7 +998,9 @@ handle_pragma_pop_options (cpp_reader *ARG_UNUSED(dummy))
 
   if (p->target_binary != target_option_current_node)
 {
-  (void) targetm.target_option.pragma_parse (NULL_TREE, p->target_binary);
+  if (targetm.target_option.pragma_parse
+	  != default_target_option_pragma_parse)
+	(void) targetm.target_option.pragma_parse (NULL_TREE, p->target_binary);
   target_option_current_node = p->target_binary;
 }
 
-- 
2.3.0



[PATCH PR67909 PR67947]

2015-10-13 Thread Yuri Rumyantsev
Hi All,

Here is a simple patch for unswitching outer loop through guard-edge
hoisting. The check that guard-edge is around the inner loop was
missed.

Bootstrapping and regression testing did not show new failures.

Is it OK for trunk?

ChangeLog:
2014-10-13  Yuri Rumyantsev  

PR tree-optimization/67909, 67947
* tree-ssa-loop-unswitch.c (find_loop_guard): Add check that GUARD_EDGE
really skip the inner loop.

gcc/testsuite/ChangeLog
* gcc.dg/torture/pr67947.c: New test.


patch
Description: Binary data


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Richard Biener
On Tue, Oct 13, 2015 at 12:52 PM, Hurugalawadi, Naveen
 wrote:
> Hi Richard,
>
> Thanks for the comments. Sorry, I was confused with handling the const and 
> variable
> together part. Have modified them.
> Also, considered that both (X & Y) can be const or variable in those cases
> for which match patterns have been added.

Both can't be constant as (bit_and INTEGER_CST INTEGER_CST) will have been
simplified to a INTEGER_CST already.  Likewise (bit_not INTEGER_CST) will
never appear (that is the problem we are trying to solve!).

> Please let me know whether it's correct or only "Y" should be both const and 
> variable
> whereas the "X" should be variable always.
>
> Please find attached the patch as per your comments.
> Please review the patch and let me know if any further modifications
> are required.
>
> Am learning lots of useful stuff while porting these patches.
> Thanks for all the help again.
>
>>> Looks like I really need to make 'match' handle these kind of things.
> I assume that its for bit ops, and binary operations like (A & B) and so on.
> Should I try doing that part? Also, how do we know which patterns should
> be const or variable or supports both?

I was thinking about this for quite a while and didn't find a good solution on
how to implement this reliably other than basically doing the pattern
duplication
in genmatch.  Say, for

/* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
(simplify
 (minus (bit_and:s @0 (bit_not @1)) (bit_and:s @0 @1))
  (if (! FLOAT_TYPE_P (type))
   (minus (bit_xor @0 @1) @1)))

generate also

(simplify
 (minus (bit_and:s @0 INTEGER_CST@2) (bit_and:s @0 INTEGER_CST@1))
 (if (! FLOAT_TYPE_P (type)
  && wi::eq (const_unop (BIT_NOT_EXPR, @2), @1))
  (minus (bit_xor @0 @1) @1)))

where we'd only target matches and unary ops of depth 1.

The question is whether this is really worth the effort, writing the
above explicitly
isn't too difficult.  So for your case simply do that duplication manually
(not using const_unop of course but wi:: functionality).  Sorry that I misled
you into doing this with (match (xdivamulminusa ..., etc.).  We want to minimize
the number of lines in match.pd and this doesn't really achieve this compared
to duplicating the whole pattern.

Also please take Marcs review comments into account.

+/* Fold (C1/X)*C2 into (C1*C2)/X.  */
+(simplify
+ (mult (rdiv REAL_CST@0 @1) REAL_CST@2)
+  (if (flag_associative_math)
+  (with
+   { tree tem = const_binop (MULT_EXPR, type, @0, @2); }
+  (if (tem)
+   (rdiv { tem; } @1)

this one is ok with :s added on the rdiv

+/* Simplify ~X & X as zero.  */
+(simplify
+ (bit_and:c (convert? @0) (convert? (bit_not @0)))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  { build_zero_cst (TREE_TYPE (@0)); }))

this one is ok as well.

+/* (-A) * (-B) -> A * B  */
+(simplify
+ (mult:c (convert? (negate @0)) (convert? negate_expr_p@1))
+  (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (mult (convert @0) (convert (negate @1)

this one is ok with using convert1? and convert2?

Please consider splitting those three patterns out (with the suggested
adjustments and the corresponding fold-const.c changes) and committing
them separately to make the rest of the patch smaller.

Thanks,
Richard.



> Thanks,
> Naveen


Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Marc Glisse

On Tue, 13 Oct 2015, Hurugalawadi, Naveen wrote:


Please find attached the patch as per your comments.


(hmm, maybe you missed the email I sent with other comments?)

+(simplify
+ (plus (convert? @0) (convert? (xdivamulminusa @0 @1)))
+  (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+   && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (trunc_mod (convert @0) (convert @1

Is that true when the conversion changes from signed to unsigned? The 
existing transformation X - (X / Y) * Y appears to be broken as well.


(the version in fold-const is hard to trigger because of canonicalization, 
but it was slightly more general in that it allowed for VECTOR_CST)


+/* Fold (a * (1 << b)) into (a << b)  */
+(simplify
+ (mult:c @0 (convert? (lshift integer_onep@1 @2)))
+  (if (! FLOAT_TYPE_P (type)
+&& tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (lshift @0 (convert @2

Wrong test, did you mean TREE_TYPE (@1) maybe?

--
Marc Glisse


Re: [patch 2/6] scalar-storage-order merge: C front-end

2015-10-13 Thread Jeff Law

On 10/06/2015 05:02 AM, Eric Botcazou wrote:

This is the C front-end + C family part.

* doc/extend.texi (type attributes): Document scalar_storage_order.
(Structure-Packing Pragmas): Rename into...
(Structure-Layout Pragmas): ...this.  Document scalar_storage_order.
* doc/invoke.texi (C Dialect Options): Document -fsso-struct
(Warnings): Document -Wno-scalar-storage-order.
* flag-types.h (enum scalar_storage_order_kind): New enumeration.
c-family/
* c-common.c (c_common_attributes): Add scalar_storage_order.
(handle_scalar_storage_order_attribute): New function.
* c-pragma.c (global_sso): New variable.
(maybe_apply_pragma_scalar_storage_order): New function.
(handle_pragma_scalar_storage_order): Likewise.
(init_pragma): Register scalar_storage_order.
* c-pragma.h (maybe_apply_pragma_scalar_storage_order): Declare.
* c.opt (Wscalar-storage-order): New warning.
(fsso-struct=): New option.
c/
* c-decl.c (finish_struct): If the structure has reverse storage
order, rewrite the type of array fields with scalar component.  Call
maybe_apply_pragma_scalar_storage_order on entry.
* c-typeck.c (build_unary_op) : Remove left-overs.  Issue
errors on bit-fields and reverse SSO here and not...
(c_mark_addressable): ...here.
(output_init_element): Adjust call to initializer_constant_valid_p.
(c_build_qualified_type): Propagate TYPE_REVERSE_STORAGE_ORDER.

  doc/extend.texi |   69 ++
  doc/invoke.texi |   22 +++-
  flag-types.h|9 +-
  c-family/c.opt  |   17 
  c-family/c-common.c |   47 ++-
  c-family/c-pragma.c |   50 +
  c-family/c-pragma.h |1
  c/c-typeck.c|   66 ++---
  c/c-decl.c  |   48 +---
  8 files changed, 273 insertions(+), 47 deletions(-)

-- Eric Botcazou


sso-c.diff


Index: doc/extend.texi
===
--- doc/extend.texi (.../trunk/gcc) (revision 228112)
+++ doc/extend.texi (.../branches/scalar-storage-order/gcc) (revision 
228133)
@@ -6310,6 +6310,42 @@ of the structure or union is placed to m
+@itemize
+@item Taking the address of a scalar field of a @code{union} or a
+@code{struct} with reverse scalar storage order is not permitted and will
+yield an error
Seems reasonable.  Certainly avoids a host of problems tracking this 
stuff later I bet.




+static tree
+handle_scalar_storage_order_attribute (tree *node, tree name, tree args,
+  int flags, bool *no_add_attrs)
+{
+  tree id = TREE_VALUE (args);
+  tree type;
+
+  if (TREE_CODE (*node) == TYPE_DECL
+  && ! (flags & ATTR_FLAG_CXX11))
+node = &TREE_TYPE (*node);
+  type = *node;
+
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+error ("scalar_storage_order is not supported");
You might want to consider indicating why it's not supported.  Not that 
I expect folks to be using this on a pdp11 :-)





Index: c/c-typeck.c
===
--- c/c-typeck.c(.../trunk/gcc) (revision 228112)
+++ c/c-typeck.c(.../branches/scalar-storage-order/gcc) (revision 
228133)
@@ -4173,18 +4173,10 @@ build_unary_op (location_t location,
  goto return_build_unary_op;
}

-  /* For &x[y], return x+y */
-  if (TREE_CODE (arg) == ARRAY_REF)
-   {
- tree op0 = TREE_OPERAND (arg, 0);
- if (!c_mark_addressable (op0))
-   return error_mark_node;
-   }
Do we still get a proper diagnostic for &x[y] where x isn't something we 
can mark addressable?


Our testsuites aren't particularly good (IMHO) at ensuring we're getting 
diags in all the cases where they're required.


No real objections, assuming that &x[y] diagnostics is still handled 
correctly somewhere.


Re: [PATCH] PR66870 PowerPC64 Enable gold linker with split stack

2015-10-13 Thread Matthias Klose

On 13.10.2015 00:53, Alan Modra wrote:

On Mon, Oct 12, 2015 at 10:15:04AM -0500, Lynn A. Boger wrote:

Thanks for doing this Alan.  I agree this looks better to me.

I assume by "etc" you mean you did biarch builds for your bootstraps on BE?


By "etc" I meant "and regression tested".

I built four configurations, powerpc-linux 32-bit only,
powerpc64le-linux 64-bit only, biarch powerpc-linux with 32-bit
default, and biarch powerpc64-linux with 64-bit default.


thanks, that works for me as well (biarch powerpc-linux-gnu).



[PATCH 7/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is the final patch. Force libsanitizer to use an old ABI for ubsan 
float cast data descriptors, because for some exprs (e.g. those of type 
tcc_declaration) we can't get the right location for now. I'm not sure 
about this, perhaps it should be fixed in GCC somehow.
2015-10-13  Maxim Ostapenko  

	* ubsan/ubsan_handlers.cc (looksLikeFloatCastOverflowDataV1): Always
	return true for now.

Index: libsanitizer/ubsan/ubsan_handlers.cc
===
--- libsanitizer/ubsan/ubsan_handlers.cc	(revision 250059)
+++ libsanitizer/ubsan/ubsan_handlers.cc	(working copy)
@@ -307,6 +307,9 @@
 }
 
 static bool looksLikeFloatCastOverflowDataV1(void *Data) {
+  // (TODO): propagate SourceLocation into DataDescriptor and use this
+  // heuristic than.
+  return true;
   // First field is either a pointer to filename or a pointer to a
   // TypeDescriptor.
   u8 *FilenameOrTypeDescriptor;


[PATCH 6/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch adjusts the fix for 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61771 to extract the last 
PC from the stack frame if no valid FP is available for ARM.
2015-10-13  Maxim Ostapenko  

	* sanitizer_common/sanitizer_stacktrace.cc (GetCanonicFrame): Assume we
	compiled code with GCC when extracting the caller PC for ARM if no
	valid frame pointer is available.

Index: libsanitizer/sanitizer_common/sanitizer_stacktrace.cc
===
--- libsanitizer/sanitizer_common/sanitizer_stacktrace.cc	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_stacktrace.cc	(working copy)
@@ -62,8 +62,8 @@
   // Nope, this does not look right either. This means the frame after next does
   // not have a valid frame pointer, but we can still extract the caller PC.
   // Unfortunately, there is no way to decide between GCC and LLVM frame
-  // layouts. Assume LLVM.
-  return bp_prev;
+  // layouts. Assume GCC.
+  return bp_prev - 1;
 #else
   return (uhwptr*)bp;
 #endif


[PATCH 5/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch removes UBSan stubs from ASan and TSan code. We don't embed 
UBSan into ASan and TSan because that would lead to undefined references 
to C++ stuff when linking with -static-libasan. AFAIK, sanitizer 
developers use different libraries for C and CXX runtimes, but I think 
this is out of scope of this merge.
2015-10-13  Maxim Ostapenko  

	* tsan/tsan_defs.h: Define TSAN_CONTAINS_UBSAN to 0.
	* asan/asan_flags.cc (InitializeFlags): Do not initialize UBSan flags.
	* asan/asan_rtl.cc (AsanInitInternal): Do not init UBSan.

Index: libsanitizer/asan/asan_flags.cc
===
--- libsanitizer/asan/asan_flags.cc	(revision 250059)
+++ libsanitizer/asan/asan_flags.cc	(working copy)
@@ -86,15 +86,6 @@
   RegisterCommonFlags(&lsan_parser);
 #endif
 
-#if CAN_SANITIZE_UB
-  __ubsan::Flags *uf = __ubsan::flags();
-  uf->SetDefaults();
-
-  FlagParser ubsan_parser;
-  __ubsan::RegisterUbsanFlags(&ubsan_parser, uf);
-  RegisterCommonFlags(&ubsan_parser);
-#endif
-
   // Override from ASan compile definition.
   const char *asan_compile_def = MaybeUseAsanDefaultOptionsCompileDefinition();
   asan_parser.ParseString(asan_compile_def);
@@ -102,20 +93,11 @@
   // Override from user-specified string.
   const char *asan_default_options = MaybeCallAsanDefaultOptions();
   asan_parser.ParseString(asan_default_options);
-#if CAN_SANITIZE_UB
-  const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions();
-  ubsan_parser.ParseString(ubsan_default_options);
-#endif
-
   // Override from command line.
   asan_parser.ParseString(GetEnv("ASAN_OPTIONS"));
 #if CAN_SANITIZE_LEAKS
   lsan_parser.ParseString(GetEnv("LSAN_OPTIONS"));
 #endif
-#if CAN_SANITIZE_UB
-  ubsan_parser.ParseString(GetEnv("UBSAN_OPTIONS"));
-#endif
-
   // Let activation flags override current settings. On Android they come
   // from a system property. On other platforms this is no-op.
   if (!flags()->start_deactivated) {
Index: libsanitizer/asan/asan_rtl.cc
===
--- libsanitizer/asan/asan_rtl.cc	(revision 250059)
+++ libsanitizer/asan/asan_rtl.cc	(working copy)
@@ -513,10 +513,6 @@
   }
 #endif  // CAN_SANITIZE_LEAKS
 
-#if CAN_SANITIZE_UB
-  __ubsan::InitAsPlugin();
-#endif
-
   InitializeSuppressions();
 
   VReport(1, "AddressSanitizer Init done\n");
Index: libsanitizer/tsan/rtl/tsan_defs.h
===
--- libsanitizer/tsan/tsan_defs.h	(revision 250059)
+++ libsanitizer/tsan/tsan_defs.h	(working copy)
@@ -29,7 +29,7 @@
 #endif
 
 #ifndef TSAN_CONTAINS_UBSAN
-# define TSAN_CONTAINS_UBSAN (CAN_SANITIZE_UB && !defined(SANITIZER_GO))
+# define TSAN_CONTAINS_UBSAN 0
 #endif
 
 namespace __tsan {


[PATCH 4/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is a reapplied Jakub's patch for disabling ODR violation detection. 
More details can be found here 
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63888).
2015-10-12  Maxim Ostapenko  

	PR bootstrap/63888
	Reapply:
	2015-02-20  Jakub Jelinek  

	* asan/asan_globals.cc (RegisterGlobal): Disable detect_odr_violation
	support until it is rewritten upstream.

	* c-c++-common/asan/pr63888.c: New test.

Index: libsanitizer/asan/asan_globals.cc
===
--- libsanitizer/asan/asan_globals.cc	(revision 250059)
+++ libsanitizer/asan/asan_globals.cc	(working copy)
@@ -146,7 +146,9 @@
   CHECK(AddrIsInMem(g->beg));
   CHECK(AddrIsAlignedByGranularity(g->beg));
   CHECK(AddrIsAlignedByGranularity(g->size_with_redzone));
-  if (flags()->detect_odr_violation) {
+  // This "ODR violation" detection is fundamentally incompatible with
+  // how GCC registers globals.  Disable as useless until rewritten upstream.
+  if (0 && flags()->detect_odr_violation) {
 // Try detecting ODR (One Definition Rule) violation, i.e. the situation
 // where two globals with the same name are defined in different modules.
 if (__asan_region_is_poisoned(g->beg, g->size_with_redzone)) {


[PATCH 3/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This is just reapplied patch for SPARC by David S. Miller. I was unable 
to test this, so could anyone help me here?
2015-10-12  Maxim Ostapenko  

	PR sanitizer/63958
	Reapply:
	2015-03-09  Jakub Jelinek  

	PR sanitizer/63958
	Reapply:
	2014-10-14  David S. Miller  

	* sanitizer_common/sanitizer_platform_limits_linux.cc (time_t):
	Define at __kernel_time_t, as needed for sparc.
	(struct __old_kernel_stat): Don't check if __sparc__ is defined.
	* libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
	(__sanitizer): Define struct___old_kernel_stat_sz,
	struct_kernel_stat_sz, and struct_kernel_stat64_sz for sparc.
	(__sanitizer_ipc_perm): Adjust for sparc targets.
	(__sanitizer_shmid_ds): Likewsie.
	(__sanitizer_sigaction): Likewise.
	(IOC_SIZE): Likewsie.

Index: libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc
===
--- libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cc	(working copy)
@@ -38,6 +38,7 @@
 #define uid_t __kernel_uid_t
 #define gid_t __kernel_gid_t
 #define off_t __kernel_off_t
+#define time_t __kernel_time_t
 // This header seems to contain the definitions of _kernel_ stat* structs.
 #include 
 #undef ino_t
@@ -62,7 +63,7 @@
 }  // namespace __sanitizer
 
 #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
-&& !defined(__mips__)
+&& !defined(__mips__) && !defined(__sparc__)
 COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
 #endif
 
Index: libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h
===
--- libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h	(revision 250059)
+++ libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h	(working copy)
@@ -83,6 +83,14 @@
   const unsigned struct_kernel_stat_sz = 144;
   #endif
   const unsigned struct_kernel_stat64_sz = 104;
+#elif defined(__sparc__) && defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 104;
+  const unsigned struct_kernel_stat64_sz = 144;
+#elif defined(__sparc__) && !defined(__arch64__)
+  const unsigned struct___old_kernel_stat_sz = 0;
+  const unsigned struct_kernel_stat_sz = 64;
+  const unsigned struct_kernel_stat64_sz = 104;
 #endif
   struct __sanitizer_perf_event_attr {
 unsigned type;
@@ -105,7 +113,7 @@
 
 #if defined(__powerpc64__)
   const unsigned struct___old_kernel_stat_sz = 0;
-#else
+#elif !defined(__sparc__)
   const unsigned struct___old_kernel_stat_sz = 32;
 #endif
 
@@ -184,6 +192,18 @@
 unsigned short __pad1;
 unsigned long __unused1;
 unsigned long __unused2;
+#elif defined(__sparc__)
+# if defined(__arch64__)
+unsigned mode;
+unsigned short __pad1;
+# else
+unsigned short __pad1;
+unsigned short mode;
+unsigned short __pad2;
+# endif
+unsigned short __seq;
+unsigned long long __unused1;
+unsigned long long __unused2;
 #else
 unsigned short mode;
 unsigned short __pad1;
@@ -201,6 +221,26 @@
 
   struct __sanitizer_shmid_ds {
 __sanitizer_ipc_perm shm_perm;
+  #if defined(__sparc__)
+  # if !defined(__arch64__)
+u32 __pad1;
+  # endif
+long shm_atime;
+  # if !defined(__arch64__)
+u32 __pad2;
+  # endif
+long shm_dtime;
+  # if !defined(__arch64__)
+u32 __pad3;
+  # endif
+long shm_ctime;
+uptr shm_segsz;
+int shm_cpid;
+int shm_lpid;
+unsigned long shm_nattch;
+unsigned long __glibc_reserved1;
+unsigned long __glibc_reserved2;
+  #else
   #ifndef __powerpc__
 uptr shm_segsz;
   #elif !defined(__powerpc64__)
@@ -238,6 +278,7 @@
 uptr __unused4;
 uptr __unused5;
   #endif
+#endif
   };
 #elif SANITIZER_FREEBSD
   struct __sanitizer_ipc_perm {
@@ -555,9 +596,13 @@
 #else
 __sanitizer_sigset_t sa_mask;
 #ifndef __mips__
+#if defined(__sparc__)
+unsigned long sa_flags;
+#else
 int sa_flags;
 #endif
 #endif
+#endif
 #if SANITIZER_LINUX
 void (*sa_restorer)();
 #endif
@@ -799,7 +844,7 @@
 
 #define IOC_NRBITS 8
 #define IOC_TYPEBITS 8
-#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
+#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || defined(__sparc__)
 #define IOC_SIZEBITS 13
 #define IOC_DIRBITS 3
 #define IOC_NONE 1U
@@ -829,7 +874,17 @@
 #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
 #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
 #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
+
+#if defined(__sparc__)
+// In sparc the 14 bits SIZE field overlaps with the
+// least significant bit of DIR, so either IOC_READ or
+// IOC_WRITE shall be 1 in order to get a non-zero SIZE.
+# define IOC_SIZE(nr)   \
+  ((nr) >> 29) & 0x7) &

[PATCH 2/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko
This patch introduces required compiler changes. Now, we don't version 
asan_init, we have a special __asan_version_mismatch_check_v[n] symbol 
for this.


Also, asan_stack_malloc_[n] doesn't take a local stack as a second 
parameter anymore, so don't pass it.
2015-10-12  Maxim Ostapenko  

config/

	* bootstrap-asan.mk: Replace ASAN_OPTIONS=detect_leaks with
	LSAN_OPTIONS=detect_leaks

gcc/

	* asan.c (asan_emit_stack_protection): Don't pass local stack to
	asan_stack_malloc_[n] anymore.
	(asan_finish_file): Instert __asan_version_mismatch_check_v[n] call.
	* sanitizer.def (BUILT_IN_ASAN_INIT): Rename to __asan_init.
	(BUILT_IN_ASAN_VERSION_MISMATCH_CHECK): Add new builtin call.

gcc/testsuite/

	g++.dg/asan/default-options-1.C: Adjust testcase.

Index: gcc/asan.c
===
--- gcc/asan.c	(revision 228704)
+++ gcc/asan.c	(working copy)
@@ -1132,12 +1132,10 @@
   snprintf (buf, sizeof buf, "__asan_stack_malloc_%d",
 		use_after_return_class);
   ret = init_one_libfunc (buf);
-  rtx addr = convert_memory_address (ptr_mode, base);
-  ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode, 2,
+  ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode, 1,
  GEN_INT (asan_frame_size
 	  + base_align_bias),
- TYPE_MODE (pointer_sized_int_node),
- addr, ptr_mode);
+ TYPE_MODE (pointer_sized_int_node));
   ret = convert_memory_address (Pmode, ret);
   emit_move_insn (base, ret);
   emit_label (lab);
@@ -2470,6 +2468,8 @@
 {
   tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT);
   append_to_statement_list (build_call_expr (fn, 0), &asan_ctor_statements);
+  fn = builtin_decl_implicit (BUILT_IN_ASAN_VERSION_MISMATCH_CHECK);
+  append_to_statement_list (build_call_expr (fn, 0), &asan_ctor_statements);
 }
   FOR_EACH_DEFINED_VARIABLE (vnode)
 if (TREE_ASM_WRITTEN (vnode->decl)
Index: gcc/sanitizer.def
===
--- gcc/sanitizer.def	(revision 228704)
+++ gcc/sanitizer.def	(working copy)
@@ -27,8 +27,11 @@
for other FEs by asan.c.  */
 
 /* Address Sanitizer */
-DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_INIT, "__asan_init_v4",
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_INIT, "__asan_init",
 		  BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_VERSION_MISMATCH_CHECK,
+		  "__asan_version_mismatch_check_v6",
+		  BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST)
 /* Do not reorder the BUILT_IN_ASAN_{REPORT,CHECK}* builtins, e.g. cfgcleanup.c
relies on this order.  */
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD1, "__asan_report_load1",
Index: gcc/testsuite/g++.dg/asan/default-options-1.C
===
--- gcc/testsuite/g++.dg/asan/default-options-1.C	(revision 228704)
+++ gcc/testsuite/g++.dg/asan/default-options-1.C	(working copy)
@@ -12,4 +12,4 @@
   return 0;
 }
 
-// { dg-output "Using the defaults from __asan_default_options:.* foo=bar.*(\n|\r\n|\r)" }
+// { dg-output "WARNING: found 1 unrecognized flag\\(s\\):(\n|\r\n|\r).*foo(\n|\r\n|\r)" }


[PATCH 0/7] Libsanitizer merge from upstream r249633.

2015-10-13 Thread Maxim Ostapenko

Hi,

it's been a while since the last libsanitizer merge from upstream into 
GCC happened and the library has significantly changed since that time. 
The main features to be ported are:


-New common strings interceptors were added.
-Various allocator improvements were performed.
-Improvements for ASan deactivated start were performed.
-TSan and LSan were enabled for Aarch64.
-Fast unwinding was enabled for Aarch64.
-New tsan_unaligned_{load, store}_[n] functions were introduced.
-asan_stack_malloc_[n] doesn't take a local stack as a second parameter 
anymore.

-sanitization for std containers is supported now.
-New interface functions for dynamic allocas and VLA's 
poisoning/unpoisoning were introduced.


Some features are not ported for now, but might be enabled in the future:

-Embedded UBSan runtime into ASan and TSan ones. I don't enable this 
now, because of errors during ASan static linkage: GCC uses 
-whole-archive option that would lead to undefined references to C++ stuff.
-UBSan data descriptors for float-cast conversion support location 
propagation now. But sometimes we have loc == UNKNOWN_LOCATION in 
ubsan_instrument_float_cast, so use old ABI for now. See below for details.


The first patch of the series is the merge itself.

The second one introduces corresponding compiler changes.

Other patches are applied to library and they are GCC-specific:

Patches 3 and 4 are just reapplied David's and Jakub's patches for SPARC 
and disabling ODR violation detection respectively.


Patch 5 removes UBSan stubs from ASan and TSan code since we don't 
support embedded UBSan runtime into ASan and TSan.


Patch 6 changes heuristic for extracting last PC from stack frame for 
ARM in fast unwind routine. More details can be found here 
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61771).


Patch 7 forces libsanitizer to use an old ABI for ubsan float cast data 
descriptors, because sometimes we can have loc == UNKNOWN_LOCATION in 
ubsan_instrument_float_cast, e.g. in such a case:


..
volatile double foo; // ubsan_instrument_float_cast is called by convert 
function.

..

Since foo is a tcc_declaration, loc is UNKNOWN_LOCATION. I'm actually 
not sure about this, perhaps we can fix this in GCC somehow.


I've regtested and {A, UB}San bootstrapped these patches on 
x86-64-unknown-linux-gnu and aarch64-linux-gnueabi (Juno board, 39 bit 
VA space) and tested for ARM under QEMU-ARM.
Testing ASan under QEMU-AARCH64 revealed many test failures due to LSan 
was enabled. In particular, it tries to call internal_clone function in 
LSan internals, which in turn calls the _NR_clone syscall, and then QEMU exits 
with EINTR error code (that might be expected, AFAIK QEMU is not very 
good with threads). So, I wonder, if I should disable LSan for AArch64 now?


I'm also asking community to help me with testing these patches on 
various targets (ARM, PPC, etc.) that I lack, so could you help me with 
this please?


-Maxim


Re: [ARM] Add ARMv8.1 command line options.

2015-10-13 Thread Matthew Wahab

Some of the command line options may be unnecessary so I'll drop this patch.
Matthew

On 08/10/15 12:00, Matthew Wahab wrote:

Ping.

Updated patch attached, I've broken the over-long lines added to arm-arches.def 
and
arm-fpus.def.

Matthew

On 17/09/15 18:54, Matthew Wahab wrote:

Hello,

ARMv8.1 is a set of architectural extensions to ARMv8. Support has been
enabled in binutils for the ARMv8.1 architecture, using the name
"armv8.1-a".

This patch adds support to gcc for specifying an ARMv8.1 architecture
using options "-march=armv8.1-a" and "-march=armv8.1-a+crc". It also
adds the FPU options "-mfpu=neon-fp-armv8.1" and
"-mfpu=crypto-neon-fp-armv8.1", to specify the ARMv8.1 Adv.SIMD
instruction set.  The changes set the appropriate architecture and fpu
options for binutils but don't otherwise change the code generated by
gcc.

Tested for arm-none-linux-gnueabihf with native bootstrap and make
check.

Ok for trunk?
Matthew

2015-09-17  Matthew Wahab  

 * config/arm/arm-arches.def: Add "armv8.1-a" and "armv8.1-a+crc".
 * config/arm/arm-fpus.def: Add "neon-fp-armv8.1" and
 "crypto-neon-fp-armv8.1".
 * config/arm/arm-protos.h (FL2_ARCH8_1): New.
 (FL2_FOR_ARCH8_1A): New.
 * config/arm/arm-tables.opt: Regenerate.
 * config/arm/arm.h (FPU_FL_RDMA): New.
 * doc/invoke.texi (ARM -march): Add "armv8.1-a" and
 "armv8.1-a+crc".
 (ARM -mfpu): Add "neon-fp-armv8.1" and "crypto-neon-fp-armv8.1".






Re: Move some bit and binary optimizations in simplify and match

2015-10-13 Thread Hurugalawadi, Naveen
Hi Richard,

Thanks for the comments. Sorry, I was confused with handling the const and 
variable 
together part. Have modified them.
Also, considered that both (X & Y) can be const or variable in those cases
for which match patterns have been added.
Please let me know whether its correct or only "Y" should be both const and 
variable
whereas the "X" should be variable always.

Please find attached the patch as per your comments.
Please review the patch and let me know if any further modifications 
are required.

Am learning lots of useful stuff while porting these patches. 
Thanks for all the help again.

>> Looks like I really need to make 'match' handle these kind of things.
I assume that its for bit ops, and binary operations like (A & B) and so on.
Should I try doing that part? Also, how do we know which patterns should
be const or variable or supports both?

Thanks,
Naveendiff --git a/gcc/fold-const.c b/gcc/fold-const.c
index de45a2c..2d81b2c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -9232,26 +9232,6 @@ fold_binary_loc (location_t loc,
   return NULL_TREE;
 
 case PLUS_EXPR:
-  if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-	{
-	  /* X + (X / CST) * -CST is X % CST.  */
-	  if (TREE_CODE (arg1) == MULT_EXPR
-	  && TREE_CODE (TREE_OPERAND (arg1, 0)) == TRUNC_DIV_EXPR
-	  && operand_equal_p (arg0,
-  TREE_OPERAND (TREE_OPERAND (arg1, 0), 0), 0))
-	{
-	  tree cst0 = TREE_OPERAND (TREE_OPERAND (arg1, 0), 1);
-	  tree cst1 = TREE_OPERAND (arg1, 1);
-	  tree sum = fold_binary_loc (loc, PLUS_EXPR, TREE_TYPE (cst1),
-  cst1, cst0);
-	  if (sum && integer_zerop (sum))
-		return fold_convert_loc (loc, type,
-	 fold_build2_loc (loc, TRUNC_MOD_EXPR,
-		  TREE_TYPE (arg0), arg0,
-		  cst0));
-	}
-	}
-
   /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or
 	 one.  Make sure the type is not saturating and has the signedness of
 	 the stripped operands, as fold_plusminus_mult_expr will re-associate.
@@ -9692,28 +9672,6 @@ fold_binary_loc (location_t loc,
 			fold_convert_loc (loc, type,
 	  TREE_OPERAND (arg0, 0)));
 
-  if (! FLOAT_TYPE_P (type))
-	{
-	  /* Fold (A & ~B) - (A & B) into (A ^ B) - B, where B is
-	 any power of 2 minus 1.  */
-	  if (TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == BIT_AND_EXPR
-	  && operand_equal_p (TREE_OPERAND (arg0, 0),
-  TREE_OPERAND (arg1, 0), 0))
-	{
-	  tree mask0 = TREE_OPERAND (arg0, 1);
-	  tree mask1 = TREE_OPERAND (arg1, 1);
-	  tree tem = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask0);
-
-	  if (operand_equal_p (tem, mask1, 0))
-		{
-		  tem = fold_build2_loc (loc, BIT_XOR_EXPR, type,
- TREE_OPERAND (arg0, 0), mask1);
-		  return fold_build2_loc (loc, MINUS_EXPR, type, tem, mask1);
-		}
-	}
-	}
-
   /* Fold __complex__ ( x, 0 ) - __complex__ ( 0, y ) to
 	 __complex__ ( x, -y ).  This is not the same for SNaNs or if
 	 signed zeros are involved.  */
@@ -9803,20 +9761,6 @@ fold_binary_loc (location_t loc,
   goto associate;
 
 case MULT_EXPR:
-  /* (-A) * (-B) -> A * B  */
-  if (TREE_CODE (arg0) == NEGATE_EXPR && negate_expr_p (arg1))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg0, 0)),
-			fold_convert_loc (loc, type,
-	  negate_expr (arg1)));
-  if (TREE_CODE (arg1) == NEGATE_EXPR && negate_expr_p (arg0))
-	return fold_build2_loc (loc, MULT_EXPR, type,
-			fold_convert_loc (loc, type,
-	  negate_expr (arg0)),
-			fold_convert_loc (loc, type,
-	  TREE_OPERAND (arg1, 0)));
-
   if (! FLOAT_TYPE_P (type))
 	{
 	  /* Transform x * -C into -x * C if x is easily negatable.  */
@@ -9830,16 +9774,6 @@ fold_binary_loc (location_t loc,
 		  negate_expr (arg0)),
 tem);
 
-	  /* (a * (1 << b)) is (a << b)  */
-	  if (TREE_CODE (arg1) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg1, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op0,
-TREE_OPERAND (arg1, 1));
-	  if (TREE_CODE (arg0) == LSHIFT_EXPR
-	  && integer_onep (TREE_OPERAND (arg0, 0)))
-	return fold_build2_loc (loc, LSHIFT_EXPR, type, op1,
-TREE_OPERAND (arg0, 1));
-
 	  /* (A + A) * C -> A * 2 * C  */
 	  if (TREE_CODE (arg0) == PLUS_EXPR
 	  && TREE_CODE (arg1) == INTEGER_CST
@@ -9882,21 +9816,6 @@ fold_binary_loc (location_t loc,
 	}
   else
 	{
-	  /* Convert (C1/X)*C2 into (C1*C2)/X.  This transformation may change
- the result for floating point types due to rounding so it is applied
- only if -fassociative-math was specify.  */
-	  if (flag_associative_math
-	  && TREE_CODE (arg0) == RDIV_EXPR
-	  && TREE_CODE (arg1) == REAL_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 0)) == REAL_CST)
-	{
-	  tree tem = const_binop (MULT_EXPR, TREE_OPERAND (arg0, 0),
-  arg1);
-	  if (tem)
-	

Re: Fix 61441

2015-10-13 Thread Sujoy Saraswati
Hi,
 This is another modified version of the patch, incorporating the
previous comments.

Bootstrap and regression tests on x86_64-linux-gnu and
aarch64-unknown-linux-gnu passed with changes done on trunk.

Is this fine ?

Regards,
Sujoy

2015-10-13  Sujoy Saraswati 

PR tree-optimization/61441
* builtins.c (integer_valued_real_p): Return true for
NaN values.
(fold_builtin_trunc, fold_builtin_pow): Avoid the operation
if flag_signaling_nans is on and the operand is a NaN.
(fold_builtin_powi): Same.
* fold-const.c (const_binop): Convert sNaN to qNaN when
flag_signaling_nans is off.
(const_unop): Avoid the operation, other than NEGATE and
ABS, if flag_signaling_nans is on and the operand is a NaN.
(fold_convert_const_real_from_real): Avoid the operation if
flag_signaling_nans is on and the operand is a NaN.
* real.c (do_add): Make resulting NaN value to be qNaN.
(do_multiply, do_divide, do_fix_trunc): Same.
(real_arithmetic, real_ldexp): Same
* simplify-rtx.c (simplify_const_unary_operation): Avoid the
operation if flag_signaling_nans is on and the operand is a NaN.
* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Same.

PR tree-optimization/61441
* gcc.dg/pr61441.c: New testcase.

Index: gcc/builtins.c
===
--- gcc/builtins.c  (revision 228700)
+++ gcc/builtins.c  (working copy)
@@ -7357,7 +7357,11 @@ integer_valued_real_p (tree t)
 && integer_valued_real_p (TREE_OPERAND (t, 2));

 case REAL_CST:
-  return real_isinteger (TREE_REAL_CST_PTR (t), TYPE_MODE (TREE_TYPE (t)));
+  /* Return true for NaN values, since real_isinteger would
+ return false if the value is sNaN.  */
+  return (REAL_VALUE_ISNAN (TREE_REAL_CST (t))
+  || real_isinteger (TREE_REAL_CST_PTR (t),
+ TYPE_MODE (TREE_TYPE (t;

 CASE_CONVERT:
   {
@@ -7910,8 +7914,13 @@ fold_builtin_trunc (location_t loc, tree fndecl, t
   tree type = TREE_TYPE (TREE_TYPE (fndecl));

   x = TREE_REAL_CST (arg);
-  real_trunc (&r, TYPE_MODE (type), &x);
-  return build_real (type, r);
+  /* Avoid the folding if flag_signaling_nans is on.  */
+  if (!(HONOR_SNANS (TYPE_MODE (type))
+&& REAL_VALUE_ISNAN (x)))
+  {
+real_trunc (&r, TYPE_MODE (type), &x);
+return build_real (type, r);
+  }
 }

   return fold_trunc_transparent_mathfn (loc, fndecl, arg);
@@ -8297,9 +8306,15 @@ fold_builtin_pow (location_t loc, tree fndecl, tre
  bool inexact;

  x = TREE_REAL_CST (arg0);
+
  inexact = real_powi (&x, TYPE_MODE (type), &x, n);
- if (flag_unsafe_math_optimizations || !inexact)
-   return build_real (type, x);
+
+  /* Avoid the folding if flag_signaling_nans is on.  */
+ if (flag_unsafe_math_optimizations
+  || (!inexact
+  && !(HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg0)))
+   && REAL_VALUE_ISNAN (x
+ return build_real (type, x);
}

  /* Strip sign ops from even integer powers.  */
@@ -8388,8 +8403,14 @@ fold_builtin_powi (location_t loc, tree fndecl ATT
{
  REAL_VALUE_TYPE x;
  x = TREE_REAL_CST (arg0);
- real_powi (&x, TYPE_MODE (type), &x, c);
- return build_real (type, x);
+
+  /* Avoid the folding if flag_signaling_nans is on.  */
+  if (!(HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg0)))
+&& REAL_VALUE_ISNAN (x)))
+  {
+   real_powi (&x, TYPE_MODE (type), &x, c);
+   return build_real (type, x);
+  }
}

   /* Optimize pow(x,0) = 1.0.  */
Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 228700)
+++ gcc/fold-const.c(working copy)
@@ -1185,9 +1185,21 @@ const_binop (enum tree_code code, tree arg1, tree
   /* If either operand is a NaN, just return it.  Otherwise, set up
 for floating-point trap; we return an overflow.  */
   if (REAL_VALUE_ISNAN (d1))
-   return arg1;
+  {
+/* Make resulting NaN value to be qNaN when flag_signaling_nans
+   is off.  */
+d1.signalling = 0;
+t = build_real (type, d1);
+   return t;
+  }
   else if (REAL_VALUE_ISNAN (d2))
-   return arg2;
+  {
+/* Make resulting NaN value to be qNaN when flag_signaling_nans
+   is off.  */
+d2.signalling = 0;
+t = build_real (type, d2);
+   return t;
+  }

   inexact = real_arithmetic (&value, code, &d1, &d2);
   real_convert (&result, mode, &value);
@@ -1557,6 +1569,15 @@ const_binop (enum tree_code code, tree type, tree
 tree
 const_unop 

Re: [PATCH, sparc]: Use ROUND_UP and ROUND_DOWN macros

2015-10-13 Thread Eric Botcazou
> In this case, I think it is better to write this part as:
> 
> --cut here--
> offset += 8;
> 
> /* Always preserve double-word alignment.  */
> offset = ROUND_DOWN (offset, 8);
> --cut here--

Not convinced, having offset == 12 after the first line doesn't make sense.

I'd just beef up the comment:

/* Bump and round down to double word in case we already bumped by 4.  */
offset = ROUND_DOWN (offset + 8, 8);

-- 
Eric Botcazou


Re: [PATCH, sparc]: Use ROUND_UP and ROUND_DOWN macros

2015-10-13 Thread Uros Bizjak
On Tue, Oct 13, 2015 at 12:10 PM, Eric Botcazou  wrote:
>> Two functional changes I'd like to point out:
>>
>>  /* ALIGN FRAMES on double word boundaries */
>> -#define SPARC_STACK_ALIGN(LOC) \
>> -  (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
>> +#define SPARC_STACK_ALIGN(LOC) ROUND_UP ((LOC), UNITS_PER_WORD * 2)
>>
>> The one above uses UNITS_PER_WORD in stack alignment calculation
>
> OK.
>
>>/* Always preserve double-word alignment.  */
>> -  offset = (offset + 8) & -8;
>> +  offset = ROUND_UP (offset, 8);
>>
>> The one above looks like off-by-one bug, but this needs a confirmation.
>
> No, it's correct, it's a bump of 8 followed by a ROUND_DOWN (the offset may or
> may not have been bumped by 4 already in the code just above).

In this case, I think it is better to write this part as:

--cut here--
offset += 8;

/* Always preserve double-word alignment.  */
offset = ROUND_DOWN (offset, 8);
--cut here--

WDYT?

Uros.


  1   2   >