Re: [PATCH] Improve constant vec_perm expansion on i?86 (PR target/68655)

2015-12-03 Thread Uros Bizjak
On Thu, Dec 3, 2015 at 9:52 PM, Jakub Jelinek  wrote:
> Hi!
>
> As discussed in the PR, for some permutations we can get better code
> if we try to expand it as if it was a permutation in a mode with the
> same vector size, but wider vector element.  The first attempt to do this
> always had mixed results, lots of improvements, lots of pessimizations,
> this one at least on gcc.dg/vshuf*
> {-msse2,-msse4,-mavx,-mavx2,-mavx512f,-mavx512bw} shows only
> improvements - it tries the original permutation for single insn,
> if that doesn't work tries the wider one single insn, and then
> as complete fallback, if we don't have any expansion whatsoever, tries
> the wider one too.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2015-12-03  Jakub Jelinek  
>
> PR target/68655
> * config/i386/i386.c (canonicalize_vector_int_perm): New function.
> (expand_vec_perm_1): Use it and recurse if everything else
> failed.  Use nd.perm instead of perm2.
> (expand_vec_perm_even_odd_1): If testing_p, use gen_raw_REG
> instead of gen_lowpart for the target.
> (ix86_expand_vec_perm_const_1): Use canonicalize_vector_int_perm
> and recurse if everything else failed.
>
> * gcc.dg/torture/vshuf-4.inc (TESTS): Add one extra test.
> * gcc.dg/torture/vshuf-4.inc (TESTS): Add two extra tests.

OK for mainline.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2015-12-02 20:27:00.0 +0100
> +++ gcc/config/i386/i386.c  2015-12-03 15:03:13.415764986 +0100
> @@ -49365,6 +49365,57 @@ expand_vec_perm_pshufb (struct expand_ve
>return true;
>  }
>
> +/* For V*[QHS]Imode permutations, check if the same permutation
> +   can't be performed in a 2x, 4x or 8x wider inner mode.  */
> +
> +static bool
> +canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
> + struct expand_vec_perm_d *nd)
> +{
> +  int i;
> +  enum machine_mode mode = VOIDmode;
> +
> +  switch (d->vmode)
> +{
> +case V16QImode: mode = V8HImode; break;
> +case V32QImode: mode = V16HImode; break;
> +case V64QImode: mode = V32HImode; break;
> +case V8HImode: mode = V4SImode; break;
> +case V16HImode: mode = V8SImode; break;
> +case V32HImode: mode = V16SImode; break;
> +case V4SImode: mode = V2DImode; break;
> +case V8SImode: mode = V4DImode; break;
> +case V16SImode: mode = V8DImode; break;
> +default: return false;
> +}
> +  for (i = 0; i < d->nelt; i += 2)
> +if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
> +  return false;
> +  nd->vmode = mode;
> +  nd->nelt = d->nelt / 2;
> +  for (i = 0; i < nd->nelt; i++)
> +nd->perm[i] = d->perm[2 * i] / 2;
> +  if (GET_MODE_INNER (mode) != DImode)
> +canonicalize_vector_int_perm (nd, nd);
> +  if (nd != d)
> +{
> +  nd->one_operand_p = d->one_operand_p;
> +  nd->testing_p = d->testing_p;
> +  if (d->op0 == d->op1)
> +   nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
> +  else
> +   {
> + nd->op0 = gen_lowpart (nd->vmode, d->op0);
> + nd->op1 = gen_lowpart (nd->vmode, d->op1);
> +   }
> +  if (d->testing_p)
> +   nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
> +  else
> +   nd->target = gen_reg_rtx (nd->vmode);
> +}
> +  return true;
> +}
> +
>  /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
> in a single instruction.  */
>
> @@ -49372,7 +49423,7 @@ static bool
>  expand_vec_perm_1 (struct expand_vec_perm_d *d)
>  {
>unsigned i, nelt = d->nelt;
> -  unsigned char perm2[MAX_VECT_LEN];
> +  struct expand_vec_perm_d nd;
>
>/* Check plain VEC_SELECT first, because AVX has instructions that could
>   match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
> @@ -49385,10 +49436,10 @@ expand_vec_perm_1 (struct expand_vec_per
>
>for (i = 0; i < nelt; i++)
> {
> - perm2[i] = d->perm[i] & mask;
> - if (perm2[i] != i)
> + nd.perm[i] = d->perm[i] & mask;
> + if (nd.perm[i] != i)
> identity_perm = false;
> - if (perm2[i])
> + if (nd.perm[i])
> broadcast_perm = false;
> }
>
> @@ -49457,7 +49508,7 @@ expand_vec_perm_1 (struct expand_vec_per
> }
> }
>
> -  if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
> +  if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
> return true;
>
>/* There are plenty of patterns in sse.md that are written for
> @@ -49468,10 +49519,10 @@ expand_vec_perm_1 (struct expand_vec_per
>  every other permutation operand.  */
>for (i = 0; i < nelt; i += 2)
> {
> - perm2[i] = d->perm[i] & mask;
> - perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
> + nd.perm[i] = d->perm[i] & mask;
> + nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
>  

-fstrict-aliasing fixes 6/6: permit inlining of comdats

2015-12-03 Thread Jan Hubicka
Hi,
this is the last patch of the series.  It makes operand_equal_p compare
alias sets even in !flag_strict_aliasing before inlining so inlining 
!flag_strict_aliasing to flag_strict_aliasing is possible when callee is
merged comdat.  I tried to explain it in greater detail in the comment
in ipa-inline-transform.

While working on the code I noticed that I managed to overload merged with
two meanings. One is that the function had bodies defined in multiple units
(and thus its inlining should not be considered cross-module) and the other is
that it used to be comdat.  This is usually the same, but not always - one
can manually define weak functions where the bypass for OPTIMIZATION_NODE
checks can not apply.

Since the first only affects heuristics and I do not think I need to care
about weaks much, I dropped it and renamed the flag to merged_comdat to make
it more obvious what it means.

Bootstrapped/regtested x86_64-linux, OK?

I will work on some testcases for the ICF and fold-const that would lead
to wrong code if alias sets were ignored early.

Honza
* fold-const.c (operand_equal_p): Before inlining do not permit
transformations that would break with strict aliasing.
* ipa-inline.c (can_inline_edge_p): Use merged_comdat.
* ipa-inline-transform.c (inline_call): When inlining merged comdat do
not drop strict_aliasing flag of caller.
* cgraphclones.c (cgraph_node::create_clone): Use merged_comdat.
* cgraph.c (cgraph_node::dump): Dump merged_comdat.
* ipa-icf.c (sem_function::merge): Drop merged_comdat when merging
comdat and non-comdat.
* cgraph.h (cgraph_node): Rename merged to merged_comdat.
* ipa-inline-analysis.c (simple_edge_hints): Check both merged_comdat
and icf_merged.

* lto-symtab.c (lto_cgraph_replace_node): Update code computing
merged_comdat.
Index: fold-const.c
===
--- fold-const.c(revision 231239)
+++ fold-const.c(working copy)
@@ -2987,7 +2987,7 @@ operand_equal_p (const_tree arg0, const_
   flags)))
return 0;
  /* Verify that accesses are TBAA compatible.  */
- if (flag_strict_aliasing
+ if ((flag_strict_aliasing || !cfun->after_inlining)
  && (!alias_ptr_types_compatible_p
(TREE_TYPE (TREE_OPERAND (arg0, 1)),
 TREE_TYPE (TREE_OPERAND (arg1, 1)))
Index: ipa-inline.c
===
--- ipa-inline.c(revision 231239)
+++ ipa-inline.c(working copy)
@@ -466,7 +466,7 @@ can_inline_edge_p (struct cgraph_edge *e
  optimized with the optimization flags of module they are used in.
 Also do not care about mixing up size/speed optimization when
 DECL_DISREGARD_INLINE_LIMITS is set.  */
-  else if ((callee->merged
+  else if ((callee->merged_comdat
&& !lookup_attribute ("optimize",
  DECL_ATTRIBUTES (caller->decl)))
   || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
Index: ipa-inline-transform.c
===
--- ipa-inline-transform.c  (revision 231239)
+++ ipa-inline-transform.c  (working copy)
@@ -322,11 +322,26 @@ inline_call (struct cgraph_edge *e, bool
   if (DECL_FUNCTION_PERSONALITY (callee->decl))
 DECL_FUNCTION_PERSONALITY (to->decl)
   = DECL_FUNCTION_PERSONALITY (callee->decl);
+
+  /* merged_comdat indicates that function was originally COMDAT and merged
+ from multiple units.  Because every unit using COMDAT must also define it,
+ we know that the function is safe to build with each of the optimization
+ flags used to compile them.
+
+ If one unit is compiled with -fstrict-aliasing and
+ other with -fno-strict-aliasing we may bypass dropping the
+ flag_strict_aliasing because we know it would be valid to inline
+ -fstrict-aliasing variant of the callee, too.  Unless optimization
+ attribute was used, the caller and COMDAT callee must have been
+ compiled with the same flags.  */
   if (!opt_for_fn (callee->decl, flag_strict_aliasing)
-  && opt_for_fn (to->decl, flag_strict_aliasing))
+  && opt_for_fn (to->decl, flag_strict_aliasing)
+  && (!callee->merged_comdat
+ || lookup_attribute ("optimization",
+  DECL_ATTRIBUTES (e->caller->decl))
+ || lookup_attribute ("optimization", DECL_ATTRIBUTES (callee->decl
 {
   struct gcc_options opts = global_options;
-
   cl_optimization_restore (&opts,
 TREE_OPTIMIZATION (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)));
   opts.x_flag_strict_aliasing = false;
Index: cgraphclones.c
===
--- cgraphclo

Ping [PATCH] c++/42121 - diagnose invalid flexible array members

2015-12-03 Thread Martin Sebor

[CC Jason for the C++ changes and Joseph for the one C change.]

Attached is a reworked and expanded patch for the bug plus three
others in the same area that I uncovered while developing and
testing the former patch:

c++/68689 - flexible array members in unions accepted in C++
c++/68478 - flexible array members have complete type
c++/68613 - initializer-string for array of chars is too long error
on flexible array member

The patch should bring C++ support for flexible array members closer
to C (most of the same constructs should be accepted and rejected).
The only C change in this patch is to include the size of excessively
large types in diagnostics (I found knowing the size helpful when
adding tests and I think it might be helpful to others as well).

Unlike in my first attempt, this patch distinguishes flexible array
members from zero-length arrays by setting the upper bound of the
former to null.  This seems to be in line with what the C front end
does but has required bigger changes than I had hoped.  Hopefully,
the result is a more consistent treatment of the extension between
the two front ends (for example, both C and C++ now emit the same
ADA specification for flexible array members).

Tested by bootstrapping and running C and C++ tests (including
libstdc++) on x86_64.

I'm not sure if this is appropriate for this stage or if it needs
to wait until after the release.  Either is fine with me.

Martin

On 11/21/2015 03:17 PM, Martin Sebor wrote:

Bug 42121 - g++ should warn or error on internal 0 size array in
struct, is a request to diagnose declarations of flexible array
members that aren't last in the enclosing struct, such as in the
following:

 struct S
 {
 int a;
 char b[];   // invalid
 int c;
 };

The C front end diagnoses such cases because they are invalid in
standard C.  Comment 8 on the bug points out that flexible array
members should not be treated identically to zero-size arrays
(they're not in C).

The attached patch implements the requested diagnostic, keeping
comment 8 in mind.  It also issues a diagnostic for flexible array
members in unions (which are also diagnosed as invalid in C mode).
The patch found a number of instances of invalid flexible array
members in the C++ test suites.  I corrected them.

Since the C++ front end doesn't distinguish between flexible array
members and zero-size arrays (both are considered to have an upper
bound of SIZE_MAX), and since determining whether or not
a declaration of such a member is valid cannot be done until
the whole containing struct has been processed, the patch makes
use one of the DECL_LANG_FLAGs to temporarily remember which is
which (I somewhat arbitrarily picked DECL_LANG_FLAG_1), before
clearing it. There might be a better flag to use, and it might
be appropriate to define a descriptive macro for this purpose
in cp-tree.h, along the same lines as the macros already defined
for other such purposes.

Martin


gcc/testsuite/ChangeLog:
2015-12-02  Martin Sebor  

	c++/42121
	c++/68478
	c++/68613
	c++/68689
	* g++.dg/ext/flexary2.C: Expect a sole flexible array member
	to be rejected.  Add a test case exercising zero-length array.
	* g++.dg/ext/flexary3.C: Expect a sole flexible array member
	to be rejected.
	* g++.dg/ext/flexary4.C: New file.
	* g++.dg/ext/flexary5.C: New file.
	* g++.dg/ext/flexary6.C: New file.
	* g++.dg/ext/flexary7.C: New file.
	* g++.dg/other/dump-ada-spec-2.C: Adjust to reflect flexible
	array members.
	* g++.dg/parse/pr43765.C: Add a member to make a struct with
	a flexible array member valid.  Adjust expected error message.
	* g++.dg/torture/pr64280.C: Expect a sole flexible array member
	to be rejected.
	* g++.dg/torture/pr64312.C: Add a member to make a struct with
	a flexible array member valid.
	* g++.dg/ubsan/object-size-1.C: Adjust expected diagnostic.
	* g++.dg/other/dump-ada-spec-2.C: Adjust expected type.

gcc/cp/ChangeLog:
2015-12-02  Martin Sebor  

	c++/42121
	c++/68478
	c++/68613
	c++/68689
	* class.c (walk_subobject_offsets): Avoid assuming type domain
	is non-null or has an upper bound.
	(layout_class_type): Include type size in error message.
	(all_bases_empty_p, field_nonempty_p): New helper functions.
	(check_flexarrays): New function.
	(finish_struct_1): Call check_flexarrays.
	* decl.c (compute_array_index_type): Distinguish flexible array
	members from zero-length arrays.
	(grokdeclarator): Reject flexible array members in unions.  Avoid
	rejecting members of incomplete types that are flexible array members.
	* error.c (dump_type_suffix): Handle flexible array members with null
	upper bound.
	* init.c (perform_member_init): Same.
	* pt.c (instantiate_class_template_1): Allow flexible array members.
	(tsubst): Handle flexible array members with null upper bound.
	* typeck2.c (digest_init_r): Warn for initialization of flexible
	array members.
	(process_init_constructor_record): Handle flexible array members.

gcc/c/ChangeLog:
2015-12-02  

[PATCH AArch64]Use aarch64_sync_memory_operand in atomic_store pattern

2015-12-03 Thread Bin Cheng
Hi,
I noticed atomic_store pattern is the only one in atomic.md that uses
memory_operand as predicate.  This seems like a typo to me.  It also causes
a problem.  The general address expression supported by memory_operand is kept
till LRA finds out it doesn't match the "Q" constraint.  As a result LRA
needs to reload the address expression out of memory reference.  Since there
is no combine optimizer after LRA, below inefficient code is generated for
atomic stores:
  67 add x1, x29, 64
  68 add x0, x1, x0, sxtw 3
  69 sub x0, x0, #16
  70 stlrx19, [x0]
Or:
  67 sxtwx0, w0
  68 add x1, x29, 48
  69 add x1, x1, x0, sxtw 3
  70 stlrx19, [x1]

With this patch, we force atomic_store to use direct register addressing
mode at earlier compilation phase and better code will be generated:
  67 add x1, x29, 48
  68 add x1, x1, x0, sxtw 3
  69 stlrx19, [x1]

Bootstrap and test on aarch64.  Is it OK?

Thanks,
bin

2015-12-01  Bin Cheng  

* config/aarch64/atomics.md (atomic_store): Use predicate
aarch64_sync_memory_operand.

diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 3c034fb..68dc27a 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -481,7 +481,7 @@
 )
 
 (define_insn "atomic_store"
-  [(set (match_operand:ALLI 0 "memory_operand" "=Q")
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "=Q")
 (unspec_volatile:ALLI
   [(match_operand:ALLI 1 "general_operand" "rZ")
(match_operand:SI 2 "const_int_operand")]   ;; model


Re: [PATCH AArch64]Handle REG+REG+CONST and REG+NON_REG+CONST in legitimize address

2015-12-03 Thread Bin.Cheng
On Thu, Dec 3, 2015 at 6:26 PM, Richard Earnshaw
 wrote:
> On 03/12/15 05:26, Bin.Cheng wrote:
>> On Tue, Dec 1, 2015 at 6:25 PM, Richard Earnshaw
>>  wrote:
>>> On 01/12/15 03:19, Bin.Cheng wrote:
 On Tue, Nov 24, 2015 at 6:18 PM, Richard Earnshaw
  wrote:
> On 24/11/15 09:56, Richard Earnshaw wrote:
>> On 24/11/15 02:51, Bin.Cheng wrote:
> The aarch64's problem is we don't define addptr3 pattern, and we don't
>>> have direct insn pattern describing the "x + y << z".  According to
>>> gcc internal:
>>>
>>> ‘addptrm3’
>>> Like addm3 but is guaranteed to only be used for address 
>>> calculations.
>>> The expanded code is not allowed to clobber the condition code. It
>>> only needs to be defined if addm3 sets the condition code.
>
> addm3 on aarch64 does not set the condition codes, so by this rule we
> shouldn't need to define this pattern.
>>> Hi Richard,
>>> I think that rule has a prerequisite that backend needs to support
>>> register shifted addition in addm3 pattern.
>>
>> addm3 is a named pattern and its format is well defined.  It does not
>> take a shifted operand and never has.
>>
>>> Apparently for AArch64,
>>> addm3 only supports "reg+reg" or "reg+imm".  Also we don't really
>>> "does not set the condition codes" actually, because both
>>> "adds_shift_imm_*" and "adds_mul_imm_*" do set the condition flags.
>>
>> You appear to be confusing named patterns (used by expand) with
>> recognizers.  Anyway, we have
>>
>> (define_insn "*add__"
>>   [(set (match_operand:GPI 0 "register_operand" "=r")
>> (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" 
>> "r")
>>   (match_operand:QI 2
>> "aarch64_shift_imm_" "n"))
>>   (match_operand:GPI 3 "register_operand" "r")))]
>>
>> Which is a non-flag setting add with shifted operand.
>>
>>> Either way I think it is another backend issue, so do you approve that
>>> I commit this patch now?
>>
>> Not yet.  I think there's something fundamental amiss here.
>>
>> BTW, it looks to me as though addptr3 should have exactly the same
>> operand rules as add3 (documentation reads "like add3"), so a
>> shifted operand shouldn't be supported there either.  If that isn't the
>> case then that should be clearly called out in the documentation.
>>
>> R.
>>
>
> PS.
>
> I presume you are aware of the canonicalization rules for add?  That is,
> for a shift-and-add operation, the shift operand must appear first.  Ie.
>
> (plus (shift (op, op)), op)
>
> not
>
> (plus (op, (shift (op, op))

 Hi Richard,
 Thanks for the comments.  I realized that the not-recognized insn
 issue is because the original patch builds non-canonical expressions.
 When reloading address expression, LRA generates non-canonical
 register scaled insn, which can't be recognized by aarch64 backend.

 Here is the updated patch using canonical form pattern,  it passes
 bootstrap and regression test.  Well, the ivo failure still exists,
 but it is analyzed in the original message.

 Is this patch OK?

 As for Jiong's concern about the additional extension instruction, I
 think this only stands for atomic load store instructions.  For
 general load store, AArch64 supports zext/sext in register scaling
 addressing mode, the additional instruction can be forward propagated
 into memory reference.  The problem for atomic load store is AArch64
 only supports direct register addressing mode.  After LRA reloads
 address expression out of memory reference, there is no combine/fwprop
 optimizer to merge instructions.  The problem is atomic_store's
 predicate doesn't match its constraint.   The predicate used for
 atomic_store is memory_operand, while all other atomic patterns
 use aarch64_sync_memory_operand.  I think this might be a typo.  With
 this change, expand will not generate addressing mode requiring reload
 anymore.  I will test another patch fixing this.

 Thanks,
 bin
>>>
>>> Some comments inline.
>>>
>
> R.
>
> aarch64_legitimize_addr-20151128.txt
>
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3fe2f0f..5b3e3c4 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4757,13 +4757,65 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  
> */, machine_mode mode)
>   We try to pick as large a range for the offset as possible to
>   maximize the chance of a CSE.  However, for aligned addresses
>   we limit the range to 4k so that structures with different sized
> - elements are likely to use the s

[PATCH] S/390: Add -mbackchain options to fix test failure.

2015-12-03 Thread Dominik Vogt
On S/390, __builtin_return_address and __builtin_frame_address
require the -mbackchain option to work for arbitrary stack frames.
The attached patch adds the option to two test cases to make them
work.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
gcc/testsuite/ChangeLog

* gcc.dg/Wframe-address.c: S/390 requires the -mbackchain option to
access arbitrary stack frames.
* gcc.dg/Wno-frame-address.c: Likewise.
>From 3461500e48e8f83bd18d5d6ea771ddf2d5ee7c35 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Fri, 4 Dec 2015 01:07:38 +0100
Subject: [PATCH] S/390: Add -mbackchain options to fix test failure.

---
 gcc/testsuite/gcc.dg/Wframe-address.c| 1 +
 gcc/testsuite/gcc.dg/Wno-frame-address.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/Wframe-address.c b/gcc/testsuite/gcc.dg/Wframe-address.c
index 7481baf..29bdfa3 100644
--- a/gcc/testsuite/gcc.dg/Wframe-address.c
+++ b/gcc/testsuite/gcc.dg/Wframe-address.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-Wframe-address" } */
+/* { dg-additional-options "-mbackchain" { target { s390*-*-* } } } */
 
 void* __attribute__ ((noclone, noinline))
 test_builtin_frame_address (unsigned i)
diff --git a/gcc/testsuite/gcc.dg/Wno-frame-address.c b/gcc/testsuite/gcc.dg/Wno-frame-address.c
index 6409b29..e5d078f 100644
--- a/gcc/testsuite/gcc.dg/Wno-frame-address.c
+++ b/gcc/testsuite/gcc.dg/Wno-frame-address.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-skip-if "Cannot access arbitrary stack frames" { arm*-*-* hppa*-*-* visium-*-* } } */
 /* { dg-options "-Werror" } */
+/* { dg-additional-options "-mbackchain" { target { s390*-*-* } } } */
 
 /* Verify that -Wframe-address is not enabled by default by enabling
-Werror and verifying the test still compiles.  */
-- 
2.3.0



Re: [PATCH] [ARC] Add support for atomic memory built-in.

2015-12-03 Thread Joern Wolfgang Rennecke


On 16/11/15 10:18, Claudiu Zissulescu wrote:
  
+/* Expand code to perform a 8 or 16-bit compare and swap by doing

+   32-bit compare and swap on the word containing the byte or
+   half-word.  The difference between a weak and a strong CAS is that
+   the weak version may simply fail.  The strong version relays on two


Typo: relays -> relies

More importantly, your use of barriers makes no sense to me.
Memory models other than MEMMODEL_RELAXED impose two requirements
on the compiler:
- For systems without hardware memory coherency  (e.g. multiple caches 
with software
  synchronisation), emit any instructions necessary to achieve 
coherency for those objects

  that the access / memory model requires.
- Prevent code movement by compiler optimizations.  This is where, 
hardware-independently, the
  memory model makes / could make a difference in how much restrictions 
are placed on the

  optimizers.

Because of PR middle-end/59448, we currently promote MEMMODEL_CONSUME to 
MEMMODEL_ACQUIRE;
which is a shame, really, because otherwise we could just rely on 
ordinary dependencies to prevent

reordering after a cache flush/invalidation at the atomic operation.

Now, assuming we have multiple cores with software-synchronized caches:

A MEMMODEL_SEQ / MEMMODEL_RELEASE operation requires a cache flush 
(unless you have a
write-through cache in the first place), so that all values that have 
been written into the local cache
become visible in main memory.  Also, any writes that are delayed due to 
out-of-order operation or

a write buffer must be flushed to main memory.

A MEMMODEL_SEQ / MEMMODEL_ACQUIRE operation requires a cache invalidation 
- preceded by a
cache flush to avoid losing data, so that values written by the 
releasing thread to main memory

will be seen by the current thread.

The patterns that represent the hardware cache / synchronisation 
operations may also double as

memory barriers for the compiler.

If you don't need hardware cache / synchronization operations (either 
because you have hardware coherency, or you have only a single cache 
system for all cores / the only core in the system),

you still need memory barriers for the compiler.

AFAICT, you use hardware synchronisation instructions for MEMMODEL_SEQ, 
and compiler memory barriers
for all other memory models (except MEMMODEL_RELAXED).  That makes no 
sense; either the platform

needs explicit instructions for memory coherency, or it doesn't.

On the other hand, your memory barriers are more restrictive than they 
need to be.
To tell the compiler that it must not sink a write below MEMMODEL_SEQ / 
MEMMODEL_RELEASE operations,
it is sufficient to display a USE of an unspecified memory location.  
This is also true when you have
a cache flush: it is sufficient to show the compiler that this cache 
flush may read anything.
(Well, actually, for our purposes it'd be OK to make it so that 
thread-local variables, spill slots and variables that satisfy an escape 
analysis are considered  independent.)
The USE of the unspecified memory has to be tied to the atomic 
operation, of course.  This could be
by making it part of the instruction pattern itself, or by having the 
atomic operation USE something
(e.g. a fake hard register) that is 'set' by the memory barrier / sync/ 
cache flush pattern.


Re: [PATCH] RFC: Use Levenshtein spelling suggestions in Fortran FE

2015-12-03 Thread Steve Kargl
On Thu, Dec 03, 2015 at 02:53:06PM +0100, Mikael Morin wrote:
> Le 03/12/2015 10:29, Janne Blomqvist a écrit :
> > On Tue, Dec 1, 2015 at 7:51 PM, Bernhard Reutner-Fischer
> >  wrote:
> >> As said, we could as well use a list of candidates with NULL as record 
> >> marker.
> >> Implementation cosmetics. Steve seems to not be thrilled by the
> >> overall idea in the first place, so unless there is clear support by
> >> somebody else i won't pursue this any further, it's not that i'm bored
> >> or ran out of stuff i should do.. ;)
> >
> > FWIW, I think the idea of this patch is quite nice, and I'd like to
> > see it in the compiler.
> >
> I like this feature as well.
> 
> > I'm personally Ok with "C++-isms", but nowadays my contributions are
> > so minor that my opinion shouldn't carry that much weight on this
> > matter.
> >
> Same here.
> David Malcolm suggested to move the candidate selection code to the 
> common middle-end infrastructure, which would move half of the so-called 
> "bloat" there.  Steve, would that work for you?

Fine with me.

When debugging, if I run into C++isms, I'll stop and move to
a new bug.  We certainly have enough open bugs to choose from. 

-- 
Steve


Re: [C] Issue an error on scalar va_list with reverse storage order

2015-12-03 Thread Joseph Myers
On Thu, 3 Dec 2015, Eric Botcazou wrote:

> Hi,
> 
> further testing revealed an issue with va_arg handling and reverse scalar 
> storage order on some platforms: when va_list is scalar, passing a field of a 
> structure with reverse SSO as first argument to va_start/va_arg/va_end 
> doesn't 
> work because the machinery takes its address and this is not allowed for such 
> a field (it's really a corner case but gcc.c-torture/execute/stdarg-2.c does 
> exercise it).  Hence the attached patch which issues an error in this case.
> 
> Tested on x86_64-suse-linux, OK for the mainline?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Handle OBJ_TYPE_REF in FRE

2015-12-03 Thread Jan Hubicka
> >may lead to wrong code.
> 
> Can you try generating a testcase?
>  Because with equal vptr and voffset I can't see how that can happen unless 
> some pass extracts information from the pointer types without sanity checking 
> with the pointers and offsets.

I am not sure I can get a wrong code with current mainline, because for now you
only substitute for the lookup done for speculative devirt and if we wrongly
predict the thing to be __builtin_unreachable, we dispatch to usual virtual
call.  Once you get movement on calls it will be easier to do.

OBJ_TYPE_REF is a wrapper around OBJ_TYPE_EXPR adding three extra parameters:
 - OBJ_TYPE_REF_OBJECT
 - OBJ_TYPE_REF_TOKEN
 - obj_type_ref_class which is computed from TREE_TYPE (obj_type_ref) itself.

While two OBJ_TYPE_REFS with equivalent OBJ_TYPE_EXPR are kind of same
expressions, they are optimized differently (just as if they were in different
alias sets).  For that reason you need to match the type of obj_type_ref_class
because that one is not matched by useless_type_conversion (it is a pointer to
method of corresponding class type we are looking up).

The following testcase:
struct foo {virtual void bar(void) __attribute__ ((const));};
struct foobar {virtual void bar(void) __attribute__ ((const));};
void
dojob(void *ptr, int t)
{
  if (t)
   ((struct foo*)ptr)->bar();
  else
   ((struct foobar*)ptr)->bar();
}

produces
void dojob(void*, int) (void * ptr, int t)
{
  int (*__vtbl_ptr_type) () * _5;
  int (*__vtbl_ptr_type) () _6;
  int (*__vtbl_ptr_type) () * _8;
  int (*__vtbl_ptr_type) () _9;

  :
  if (t_2(D) != 0)
goto ;
  else
goto ;

  :
  _5 = MEM[(struct foo *)ptr_4(D)]._vptr.foo;
  _6 = *_5;
  OBJ_TYPE_REF(_6;(struct foo)ptr_4(D)->0) (ptr_4(D));
  goto ;

  :
  _8 = MEM[(struct foobar *)ptr_4(D)]._vptr.foobar;
  _9 = *_8;
  OBJ_TYPE_REF(_9;(struct foobar)ptr_4(D)->0) (ptr_4(D));

  :
  return;

}

Now I would need to get some code movement done to get _5 and _6
moved and unified with _8 and _9 that we currently don't do.  
Still would feel safer if the equivalence predicate also checked
that the type is the same.
> >Or do you just substitute the operands of OBJ_TYPE_REF? 
> 
> No, I value number them.  But yes, the type issue also crossed my mind.  
> Meanwhile testing revealed that I need to adjust gimple_expr_type to preserve 
> the type of the obj-type-ref, otherwise the devirt machinery ICEs (receiving 
> void *). That's also a reason we can't make obj-type-ref a ternary RHS.

Yep, type of OBJ_TYPE_REF matters...
> 
> >> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
> >> 
> >> Note that this does not (yet) substitute OBJ_TYPE_REFs in calls
> >> with SSA names that have the same value - not sure if that would
> >> be desired generally (does the devirt machinery cope with that?).
> >
> >This should work fine.
> 
> OK. So with that substituting the direct call later should work as well.
Great!
> 
> Richard.

Honza
> 
> >> 
> >> Thanks,
> >> Richard.
> >> 
> >> 2015-12-03  Richard Biener  
> >> 
> >>PR tree-optimization/64812
> >>* tree-ssa-sccvn.c (vn_get_stmt_kind): Handle OBJ_TYPE_REF.
> >>(vn_nary_length_from_stmt): Likewise.
> >>(init_vn_nary_op_from_stmt): Likewise.
> >>* gimple-match-head.c (maybe_build_generic_op): Likewise.
> >>* gimple-pretty-print.c (dump_unary_rhs): Likewise.
> >> 
> >>* g++.dg/tree-ssa/ssa-fre-1.C: New testcase.
> >> 
> >> Index: gcc/tree-ssa-sccvn.c
> >> ===
> >> *** gcc/tree-ssa-sccvn.c   (revision 231221)
> >> --- gcc/tree-ssa-sccvn.c   (working copy)
> >> *** vn_get_stmt_kind (gimple *stmt)
> >> *** 460,465 
> >> --- 460,467 
> >>  ? VN_CONSTANT : VN_REFERENCE);
> >>else if (code == CONSTRUCTOR)
> >>  return VN_NARY;
> >> +  else if (code == OBJ_TYPE_REF)
> >> +return VN_NARY;
> >>return VN_NONE;
> >>  }
> >>  default:
> >> *** vn_nary_length_from_stmt (gimple *stmt)
> >> *** 2479,2484 
> >> --- 2481,2487 
> >> return 1;
> >>   
> >>   case BIT_FIELD_REF:
> >> + case OBJ_TYPE_REF:
> >> return 3;
> >>   
> >>   case CONSTRUCTOR:
> >> *** init_vn_nary_op_from_stmt (vn_nary_op_t
> >> *** 2508,2513 
> >> --- 2511,2517 
> >> break;
> >>   
> >>   case BIT_FIELD_REF:
> >> + case OBJ_TYPE_REF:
> >> vno->length = 3;
> >> vno->op[0] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
> >> vno->op[1] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 1);
> >> Index: gcc/gimple-match-head.c
> >> ===
> >> *** gcc/gimple-match-head.c(revision 231221)
> >> --- gcc/gimple-match-head.c(working copy)
> >> *** maybe_build_generic_op (enum tree_code c
> >> *** 243,248 
> >> --- 243,249 
> >> *op0 = build1 (code, type, *

[C++ PATCH] fix canonical type node ICE when from satisfy_argument_deduction_constraint function (PR c++/68683)

2015-12-03 Thread Ryan Burn
When determining if a constraint is satisfied, the function
satisfy_argument_deduction_constraint temporarily changes the
PLACEHOLDER_TYPE_CONSTRAINTS of a tree node. Since
PLACEHOLDER_TYPE_CONSTRAINTS are taken into account when determining
the equality of two types, this means that the node's canonical type
must also change; otherwise, an ICE results if the type is compared to
another type that it was previously equal to.

The attached patch sets the node's TYPE_CANONICAL field to NULL_TREE
after the PLACEHOLDER_TYPE_CONSTRAINTS are changed and restores
TYPE_CANONICAL to its original value when the
PLACEHOLDER_TYPE_CONSTRAINTS values are changed back so that erroneous
canonical type comparisons aren't made.

Bootstrapped and regression tested on x86_64-linux

2015-12-03  Ryan Burn  

  PR c++/68683
   * constraint.cc (satisfy_argument_deduction_constraint): Set
TYPE_CANONICAL to NULL_TREE if PLACEHOLDER_TYPE_CONSTRAINTS are
changed.

   * g++.dg/concepts/pr68683.C: New test


pr68683.patch
Description: Binary data


[PATCH] Use ECF_MAY_BE_ALLOCA for __builtin_alloca_with_align (PR tree-optimization/68680)

2015-12-03 Thread Jakub Jelinek
Hi!

As mentioned in the PR, GCC 4.7+ seems to have regressed for
-fstack-protector*, functions containing VLAs and no other arrays are not
protected anymore.  Before 4.7, VLAs were gimplified as __builtin_alloca
call, which sets ECF_MAY_BE_ALLOCA and in turn cfun->calls_alloca.
These two are used in various places:
1) for stack protector purposes (this issue), early during expansion
2) in the inliner
3) for tail call optimization
4) for some non-NULL optimizations
and tons of places in RTL.  As 4.7+ emits __builtin_alloca_with_align
instead and special_function_p has not been adjusted, this does not happen
any longer, though cfun->calls_alloca gets set during the expansion of
__builtin_alloca_with_align, so for RTL optimizers it is already set.

The following patch restores the previous behavior, making VLAs be
ECF_MAY_BE_ALLOCA and cfun->calls_alloca already during GIMPLE passes.
It could be also done by testing the name, but I thought that it would be
too ugly (would need another case anyway, as the current tests are for
names with length <= 16).

1) and 4) surely want to treat the VLAs like the patch does, I'm not 100%
sure about 2) and 3), as VLAs are slightly different, they release
the stack afterwards at the end of scope of the VLA var.  If we wanted to
treat the two differently, maybe we'd need another ECF* flag and another
cfun bitfield for VLAs.

The following patch has been bootstrapped/regtested on x86_64-linux and
i686-linux.

2015-12-03  Jakub Jelinek  

PR tree-optimization/68680
* calls.c (special_function_p): Return ECF_MAY_BE_ALLOCA for
BUILT_IN_ALLOCA{,_WITH_ALIGN}.

* gcc.target/i386/pr68680.c: New test.

--- gcc/calls.c.jj  2015-11-26 11:17:25.0 +0100
+++ gcc/calls.c 2015-12-03 19:03:59.342306457 +0100
@@ -553,6 +553,17 @@ special_function_p (const_tree fndecl, i
flags |= ECF_NORETURN;
 }
 
+  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+switch (DECL_FUNCTION_CODE (fndecl))
+  {
+  case BUILT_IN_ALLOCA:
+  case BUILT_IN_ALLOCA_WITH_ALIGN:
+   flags |= ECF_MAY_BE_ALLOCA;
+   break;
+  default:
+   break;
+  }
+
   return flags;
 }
 
--- gcc/testsuite/gcc.target/i386/pr68680.c.jj  2015-12-03 19:10:14.836037923 
+0100
+++ gcc/testsuite/gcc.target/i386/pr68680.c 2015-12-03 19:09:57.0 
+0100
@@ -0,0 +1,15 @@
+/* PR tree-optimization/68680 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-protector-strong" } */
+
+int foo (char *);
+
+int
+bar (unsigned long x)
+{
+  char a[x];
+  return foo (a);
+}
+
+/* Verify that this function is stack protected.  */
+/* { dg-final { scan-assembler "stack_chk_fail" } } */

Jakub


Re: [PATCH 02/10] Fix g++.dg/cpp0x/nsdmi-template14.C

2015-12-03 Thread Jason Merrill

On 12/03/2015 04:43 PM, David Malcolm wrote:

On Thu, 2015-12-03 at 15:33 -0500, Jason Merrill wrote:

On 12/03/2015 09:55 AM, David Malcolm wrote:

This patch adds bulletproofing to detect purged tokens, and avoid using
them.

Alternatively, is it OK to access purged tokens for this kind of thing?
If so, would it make more sense to instead leave their locations untouched
when purging them?


I think cp_lexer_previous_token should skip past purged tokens.


Sorry if this is a silly question, but should I limit the iteration e.g.
by detecting a sentinel value?  e.g.
   parser->lexer->buffer->address () ?

Or is there guaranteed to be an unpurged token somewhere beforehand?


There should always be an unpurged token.


Out of interest, the prior tokens here are:

(gdb) p end_tok[0]
$25 = {type = CPP_GREATER, keyword = RID_MAX, flags = 0 '\000',
pragma_kind = PRAGMA_NONE, implicit_extern_c = 0,
   error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-1]
$26 = {type = CPP_NAME, keyword = RID_MAX, flags = 0 '\000', pragma_kind
= PRAGMA_NONE, implicit_extern_c = 0,
   error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-2]
$27 = {type = CPP_LESS, keyword = RID_MAX, flags = 0 '\000', pragma_kind
= PRAGMA_NONE, implicit_extern_c = 0,
   error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-3]
$28 = {type = 86, keyword = RID_MAX, flags = 1 '\001', pragma_kind =
PRAGMA_NONE, implicit_extern_c = 0, error_reported = 0,
   purged_p = 0, location = 202016, u = {tree_check_value =
0x719dfd98, value = }}

(gdb) p end_tok[-4]
$29 = {type = CPP_KEYWORD, keyword = RID_NEW, flags = 1 '\001',
pragma_kind = PRAGMA_NONE, implicit_extern_c = 0,
   error_reported = 0, purged_p = 0, location = 201890, u =
{tree_check_value = 0x718a8318,
 value = }}

where the previous unpurged token is:

(gdb) p end_tok[-3].purged_p
$31 = 0

(gdb) call inform (end_tok[-3].location, "")
../../src/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C:11:14: note:
B* p = new B;
   ^

which would give a range of:

B* p = new B;
   ^

for the erroneous new expression, rather than:


B* p = new B;
   ^~~~

if we used the location of the purged token (the CPP_GREATER).
I prefer the latter, hence my suggestion about not zero-ing out the
locations of tokens when purging them.


The unpurged token you're finding is the artificial CPP_TEMPLATE_ID 
token, which seems to need to have its location adjusted to reflect the 
full range of the template-id.


Jason




Re: [PATCH 07/10] Fix g++.dg/template/ref3.C

2015-12-03 Thread David Malcolm
On Thu, 2015-12-03 at 15:38 -0500, Jason Merrill wrote:
> On 12/03/2015 09:55 AM, David Malcolm wrote:
> > Testcase g++.dg/template/ref3.C:
> >
> >   1 // PR c++/28341
> >   2
> >   3 template<const int&> struct A {};
> >   4
> >   5 template<typename T> struct B
> >   6 {
> >   7   A<(T)0> b; // { dg-error "constant|not a valid" }
> >   8   A<T(0)> a; // { dg-error "constant|not a valid" }
> >   9 };
> >  10
> >  11 B<const int&> b;
> >
> > The output of this test for both c++11 and c++14 is unaffected
> > by the patch kit:
> >   g++.dg/template/ref3.C: In instantiation of 'struct B':
> >   g++.dg/template/ref3.C:11:15:   required from here
> >   g++.dg/template/ref3.C:7:11: error: '0' is not a valid template argument 
> > for type 'const int&' because it is not an lvalue
> >   g++.dg/template/ref3.C:8:11: error: '0' is not a valid template argument 
> > for type 'const int&' because it is not an lvalue
> >
> > However, the c++98 output is changed:
> >
> > Status quo for c++98:
> > g++.dg/template/ref3.C: In instantiation of 'struct B':
> > g++.dg/template/ref3.C:11:15:   required from here
> > g++.dg/template/ref3.C:7:11: error: a cast to a type other than an integral 
> > or enumeration type cannot appear in a constant-expression
> > g++.dg/template/ref3.C:8:11: error: a cast to a type other than an integral 
> > or enumeration type cannot appear in a constant-expression
> >
> > (line 7 and 8 are at the closing semicolon for fields b and a)
> >
> > With the patchkit for c++98:
> > g++.dg/template/ref3.C: In instantiation of 'struct B':
> > g++.dg/template/ref3.C:11:15:   required from here
> > g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral 
> > or enumeration type cannot appear in a constant-expression
> > g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral 
> > or enumeration type cannot appear in a constant-expression
> >
> > So the 2nd:
> >"error: a cast to a type other than an integral or enumeration type 
> > cannot appear in a constant-expression"
> > moves from line 8 to line 7 (and moves them to earlier, having ranges)
> >
> > What's happening is that cp_parser_enclosed_template_argument_list
> > builds a CAST_EXPR, the first time from cp_parser_cast_expression,
> > the second time from cp_parser_functional_cast; these have locations
> > representing the correct respective caret&ranges, i.e.:
> >
> > A<(T)0> b;
> >   ^~~~
> >
> > and:
> >
> > A<T(0)> a;
> >   ^~~~
> >
> > Eventually finish_template_type is called for each, to build a RECORD_TYPE,
> > and we get a cache hit the 2nd time through here in pt.c:
> > 8281  hash = spec_hasher::hash (&elt);
> > 8282  entry = type_specializations->find_with_hash (&elt, hash);
> > 8283
> > 8284  if (entry)
> > 8285return entry->spec;
> >
> > due to:
> >template_args_equal (ot=, nt= > 0x719bc480>) at ../../src/gcc/cp/pt.c:7778
> > which calls:
> >cp_tree_equal (t1=, t2= > 0x719bc480>) at ../../src/gcc/cp/tree.c:2833
> > and returns equality.
> >
> > Hence we get a single RECORD_TYPE for the type A<(T)(0)>, and hence
> > when issuing the errors it uses the TREE_VEC for the first one,
> > using the location of the first line.
> 
> Why does the type sharing affect where the parser gives the error?

I believe what's happening is that the patchkit is setting location_t
values for more expressions than before, including the expression for
the template param.  pt.c:tsubst_expr has this:

  if (EXPR_HAS_LOCATION (t))
input_location = EXPR_LOCATION (t);

I believe that before (in the status quo), the substituted types didn't
have location_t values, and hence the above conditional didn't fire;
input_location was coming from a *token* where the expansion happened,
hence we got an error message on the relevant line for each expansion.

With the patch, the substituted types have location_t values within
their params, hence the conditional above fires: input_location is
updated to use the EXPR_LOCATION, which comes from that of the param
within the type - but with type-sharing it's using the first place where
the type is created.

Perhaps a better fix is for cp_parser_non_integral_constant_expression
to take a location_t, rather than have it rely on input_location?


> > I'm not sure what the ideal fix for this is; for now I've worked
> > around it by updating the dg directives to reflect the new output.
> >
> > gcc/testsuite/ChangeLog:
> > * g++.dg/template/ref3.C: Update locations of dg directives.
> > ---
> >   gcc/testsuite/g++.dg/template/ref3.C | 6 --
> >   1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/testsuite/g++.dg/template/ref3.C 
> > b/gcc/testsuite/g++.dg/template/ref3.C
> > index 976c093..6e568c3 100644
> > --- a/gcc/testsuite/g++.dg/template/ref3.C
> > +++ b/gcc/testsuite/g++.dg/template/ref3.C
> > @@ -4,8 +4,10 @@ template struct A {};
> >
> >   templa

Re: [PATCH][ARC] Refurbish emitting DWARF2 for epilogue.

2015-12-03 Thread Joern Wolfgang Rennecke


On 27/11/15 13:53, Claudiu Zissulescu wrote:
   
@@ -2502,11 +2540,18 @@ arc_expand_epilogue (int sibcall_p)

/* Restore any saved registers.  */
if (frame_pointer_needed)
  {
- rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+  insn = emit_insn (gen_blockage ());


Is this actually part of the patch to fix cfi generation?
It looks to me like it is working around an alias.c issue - namely, that
alias.c does not consider stack and frame pointer based addresses to alias.
https://gcc.gnu.org/ml/gcc/2011-07/msg00461.html
https://github.com/adapteva/epiphany-gcc/commit/6d1194a563e05dfa826ab4635514477af1f7a2b0



Re: [PATCH 02/10] Fix g++.dg/cpp0x/nsdmi-template14.C

2015-12-03 Thread David Malcolm
On Thu, 2015-12-03 at 15:33 -0500, Jason Merrill wrote:
> On 12/03/2015 09:55 AM, David Malcolm wrote:
> > This patch adds bulletproofing to detect purged tokens, and avoid using
> > them.
> >
> > Alternatively, is it OK to access purged tokens for this kind of thing?
> > If so, would it make more sense to instead leave their locations untouched
> > when purging them?
> 
> I think cp_lexer_previous_token should skip past purged tokens.

Sorry if this is a silly question, but should I limit the iteration e.g.
by detecting a sentinel value?  e.g.
  parser->lexer->buffer->address () ?

Or is there guaranteed to be an unpurged token somewhere beforehand?

Out of interest, the prior tokens here are:

(gdb) p end_tok[0]
$25 = {type = CPP_GREATER, keyword = RID_MAX, flags = 0 '\000',
pragma_kind = PRAGMA_NONE, implicit_extern_c = 0, 
  error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-1]
$26 = {type = CPP_NAME, keyword = RID_MAX, flags = 0 '\000', pragma_kind
= PRAGMA_NONE, implicit_extern_c = 0, 
  error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-2]
$27 = {type = CPP_LESS, keyword = RID_MAX, flags = 0 '\000', pragma_kind
= PRAGMA_NONE, implicit_extern_c = 0, 
  error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value
= 0x0, value = }}

(gdb) p end_tok[-3]
$28 = {type = 86, keyword = RID_MAX, flags = 1 '\001', pragma_kind =
PRAGMA_NONE, implicit_extern_c = 0, error_reported = 0, 
  purged_p = 0, location = 202016, u = {tree_check_value =
0x719dfd98, value = }}

(gdb) p end_tok[-4]
$29 = {type = CPP_KEYWORD, keyword = RID_NEW, flags = 1 '\001',
pragma_kind = PRAGMA_NONE, implicit_extern_c = 0, 
  error_reported = 0, purged_p = 0, location = 201890, u =
{tree_check_value = 0x718a8318, 
value = }}

where the previous unpurged token is:

(gdb) p end_tok[-3].purged_p
$31 = 0

(gdb) call inform (end_tok[-3].location, "")
../../src/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C:11:14: note:
   B* p = new B;
  ^

which would give a range of:

   B* p = new B;
  ^

for the erroneous new expression, rather than:


   B* p = new B;
  ^~~~

if we used the location of the purged token (the CPP_GREATER).
I prefer the latter, hence my suggestion about not zero-ing out the
locations of tokens when purging them.




Re: C PATCH for c/68668 (grokdeclarator and wrong type of PARM_DECL)

2015-12-03 Thread Joseph Myers
On Thu, 3 Dec 2015, Marek Polacek wrote:

> > I think you also need to decrement orig_qual_indirect, which counts the 
> > number of levels of array type derivation from orig_qual_type.
> 
> Thus:
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2015-12-03  Marek Polacek  
> 
>   PR c/68668
>   * c-decl.c (grokdeclarator): When creating a PARM_DECL of ARRAY_TYPE,
>   use TREE_TYPE of orig_qual_type.  Decrement ORIG_QUAL_INDIRECT.

On further consideration:

Removing one level of array type derivation from type means it is one 
fewer levels indirect from the original version of orig_qual_type.  So I 
think you should actually decrement orig_qual_indirect without changing 
orig_qual_type.  But, if orig_qual_indirect is zero (so it cannot be
decremented), in that case you may get better results from changing
orig_qual_type without decrementing orig_qual_indirect.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: C PATCH for c/68668 (grokdeclarator and wrong type of PARM_DECL)

2015-12-03 Thread Marek Polacek
On Thu, Dec 03, 2015 at 06:11:42PM +, Joseph Myers wrote:
> On Thu, 3 Dec 2015, Marek Polacek wrote:
> 
> > This ought to fix the fallout from PR c/68162 fix.  Here the problem is that
> > grokdeclarator created a wrong type for PARM_DECL "p".  It created this decl
> > with type "const int[] *" while it should be "const int *".
> > 
> > I think the problem is that we weren't using TREE_TYPE on orig_qual_type and
> > thus c_build_qualified_type and subsequent c_build_pointer_type might create
> > a bogus type.  So when we're transferring const-ness of an array into that of
> > type pointed to, use TREE_TYPE not only of "type", but even of the orig qual
> > type.
> 
> I think you also need to decrement orig_qual_indirect, which counts the 
> number of levels of array type derivation from orig_qual_type.

Thus:

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-12-03  Marek Polacek  

PR c/68668
* c-decl.c (grokdeclarator): When creating a PARM_DECL of ARRAY_TYPE,
use TREE_TYPE of orig_qual_type.  Decrement ORIG_QUAL_INDIRECT.

* gcc.dg/pr68668.c: New test.

diff --git gcc/c/c-decl.c gcc/c/c-decl.c
index 9ad8219..25bd1e0 100644
--- gcc/c/c-decl.c
+++ gcc/c/c-decl.c
@@ -6417,6 +6417,11 @@ grokdeclarator (const struct c_declarator *declarator,
  {
/* Transfer const-ness of array into that of type pointed to.  */
type = TREE_TYPE (type);
+   if (orig_qual_type != NULL_TREE)
+ {
+   orig_qual_type = TREE_TYPE (orig_qual_type);
+   orig_qual_indirect--;
+ }
if (type_quals)
  type = c_build_qualified_type (type, type_quals, orig_qual_type,
 orig_qual_indirect);
diff --git gcc/testsuite/gcc.dg/pr68668.c gcc/testsuite/gcc.dg/pr68668.c
index e69de29..d144fb6 100644
--- gcc/testsuite/gcc.dg/pr68668.c
+++ gcc/testsuite/gcc.dg/pr68668.c
@@ -0,0 +1,10 @@
+/* PR c/68668 */
+/* { dg-do compile } */
+
+typedef const int T[];
+
+int
+fn1 (T p)
+{
+  return p[0];
+}

Marek


Re: [PATCH 2/2] [graphite] fix invalid bounds on array refs

2015-12-03 Thread Sebastian Pop
Richard Biener wrote:
> On Wed, Dec 2, 2015 at 10:36 PM, Sebastian Paul Pop  wrote:
> > Do you recommend that we add a gcc_assert that min is always lower than max?
> 
> No, min can be one less than max if the array has size zero.

Maybe a typo: do you mean max can be one less than min?

If the array has size zero, then I think ISL is correct in saying that there are
no dependences.  As we miscompiled the testcase, I think that the bug is in the
Fortran front-end.



Re: [PATCH 2/4][AArch64] Increase the loop peeling limit

2015-12-03 Thread Evandro Menezes

On 11/20/2015 05:53 AM, James Greenhalgh wrote:

On Thu, Nov 19, 2015 at 04:04:41PM -0600, Evandro Menezes wrote:

On 11/05/2015 02:51 PM, Evandro Menezes wrote:

2015-11-05  Evandro Menezes 

   gcc/

   * config/aarch64/aarch64.c (aarch64_override_options_internal):
   Increase loop peeling limit.

This patch increases the limit for the number of peeled insns.
With this change, I noticed no major regression in either
Geekbench v3 or SPEC CPU2000 while some benchmarks, typically FP
ones, improved significantly.

I tested this tuning on Exynos M1 and on A57.  ThunderX seems to
benefit from this tuning too.  However, I'd appreciate comments
from other stakeholders.

Ping.

I'd like to leave this for a call from the port maintainers. I can see why
this leads to more opportunities for vectorization, but I'm concerned about
the wider impact on code size. Certainly I wouldn't expect this to be our
default at -O2 and below.

My gut feeling is that this doesn't really belong in the back-end (there are
presumably good reasons why the default for this parameter across GCC has
fluctuated from 400 to 100 to 200 over recent years), but as I say, I'd
like Marcus or Richard to make the call as to whether or not we take this
patch.


Please, correct me if I'm wrong, but loop peeling is enabled only with 
loop unrolling (and with PGO).  If so, then extra code size is not a 
concern, for this heuristic is only active when unrolling loops, when 
code size is already of secondary importance.


Thank you,

--
Evandro Menezes



Re: [PATCH][AArch64] Replace insn to zero up DF register

2015-12-03 Thread Evandro Menezes

On 11/09/2015 04:59 PM, Evandro Menezes wrote:

Hi, Marcus.

Have you an update from the architecture folks about this?

Thank you,


Marcus?

--
Evandro Menezes



[PATCH] Fix reassoc range test vs. value ranges (PR tree-optimization/68671)

2015-12-03 Thread Jakub Jelinek
Hi!

As mentioned in the PR, maybe_optimize_range_tests considers basic blocks
with not just the final GIMPLE_COND (or for last_bb store feeding into PHI),
but also assign stmts that don't trap, don't have side-effects and where
the SSA_NAMEs they set are used only in their own bb.
Now, if we decide to optimize some range test, we can change some conditions
on previous bbs and that means we could execute some basic blocks that
wouldn't be executed in the original program.  As the stmts don't set
anything used in other bbs, they are most likely dead after the
optimization, but the problem on the testcase is that because of the
condition changes in previous bb we end up with incorrect value range
for some SSA_NAME(s).  That can result in the miscompilation of the testcase
on certain targets.

Fixed by resetting the value range info of such SSA_NAMEs.  I believe it
shouldn't be a big deal, they will be mostly dead anyway.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-12-03  Jakub Jelinek  

PR tree-optimization/68671
* tree-ssa-reassoc.c (maybe_optimize_range_tests): For basic
blocks starting with the successor of first bb we've modified
and ending with last_bb, reset value ranges of all integral
SSA_NAMEs set in those basic blocks.

* gcc.dg/pr68671.c: New test.

--- gcc/tree-ssa-reassoc.c.jj   2015-11-18 11:22:51.0 +0100
+++ gcc/tree-ssa-reassoc.c  2015-12-03 18:12:08.915210122 +0100
@@ -3204,7 +3204,7 @@ maybe_optimize_range_tests (gimple *stmt
 any_changes = optimize_range_tests (ERROR_MARK, &ops);
   if (any_changes)
 {
-  unsigned int idx;
+  unsigned int idx, max_idx = 0;
   /* update_ops relies on has_single_use predicates returning the
 same values as it did during get_ops earlier.  Additionally it
 never removes statements, only adds new ones and it should walk
@@ -3220,6 +3220,7 @@ maybe_optimize_range_tests (gimple *stmt
{
  tree new_op;
 
+ max_idx = idx;
  stmt = last_stmt (bb);
  new_op = update_ops (bbinfo[idx].op,
   (enum tree_code)
@@ -3289,6 +3290,10 @@ maybe_optimize_range_tests (gimple *stmt
  && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
{
  gcond *cond_stmt = as_a  (last_stmt (bb));
+
+ if (idx > max_idx)
+   max_idx = idx;
+
  if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
gimple_cond_make_false (cond_stmt);
  else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
@@ -3305,6 +3310,30 @@ maybe_optimize_range_tests (gimple *stmt
  if (bb == first_bb)
break;
}
+
+  /* The above changes could result in basic blocks after the first
+modified one, up to and including last_bb, to be executed even if
+they would not be in the original program.  If the value ranges of
+assignment lhs' in those bbs were dependent on the conditions
+guarding those basic blocks which now can change, the VRs might
+be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
+are only used within the same bb, it should be not a big deal if
+we just reset all the VRs in those bbs.  See PR68671.  */
+  for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
+   {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+   {
+ gimple *g = gsi_stmt (gsi);
+ if (!is_gimple_assign (g))
+   continue;
+ tree lhs = gimple_assign_lhs (g);
+ if (TREE_CODE (lhs) != SSA_NAME)
+   continue;
+ if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+   SSA_NAME_RANGE_INFO (lhs) = NULL;
+   }
+   }
 }
 }
 
--- gcc/testsuite/gcc.dg/pr68671.c.jj   2015-12-03 18:19:24.769104484 +0100
+++ gcc/testsuite/gcc.dg/pr68671.c  2015-12-03 18:19:07.0 +0100
@@ -0,0 +1,23 @@
+/* PR tree-optimization/68671 */
+/* { dg-do run } */
+/* { dg-options " -O2 -fno-tree-dce" } */
+
+volatile int a = -1;
+volatile int b;
+
+static inline int
+fn1 (signed char p1, int p2)
+{
+  return (p1 < 0) || (p1 > (1 >> p2)) ? 0 : (p1 << 1);
+}
+
+int
+main ()
+{
+  signed char c = a;
+  b = fn1 (c, 1);
+  c = ((128 | c) < 0 ? 1 : 0);
+  if (c != 1)
+__builtin_abort ();
+  return 0;
+}

Jakub


Re: [PATCH 3b/4][AArch64] Add scheduling model for Exynos M1

2015-12-03 Thread Evandro Menezes

On 11/20/2015 11:17 AM, James Greenhalgh wrote:

On Tue, Nov 10, 2015 at 11:54:00AM -0600, Evandro Menezes wrote:

2015-11-10  Evandro Menezes 

gcc/

* config/aarch64/aarch64-cores.def: Use the Exynos M1 sched model.
* config/aarch64/aarch64.md: Include "exynos-m1.md".
* config/arm/arm-cores.def: Use the Exynos M1 sched model.
* config/arm/arm.md: Include "exynos-m1.md".
* config/arm/arm-tune.md: Regenerated.
* config/arm/exynos-m1.md: New file.

This patch adds the scheduling model for Exynos M1.  It depends on
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg01257.html

Bootstrapped on arm-unknown-linux-gnueabihf, aarch64-unknown-linux-gnu.

Please, commit if it's alright.



 From 0b7b6d597e5877c78c4d88e0d4491858555a5364 Mon Sep 17 00:00:00 2001
From: Evandro Menezes 
Date: Mon, 9 Nov 2015 17:18:52 -0600
Subject: [PATCH 2/2] [AArch64] Add scheduling model for Exynos M1

gcc/
* config/aarch64/aarch64-cores.def: Use the Exynos M1 sched model.
* config/aarch64/aarch64.md: Include "exynos-m1.md".

These changes are fine.


* config/arm/arm-cores.def: Use the Exynos M1 sched model.
* config/arm/arm.md: Include "exynos-m1.md".
* config/arm/arm-tune.md: Regenerated.

These changes need an ack from an ARM reviewer.


* config/arm/exynos-m1.md: New file.

I have a few comments on this model.


+;; The Exynos M1 core is modeled as a triple issue pipeline that has
+;; the following functional units.
+
+(define_automaton "exynos_m1_gp")
+(define_automaton "exynos_m1_ls")
+(define_automaton "exynos_m1_fp")
+
+;; 1.  Two pipelines for simple integer operations: A, B
+;; 2.  One pipeline for simple or complex integer operations: C
+
+(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")
+
+(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
+(define_reservation "em1_c" "em1_xc")

Is this extra reservation useful, can we not just use em1_xc directly?


+;; 3.  Two asymmetric pipelines for Neon and FP operations: F0, F1
+
+(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")
+
+(define_reservation "em1_fmac" "em1_f0")
+(define_reservation "em1_fcvt" "em1_f0")
+(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
+(define_reservation "em1_nalu0" "em1_f0")
+(define_reservation "em1_nalu1" "em1_f1")
+(define_reservation "em1_nmisc" "em1_f0")
+(define_reservation "em1_ncrypt" "em1_f0")
+(define_reservation "em1_fadd" "em1_f1")
+(define_reservation "em1_fvar" "em1_f1")
+(define_reservation "em1_fst" "em1_f1")

Same comment here, does this not just obfuscate the interaction between
instruction classes in the description? I'm not against doing it this way
if you prefer, but it would seem to reduce readability to me. I think there
is also an argument that this increases readability, so it is your choice.


+
+;; 4.  One pipeline for branch operations: BX
+
+(define_cpu_unit "em1_bx" "exynos_m1_gp")
+
+(define_reservation "em1_br" "em1_bx")
+

And again?


+;; 5.  One AGU for loads: L
+;; One AGU for stores and one pipeline for stores: S, SD
+
+(define_cpu_unit "em1_lx" "exynos_m1_ls")
+(define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")
+
+(define_reservation "em1_ld" "em1_lx")
+(define_reservation "em1_st" "(em1_sx + em1_sd)")
+
+;; Common occurrences
+(define_reservation "em1_sfst" "(em1_fst + em1_st)")
+(define_reservation "em1_lfst" "(em1_fst + em1_ld)")
+
+;; Branches
+;;
+;; No latency as there is no result
+;; TODO: Unconditional branches use no units;
+;; conditional branches add the BX unit;
+;; indirect branches add the C unit.
+(define_insn_reservation "exynos_m1_branch" 0
+  (and (eq_attr "tune" "exynosm1")
+   (eq_attr "type" "branch"))
+  "em1_br")
+
+(define_insn_reservation "exynos_m1_call" 1
+  (and (eq_attr "tune" "exynosm1")
+   (eq_attr "type" "call"))
+  "em1_alu")
+
+;; Basic ALU
+;;
+;; Simple ALU without shift, non-predicated
+(define_insn_reservation "exynos_m1_alu" 1
+  (and (eq_attr "tune" "exynosm1")
+   (and (not (eq_attr "predicated" "yes"))

(and (eq_attr "predicated" "no")) ?

Likewise throughout the file? Again this is your choice.

This is OK from the AArch64 side, let me know if you plan to change any
of the above, otherwise I'll commit it (or someone else can commit it)
after I see an OK from an ARM reviewer.


ARM ping.

--
Evandro Menezes



[PATCH] Improve constant vec_perm expansion on i?86 (PR target/68655)

2015-12-03 Thread Jakub Jelinek
Hi!

As discussed in the PR, for some permutation we can get better code
if we try to expand it as if it was a permutation in a mode with the
same vector size, but wider vector element.  The first attempt to do this
always had mixed results, lots of improvements, lots of pessimizations,
this one at least on gcc.dg/vshuf*
{-msse2,-msse4,-mavx,-mavx2,-mavx512f,-mavx512bw} shows only
improvements - it tries the original permutation for single insn,
if that doesn't work tries the wider one single insn, and then
as complete fallback, if we don't have any expansion whatsoever, tries
the wider one too.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-12-03  Jakub Jelinek  

PR target/68655
* config/i386/i386.c (canonicalize_vector_int_perm): New function.
(expand_vec_perm_1): Use it and recurse if everything else
failed.  Use nd.perm instead of perm2.
(expand_vec_perm_even_odd_1): If testing_p, use gen_raw_REG
instead of gen_lowpart for the target.
(ix86_expand_vec_perm_const_1): Use canonicalize_vector_int_perm
and recurse if everything else failed.

* gcc.dg/torture/vshuf-4.inc (TESTS): Add one extra test.
* gcc.dg/torture/vshuf-4.inc (TESTS): Add two extra tests.

--- gcc/config/i386/i386.c.jj   2015-12-02 20:27:00.0 +0100
+++ gcc/config/i386/i386.c  2015-12-03 15:03:13.415764986 +0100
@@ -49365,6 +49365,57 @@ expand_vec_perm_pshufb (struct expand_ve
   return true;
 }
 
+/* For V*[QHS]Imode permutations, check if the same permutation
+   can't be performed in a 2x, 4x or 8x wider inner mode.  */
+
+static bool
+canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
+ struct expand_vec_perm_d *nd)
+{
+  int i;
+  enum machine_mode mode = VOIDmode;
+
+  switch (d->vmode)
+{
+case V16QImode: mode = V8HImode; break;
+case V32QImode: mode = V16HImode; break;
+case V64QImode: mode = V32HImode; break;
+case V8HImode: mode = V4SImode; break;
+case V16HImode: mode = V8SImode; break;
+case V32HImode: mode = V16SImode; break;
+case V4SImode: mode = V2DImode; break;
+case V8SImode: mode = V4DImode; break;
+case V16SImode: mode = V8DImode; break;
+default: return false;
+}
+  for (i = 0; i < d->nelt; i += 2)
+if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
+  return false;
+  nd->vmode = mode;
+  nd->nelt = d->nelt / 2;
+  for (i = 0; i < nd->nelt; i++)
+nd->perm[i] = d->perm[2 * i] / 2;
+  if (GET_MODE_INNER (mode) != DImode)
+canonicalize_vector_int_perm (nd, nd);
+  if (nd != d)
+{
+  nd->one_operand_p = d->one_operand_p;
+  nd->testing_p = d->testing_p;
+  if (d->op0 == d->op1)
+   nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
+  else
+   {
+ nd->op0 = gen_lowpart (nd->vmode, d->op0);
+ nd->op1 = gen_lowpart (nd->vmode, d->op1);
+   }
+  if (d->testing_p)
+   nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
+  else
+   nd->target = gen_reg_rtx (nd->vmode);
+}
+  return true;
+}
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
in a single instruction.  */
 
@@ -49372,7 +49423,7 @@ static bool
 expand_vec_perm_1 (struct expand_vec_perm_d *d)
 {
   unsigned i, nelt = d->nelt;
-  unsigned char perm2[MAX_VECT_LEN];
+  struct expand_vec_perm_d nd;
 
   /* Check plain VEC_SELECT first, because AVX has instructions that could
  match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
@@ -49385,10 +49436,10 @@ expand_vec_perm_1 (struct expand_vec_per
 
   for (i = 0; i < nelt; i++)
{
- perm2[i] = d->perm[i] & mask;
- if (perm2[i] != i)
+ nd.perm[i] = d->perm[i] & mask;
+ if (nd.perm[i] != i)
identity_perm = false;
- if (perm2[i])
+ if (nd.perm[i])
broadcast_perm = false;
}
 
@@ -49457,7 +49508,7 @@ expand_vec_perm_1 (struct expand_vec_per
}
}
 
-  if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
+  if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
return true;
 
   /* There are plenty of patterns in sse.md that are written for
@@ -49468,10 +49519,10 @@ expand_vec_perm_1 (struct expand_vec_per
 every other permutation operand.  */
   for (i = 0; i < nelt; i += 2)
{
- perm2[i] = d->perm[i] & mask;
- perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
+ nd.perm[i] = d->perm[i] & mask;
+ nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
}
-  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
+  if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
  d->testing_p))
return true;
 
@@ -49480,13 +49531,13 @@ expand_vec_perm_1 (struct expand_vec_per
{
  for (i

Re: [PATCH 4/4][AArch64] Add cost model for Exynos M1

2015-12-03 Thread Evandro Menezes

On 11/05/2015 06:09 PM, Evandro Menezes wrote:

2015-10-25  Evandro Menezes 

   gcc/

   * config/aarch64/aarch64-cores.def: Use the Exynos M1 cost model.
   * config/aarch64/aarch64.c (exynosm1_addrcost_table): New 
variable.

   (exynosm1_regmove_cost): Likewise.
   (exynosm1_vector_cost): Likewise.
   (exynosm1_tunings): Likewise.
   * config/arm/aarch-cost-tables.h (exynosm1_extra_costs): Likewise.
   * config/arm/arm.c (arm_exynos_m1_tune): Likewise.

This patch adds the cost model for Exynos M1.  This patch depends on a 
couple of previous patches though, 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00505.html and 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00538.html


Checked in as r231233.

Thank you,

--
Evandro Menezes



Re: [PATCH] Handle OBJ_TYPE_REF in FRE

2015-12-03 Thread Richard Biener
On December 3, 2015 6:40:07 PM GMT+01:00, Jan Hubicka  wrote:
>> 
>> The following patch handles CSEing OBJ_TYPE_REF which was omitted
>> because it is a GENERIC expression even on GIMPLE (for whatever
>
>Why it is generic? It is part of gimple grammar :)
>
>> reason...).  Rather than changing this now the following patch
>> simply treats it properly as such.
>
>Thanks for working on this! Will this do code motion, too?

It will do PRE, so "yes".

>I think you may want to compare the ODR type of obj_type_ref_class
>otherwise two otherwise equivalent OBJ_TYPE_REFs may lead to different
>optimizations later.  I suppose we can have code of form
>
>if (test)
>  OBJ_TYPE_REF1
>  ...
>else
>  OBJ_TYPE_REF2
>  ..
>where each invoke method of different class type but would otherwise
>match as equivalent for tree-ssa-sccvn because we ignore pointed-to
>types.
>so doing
>
>OBJ_TYPE_REF1
>if (test)
>  ...
>else
>  ...
>
>may lead to wrong code.

Can you try generating a testcase?  Because with equal vptr and voffset I can't 
see how that can happen unless some pass extracts information from the pointer 
types without sanity checking with the pointers and offsets.

>Or do you just substitute the operands of OBJ_TYPE_REF? 

No, I value number them.  But yes, the type issue also crossed my mind.  
Meanwhile testing revealed that I need to adjust gimple_expr_type to preserve 
the type of the obj-type-ref, otherwise the devirt machinery ICEs (receiving 
void *). That's also a reason we can't make obj-type-ref a ternary RHS.

>> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
>> 
>> Note that this does not (yet) substitute OBJ_TYPE_REFs in calls
>> with SSA names that have the same value - not sure if that would
>> be desired generally (does the devirt machinery cope with that?).
>
>This should work fine.

OK. So with that substituting the direct call later should work as well.

Richard.

>> 
>> Thanks,
>> Richard.
>> 
>> 2015-12-03  Richard Biener  
>> 
>>  PR tree-optimization/64812
>>  * tree-ssa-sccvn.c (vn_get_stmt_kind): Handle OBJ_TYPE_REF.
>>  (vn_nary_length_from_stmt): Likewise.
>>  (init_vn_nary_op_from_stmt): Likewise.
>>  * gimple-match-head.c (maybe_build_generic_op): Likewise.
>>  * gimple-pretty-print.c (dump_unary_rhs): Likewise.
>> 
>>  * g++.dg/tree-ssa/ssa-fre-1.C: New testcase.
>> 
>> Index: gcc/tree-ssa-sccvn.c
>> ===
>> *** gcc/tree-ssa-sccvn.c (revision 231221)
>> --- gcc/tree-ssa-sccvn.c (working copy)
>> *** vn_get_stmt_kind (gimple *stmt)
>> *** 460,465 
>> --- 460,467 
>>? VN_CONSTANT : VN_REFERENCE);
>>  else if (code == CONSTRUCTOR)
>>return VN_NARY;
>> +else if (code == OBJ_TYPE_REF)
>> +  return VN_NARY;
>>  return VN_NONE;
>>}
>>default:
>> *** vn_nary_length_from_stmt (gimple *stmt)
>> *** 2479,2484 
>> --- 2481,2487 
>> return 1;
>>   
>>   case BIT_FIELD_REF:
>> + case OBJ_TYPE_REF:
>> return 3;
>>   
>>   case CONSTRUCTOR:
>> *** init_vn_nary_op_from_stmt (vn_nary_op_t
>> *** 2508,2513 
>> --- 2511,2517 
>> break;
>>   
>>   case BIT_FIELD_REF:
>> + case OBJ_TYPE_REF:
>> vno->length = 3;
>> vno->op[0] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
>> vno->op[1] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 1);
>> Index: gcc/gimple-match-head.c
>> ===
>> *** gcc/gimple-match-head.c  (revision 231221)
>> --- gcc/gimple-match-head.c  (working copy)
>> *** maybe_build_generic_op (enum tree_code c
>> *** 243,248 
>> --- 243,249 
>> *op0 = build1 (code, type, *op0);
>> break;
>>   case BIT_FIELD_REF:
>> + case OBJ_TYPE_REF:
>> *op0 = build3 (code, type, *op0, op1, op2);
>> break;
>>   default:;
>> Index: gcc/gimple-pretty-print.c
>> ===
>> *** gcc/gimple-pretty-print.c(revision 231221)
>> --- gcc/gimple-pretty-print.c(working copy)
>> *** dump_unary_rhs (pretty_printer *buffer,
>> *** 302,308 
>>|| TREE_CODE_CLASS (rhs_code) == tcc_reference
>>|| rhs_code == SSA_NAME
>>|| rhs_code == ADDR_EXPR
>> !  || rhs_code == CONSTRUCTOR)
>>  {
>>dump_generic_node (buffer, rhs, spc, flags, false);
>>break;
>> --- 302,309 
>>|| TREE_CODE_CLASS (rhs_code) == tcc_reference
>>|| rhs_code == SSA_NAME
>>|| rhs_code == ADDR_EXPR
>> !  || rhs_code == CONSTRUCTOR
>> !  || rhs_code == OBJ_TYPE_REF)
>>  {
>>dump_generic_node (buffer, rhs, spc, flags, false);
>>break;
>> Index: gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C
>> =

Re: [PATCH 07/10] Fix g++.dg/template/ref3.C

2015-12-03 Thread Jason Merrill

On 12/03/2015 09:55 AM, David Malcolm wrote:

Testcase g++.dg/template/ref3.C:

  1 // PR c++/28341
  2
  3 template struct A {};
  4
  5 template struct B
  6 {
  7   A<(T)0> b; // { dg-error "constant|not a valid" }
  8   A a; // { dg-error "constant|not a valid" }
  9 };
 10
 11 B b;

The output of this test for both c++11 and c++14 is unaffected
by the patch kit:
  g++.dg/template/ref3.C: In instantiation of 'struct B':
  g++.dg/template/ref3.C:11:15:   required from here
  g++.dg/template/ref3.C:7:11: error: '0' is not a valid template argument for type 
'const int&' because it is not an lvalue
  g++.dg/template/ref3.C:8:11: error: '0' is not a valid template argument for type 
'const int&' because it is not an lvalue

However, the c++98 output is changed:

Status quo for c++98:
g++.dg/template/ref3.C: In instantiation of 'struct B':
g++.dg/template/ref3.C:11:15:   required from here
g++.dg/template/ref3.C:7:11: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression
g++.dg/template/ref3.C:8:11: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression

(line 7 and 8 are at the closing semicolon for fields b and a)

With the patchkit for c++98:
g++.dg/template/ref3.C: In instantiation of 'struct B':
g++.dg/template/ref3.C:11:15:   required from here
g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression
g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression

So the 2nd:
   "error: a cast to a type other than an integral or enumeration type cannot appear 
in a constant-expression"
moves from line 8 to line 7 (and moves them to earlier, having ranges)

What's happening is that cp_parser_enclosed_template_argument_list
builds a CAST_EXPR, the first time from cp_parser_cast_expression,
the second time from cp_parser_functional_cast; these have locations
representing the correct respective caret&ranges, i.e.:

A<(T)0> b;
  ^~~~

and:

A<T(0)> a;
  ^~~~

Eventually finish_template_type is called for each, to build a RECORD_TYPE,
and we get a cache hit the 2nd time through here in pt.c:
8281  hash = spec_hasher::hash (&elt);
8282  entry = type_specializations->find_with_hash (&elt, hash);
8283
8284  if (entry)
8285return entry->spec;

due to:
   template_args_equal (ot=, nt=) at ../../src/gcc/cp/pt.c:7778
which calls:
   cp_tree_equal (t1=, t2=) 
at ../../src/gcc/cp/tree.c:2833
and returns equality.

Hence we get a single RECORD_TYPE for the type A<(T)(0)>, and hence
when issuing the errors it uses the TREE_VEC for the first one,
using the location of the first line.


Why does the type sharing affect where the parser gives the error?


I'm not sure what the ideal fix for this is; for now I've worked
around it by updating the dg directives to reflect the new output.

gcc/testsuite/ChangeLog:
* g++.dg/template/ref3.C: Update locations of dg directives.
---
  gcc/testsuite/g++.dg/template/ref3.C | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/template/ref3.C 
b/gcc/testsuite/g++.dg/template/ref3.C
index 976c093..6e568c3 100644
--- a/gcc/testsuite/g++.dg/template/ref3.C
+++ b/gcc/testsuite/g++.dg/template/ref3.C
@@ -4,8 +4,10 @@ template struct A {};

  template struct B
  {
-  A<(T)0> b; // { dg-error "constant|not a valid" }
-  A a; // { dg-error "constant|not a valid" }
+  A<(T)0> b; // { dg-error "constant" "" { target c++98_only } }
+  // { dg-error "not a valid" "" { target c++11 } 7 }
+
+  A a; // { dg-error "not a valid" "" { target c++11 } }
  };

  B b;





Re: [PATCH 02/10] Fix g++.dg/cpp0x/nsdmi-template14.C

2015-12-03 Thread Jason Merrill

On 12/03/2015 09:55 AM, David Malcolm wrote:

This patch adds bulletproofing to detect purged tokens, and avoid using
them.

Alternatively, is it OK to access purged tokens for this kind of thing?
If so, would it make more sense to instead leave their locations untouched
when purging them?


I think cp_lexer_previous_token should skip past purged tokens.

Jason



Re: [PATCH][PR tree-optimization/67816] Fix jump threading when DOM removes conditionals in jump threading path

2015-12-03 Thread Jeff Law

On 12/02/2015 08:35 AM, Richard Biener wrote:



The most interesting side effect, and one I haven't fully analyzed yet is an
unexpected jump thread -- which I've traced back to differences in what the
alias oracle is able to find when we walk unaliased vuses. Which makes
totally no sense that it's unable to find the unaliased vuse in the
simplified CFG, but finds it when we don't remove the unexecutable edge.  As
I said, it makes no sense to me yet and I'm still digging.


The walking of PHI nodes is quite simplistic to avoid doing too much work so
an extra (not executable) edge may confuse it enough.  So this might be
"expected".  Adding a flag on whether EDGE_EXECUTABLE is to be
trusted would be an option (also helping SCCVN).
Found it.  In the CFG with the unexecutable edges _not_ removed there is 
a PHI associated with that edge which provides a dominating unaliased 
vuse.  Once that edge is removed, the PHI arg gets removed and thus we 
can't easily see the unaliased vuse.


So all is working as expected.  It wasn't ever a big issue, I just 
wanted to make sure I thoroughly understood the somewhat 
counter-intuitive result.


Jeff


Re: [PATCH] Fix shrink-wrap bug with anticipating into loops (PR67778, PR68634)

2015-12-03 Thread Segher Boessenkool
On Thu, Dec 03, 2015 at 12:31:53PM +0100, Bernd Schmidt wrote:
> On 12/02/2015 07:21 PM, Segher Boessenkool wrote:
> >After shrink-wrapping has found the "tightest fit" for where to place
> >the prologue, it tries to move it earlier (so that frame saves are run
> >earlier) -- but without copying any more basic blocks.
> >
> >Unfortunately a candidate block we select can be inside a loop, and we
> >will still allow it (because the loop always exits via our previously
> >chosen block).
> 
> >So we need to detect this situation.  We can place the prologue at a
> >previous block PRE only if PRE dominates every block reachable from
> >it.  This is a bit hard / expensive to compute, so instead this patch
> >allows a block PRE only if PRE does not post-dominate any of its
> >successors (other than itself).
> 
> Are the two conditions equivalent though?

They are not, one is a subset of the other.  By construction, the block
PRE (the new candidate for getting the prologue) dominates PRO (the
original block to get the prologue), and PRO post-dominates PRE.  Now,
PRE is only suitable if it dominates every block reachable from it,
since otherwise putting the prologue on PRE instead of on PRO requires
duplicating more blocks.

Hrm.  A successor block of PRE could loop back to PRE conditionally,
and go to PRO otherwise.  Rats, what was I thinking.  Thanks for catching
it; I'll have to think of something better.  A bit more factoring will
probably help, we'll see.

> I think I agree with Jakub that we don't want to do unnecessary work in 
> this piece of code.

I agree as well.

> >/* If we can move PRO back without having to duplicate more blocks, do 
> >so.
> >   We can move back to a block PRE if every path from PRE will 
> >   eventually
> >- need a prologue, that is, PRO is a post-dominator of PRE.  */
> >+ need a prologue, that is, PRO is a post-dominator of PRE.  We might
> >+ need to duplicate PRE if there is any path from a successor of PRE 
> >back
> >+ to PRE, so don't allow that either (but self-loops are fine, as are 
> >any
> >+ other loops entirely dominated by PRE; this in general seems too
> >+ expensive to check for, for such an uncommon case).  */
> 
> The last comment is unclear and I don't know what it wants to tell me.

Yeah, sorry.  Writing text is hard :-)


Segher


[PATCH] Fix missing range information for "%q+D" format code

2015-12-03 Thread David Malcolm
There are about 220 or so diagnostics in trunk that use "%q+D" in
their format string, which, as well as printing a quoted decl,
overwrites any location_t supplied to the diagnostic, instead using
the location of the associated decl.

During development of the location range patch kit I adjusted
things to use both location&range of the decl for this case, but it
looks like I broke it at some point; in the version in trunk the code is
currently discarding range information, so that just the caret is
printed.

For example:

diagnostic-ranges-1.c:6:7: warning: unused variable 'redundant' 
[-Wunused-variable]
   int redundant;
       ^
The attached patch updates the handling of %q+D, simplifying
the implementation, and ensuring that it retains the range
information of the decl, giving:

diagnostic-ranges-1.c:6:7: warning: unused variable ‘redundant’ 
[-Wunused-variable]
   int redundant;
       ^~~~~~~~~

As well as the above fix, the patch adds test coverage, both
- for the specific case above, and
- as a unit test for %q+D via one of the existing test plugins

Successfully bootstrapped&regrtested on x86_64-pc-linux-gnu; adds
5 PASS results to gcc.sum.

OK for trunk?

gcc/c-family/ChangeLog:
* c-common.c (c_cpp_error): Update for change to
rich_location::set_range.

gcc/fortran/ChangeLog:
* error.c (gfc_format_decoder): Update for change of
text_info::set_range to text_info::set_location.

gcc/ChangeLog:
* pretty-print.c (text_info::set_range): Rename to...
(text_info::set_location): ...this, converting 2nd param
from source_range to a location_t.
* pretty-print.h (text_info::set_location): Convert
from inline function to external definition.
(text_info::set_range): Delete.

gcc/testsuite/ChangeLog:
* gcc.dg/diagnostic-ranges-1.c: New test file.
* gcc.dg/plugin/diagnostic-test-show-locus-bw.c
(test_percent_q_plus_d): New test function.
* gcc.dg/plugin/diagnostic_plugin_test_show_locus.c
(test_show_locus): Rewrite test code using
rich_location::set_range.  Add code to unit-test the "%q+D"
format code.

libcpp/ChangeLog:
* include/line-map.h (rich_location::set_range): Add line_maps *
param; convert param from source_range to source_location.  Drop
"overwrite_loc_p" param.
* line-map.c (rich_location::set_range): Likewise, acting as if
"overwrite_loc_p" were true, and getting range from the location.
---
 gcc/c-family/c-common.c|  4 +---
 gcc/fortran/error.c| 11 -
 gcc/pretty-print.c |  6 ++---
 gcc/pretty-print.h |  9 +---
 gcc/testsuite/gcc.dg/diagnostic-ranges-1.c | 11 +
 .../gcc.dg/plugin/diagnostic-test-show-locus-bw.c  | 12 ++
 .../plugin/diagnostic_plugin_test_show_locus.c | 27 +-
 libcpp/include/line-map.h  |  4 ++--
 libcpp/line-map.c  | 14 ++-
 9 files changed, 64 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/diagnostic-ranges-1.c

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index a8122b3..59cfc19 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10129,9 +10129,7 @@ c_cpp_error (cpp_reader *pfile ATTRIBUTE_UNUSED, int 
level, int reason,
   gcc_unreachable ();
 }
   if (done_lexing)
-richloc->set_range (0,
-   source_range::from_location (input_location),
-   true, true);
+richloc->set_range (line_table, 0, input_location, true);
   diagnostic_set_info_translated (&diagnostic, msg, ap,
  richloc, dlevel);
   diagnostic_override_option_index (&diagnostic,
diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c
index b4f7020..8f57aff 100644
--- a/gcc/fortran/error.c
+++ b/gcc/fortran/error.c
@@ -939,12 +939,11 @@ gfc_format_decoder (pretty_printer *pp,
/* If location[0] != UNKNOWN_LOCATION means that we already
   processed one of %C/%L.  */
int loc_num = text->get_location (0) == UNKNOWN_LOCATION ? 0 : 1;
-   source_range range
- = source_range::from_location (
- linemap_position_for_loc_and_offset (line_table,
-  loc->lb->location,
-  offset));
-   text->set_range (loc_num, range, true);
+   location_t src_loc
+ = linemap_position_for_loc_and_offset (line_table,
+loc->lb->location,
+offset);
+   text->set_location (loc_num, src_loc, true);
pp_string (pp, result[loc_num]);
return true;
   }
diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c
index 4a28d3c..3365074 1006

Re: [Fortran, Patch] (RFC, Coarray) Implement TS18508's EVENTS

2015-12-03 Thread Alessandro Fanfarillo
Yes please.

Thanks.

2015-12-02 23:00 GMT+01:00 Steve Kargl :
> Committed as revision 231208.
>
> Alessandro, Tobias, is this a candidate for a commit to
> the 5-branch when it is re-opened?
>
> --
> steve
>
> On Wed, Dec 02, 2015 at 03:16:05PM +0100, Alessandro Fanfarillo wrote:
>> *PING*
>>
>> 2015-11-26 17:51 GMT+01:00 Steve Kargl :
>> > On Wed, Nov 25, 2015 at 06:24:49PM +0100, Alessandro Fanfarillo wrote:
>> >> Dear all,
>> >>
>> >> in attachment the previous patch compatible with the current trunk.
>> >> The patch also includes the changes introduced in the latest TS 18508.
>> >>
>> >> Built and regtested on x86_64-pc-linux-gnu.
>> >>
>> >> PS: I will add the test cases in a different patch.
>> >>
>> >
>> > I have now built and regression tested the patch on
>> > x86_64-*-freebsd and i386-*-freebsd.  There were no
>> > regressions.  In reading through the patch, nothing
>> > jumped out at me as suspicious/wrong.  Tobias, this
>> > is OK to commit.  If you don't commit it by Sunday,
>> > I'll do it for you.
>> >
>> > --
>> > steve
>
> --
> Steve


Re: [PATCH] Fix shrink-wrap bug with anticipating into loops (PR67778, PR68634)

2015-12-03 Thread Segher Boessenkool
On Thu, Dec 03, 2015 at 12:35:51PM +0100, Bernd Schmidt wrote:
> On 12/02/2015 07:21 PM, Segher Boessenkool wrote:
> >After shrink-wrapping has found the "tightest fit" for where to place
> >the prologue, it tries to move it earlier (so that frame saves are run
> >earlier) -- but without copying any more basic blocks.
> 
> Another question would be - is there really a good reason to do this at all?

I haven't actually benchmarked it to see if it in fact matters for
performance.  The original code did something similar, but perhaps not
for the same reasons.  The goal is to put the prologue as early as
possible while only putting it on paths that need it (the code before
here puts it as *late* as possible instead).

Moving the prologue earlier gives more free registers (the ones it saved)
in the blocks "skipped", so that late passes have more to work with.
More importantly, moving the prologue and the epilogue further apart
avoids some execution hazards.


Segher


Re: Add fuzzing coverage support

2015-12-03 Thread Dmitry Vyukov
On Thu, Dec 3, 2015 at 7:34 PM, Dmitry Vyukov  wrote:
> I've attached updated patch (also reuploaded
> https://codereview.appspot.com/280140043).
> Fixed ChangeLog.
> Added invoke.texi.
> Fixed style issues.
>
> The function is defined only in kernel at the moment. Here is my patch:
> https://github.com/dvyukov/linux/commit/f86eda0c895c47ea02ee37e981aeade7b03014d7
> It has not been mailed yet, because the kernel asan people requested that it
> be submitted to gcc first, then to the kernel.
>
> It will also be supported by libsanitizer later (Kostya?). But it is
> not yet there.
>
> Regarding plugins, we did tsan first as gcc plugin. It was difficult
> to support, difficult to use, difficult to distribute. I maintain this
> patch for a month, two people complained that it does not build
> (because they synched to slightly different revisions).


Added missing:
  stmt = gsi_stmt (gsi);
Now I have actually run the tests and compiled the kernel with it.
Index: ChangeLog
===
--- ChangeLog	(revision 231234)
+++ ChangeLog	(working copy)
@@ -1,3 +1,15 @@
+2015-12-03  Dmitry Vyukov  
+
+	* sancov.c: New file.
+	* Makefile.in (OBJS): Add sancov.o.
+	* invoke.texi (-fsanitize-coverage=trace-pc): Describe.
+	* passes.def (sancov_pass): Add.
+	* tree-pass.h  (sancov_pass): Add.
+	* common.opt (-fsanitize-coverage=trace-pc): Add.
+	* sanitizer.def (BUILT_IN_SANITIZER_COV_TRACE_PC): Add.
+	* builtins.def (DEF_SANITIZER_BUILTIN): Enable for
+	flag_sanitize_coverage.
+
 2015-12-03  Evandro Menezes  
 
 	* config/aarch64/aarch64-cores.def: Use the Exynos M1 cost model.
@@ -360,7 +372,6 @@
 	* tree-ssa-structalias.c (find_func_aliases_for_builtin_call)
 	(find_func_clobbers, ipa_pta_execute): Handle BUILT_IN_GOACC_PARALLEL.
 
->>> .r231221
 2015-12-02  Segher Boessenkool  
 
 	* config/rs6000/rs6000.md (cstore_si_as_di): New expander.
Index: Makefile.in
===
--- Makefile.in	(revision 231234)
+++ Makefile.in	(working copy)
@@ -1427,6 +1427,7 @@
 	tsan.o \
 	ubsan.o \
 	sanopt.o \
+	sancov.o \
 	tree-call-cdce.o \
 	tree-cfg.o \
 	tree-cfgcleanup.o \
@@ -2400,6 +2401,7 @@
   $(srcdir)/ubsan.c \
   $(srcdir)/tsan.c \
   $(srcdir)/sanopt.c \
+  $(srcdir)/sancov.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
   @all_gtfiles@
Index: builtins.def
===
--- builtins.def	(revision 231234)
+++ builtins.def	(working copy)
@@ -210,7 +210,8 @@
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,\
 	   true, true, true, ATTRS, true, \
 	  (flag_sanitize & (SANITIZE_ADDRESS | SANITIZE_THREAD \
-| SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT)))
+| SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT) \
+	   || flag_sanitize_coverage))
 
 #undef DEF_CILKPLUS_BUILTIN
 #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS)  \
Index: common.opt
===
--- common.opt	(revision 231234)
+++ common.opt	(working copy)
@@ -225,6 +225,11 @@
 Variable
 unsigned int flag_sanitize_recover = SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT | SANITIZE_KERNEL_ADDRESS
 
+fsanitize-coverage=trace-pc
+Common Report Var(flag_sanitize_coverage)
+Enable coverage-guided fuzzing code instrumentation.
+Inserts call to __sanitizer_cov_trace_pc into every basic block.
+
 ; Flag whether a prefix has been added to dump_base_name
 Variable
 bool dump_base_name_prefixed = false
Index: doc/invoke.texi
===
--- doc/invoke.texi	(revision 231234)
+++ doc/invoke.texi	(working copy)
@@ -6135,6 +6135,11 @@
 @code{libubsan} library is not needed and is not linked in, so this
 is usable even in freestanding environments.
 
+@item -fsanitize-coverage=trace-pc
+@opindex fsanitize-coverage=trace-pc
+Enable coverage-guided fuzzing code instrumentation.
+Inserts call to __sanitizer_cov_trace_pc into every basic block.
+
 @item -fcheck-pointer-bounds
 @opindex fcheck-pointer-bounds
 @opindex fno-check-pointer-bounds
Index: passes.def
===
--- passes.def	(revision 231234)
+++ passes.def	(working copy)
@@ -237,6 +237,7 @@
   NEXT_PASS (pass_split_crit_edges);
   NEXT_PASS (pass_pre);
   NEXT_PASS (pass_sink_code);
+  NEXT_PASS (pass_sancov);
   NEXT_PASS (pass_asan);
   NEXT_PASS (pass_tsan);
   /* Pass group that runs when 1) enabled, 2) there are loops
@@ -346,6 +347,7 @@
  to forward object-size and builtin folding results properly.  */
   NEXT_PASS (pass_copy_prop);
   NEXT_PASS (pass_dce);
+  NEXT_PASS (pass_sancov);
   NEXT_PASS (pass_asan);
   NEXT_PASS (pass_tsan);
   /* ???  We do want some kind of loop invariant motion, but we possibly
@@ -369,6 +371,7 @@
   NEXT_PASS (pass_lower_vaarg);
   NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_lower_complex_O0);
+  NE

Re: [hsa] Make copy_gimple_seq_and_replace_locals copy seqs in omp clauses

2015-12-03 Thread Jakub Jelinek
On Thu, Dec 03, 2015 at 07:26:20PM +0100, Martin Jambor wrote:
> this is a fix to the last "last" ICE of the hsa branch.  The problem
> turned out not to be in the gridification itself but, depending on your
> point of view, in the gimple and tree walking infrastructure or in
> function copy_gimple_seq_and_replace_locals from tree-inline.c on
> which hsa gridification relies.
> 
> The issue is that in between gimplification and omplow pass, there can
> be gimple sequences attached to OMP_CLAUSE trees that are attached to
> omp statements and that are neither copied by gimple_seq_copy nor
> walked by walk_gimple_seq.
> 
> While the correct solution would probably be to extend tree and gimple
> walkers to handle them, that would be a big change.  I have talked
> with Jakub about this yesterday on the IRC and he suggested that I
> enhance the internal walkers of copy_gimple_seq_and_replace_locals
> to deal with this situation.  Even though that leaves gimple_seq_copy,
> walk_gimple_seq and others to be technically incorrect, that is what I
> have done in the patch below, which fixes my last ICEs and which I
> have already committed to the branch.

The point is that those gimple_seqs are there only from gimplification
till omplower, and I believe nothing else for now cares about those.
> @@ -5200,6 +5231,18 @@ replace_locals_stmt (gimple_stmt_iterator *gsip,
>return NULL_TREE;
>  }
>  
> +/* Create a copy of SEQ and remap all decls in it.  */
> +
> +static gimple_seq
> +duplicate_remap_omp_clause_seq (gimple_seq seq, struct walk_stmt_info *wi)
> +{

I would have expected an early if (seq == NULL) return NULL; either here,
or in the callers (not doing anything in the common case when it is NULL).

Jakub


Re: Add fuzzing coverage support

2015-12-03 Thread Dmitry Vyukov
I've attached updated patch (also reuploaded
https://codereview.appspot.com/280140043).
Fixed ChangeLog.
Added invoke.texi.
Fixed style issues.

The function is defined only in kernel at the moment. Here is my patch:
https://github.com/dvyukov/linux/commit/f86eda0c895c47ea02ee37e981aeade7b03014d7
It has not been mailed yet, because the kernel asan people requested that it
be submitted to gcc first, then to the kernel.

It will also be supported by libsanitizer later (Kostya?). But it is
not yet there.

Regarding plugins, we did tsan first as gcc plugin. It was difficult
to support, difficult to use, difficult to distribute. I maintain this
patch for a month, two people complained that it does not build
(because they synched to slightly different revisions).
Index: ChangeLog
===
--- ChangeLog	(revision 231234)
+++ ChangeLog	(working copy)
@@ -1,3 +1,15 @@
+2015-12-03  Dmitry Vyukov  
+
+	* sancov.c: New file.
+	* Makefile.in (OBJS): Add sancov.o.
+	* invoke.texi (-fsanitize-coverage=trace-pc): Describe.
+	* passes.def (sancov_pass): Add.
+	* tree-pass.h  (sancov_pass): Add.
+	* common.opt (-fsanitize-coverage=trace-pc): Add.
+	* sanitizer.def (BUILT_IN_SANITIZER_COV_TRACE_PC): Add.
+	* builtins.def (DEF_SANITIZER_BUILTIN): Enable for
+	flag_sanitize_coverage.
+
 2015-12-03  Evandro Menezes  
 
 	* config/aarch64/aarch64-cores.def: Use the Exynos M1 cost model.
@@ -360,7 +372,6 @@
 	* tree-ssa-structalias.c (find_func_aliases_for_builtin_call)
 	(find_func_clobbers, ipa_pta_execute): Handle BUILT_IN_GOACC_PARALLEL.
 
->>> .r231221
 2015-12-02  Segher Boessenkool  
 
 	* config/rs6000/rs6000.md (cstore_si_as_di): New expander.
Index: Makefile.in
===
--- Makefile.in	(revision 231234)
+++ Makefile.in	(working copy)
@@ -1427,6 +1427,7 @@
 	tsan.o \
 	ubsan.o \
 	sanopt.o \
+	sancov.o \
 	tree-call-cdce.o \
 	tree-cfg.o \
 	tree-cfgcleanup.o \
@@ -2400,6 +2401,7 @@
   $(srcdir)/ubsan.c \
   $(srcdir)/tsan.c \
   $(srcdir)/sanopt.c \
+  $(srcdir)/sancov.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
   @all_gtfiles@
Index: builtins.def
===
--- builtins.def	(revision 231234)
+++ builtins.def	(working copy)
@@ -210,7 +210,8 @@
   DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE,\
 	   true, true, true, ATTRS, true, \
 	  (flag_sanitize & (SANITIZE_ADDRESS | SANITIZE_THREAD \
-| SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT)))
+| SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT) \
+	   || flag_sanitize_coverage))
 
 #undef DEF_CILKPLUS_BUILTIN
 #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS)  \
Index: common.opt
===
--- common.opt	(revision 231234)
+++ common.opt	(working copy)
@@ -225,6 +225,11 @@
 Variable
 unsigned int flag_sanitize_recover = SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT | SANITIZE_KERNEL_ADDRESS
 
+fsanitize-coverage=trace-pc
+Common Report Var(flag_sanitize_coverage)
+Enable coverage-guided fuzzing code instrumentation.
+Inserts call to __sanitizer_cov_trace_pc into every basic block.
+
 ; Flag whether a prefix has been added to dump_base_name
 Variable
 bool dump_base_name_prefixed = false
Index: doc/invoke.texi
===
--- doc/invoke.texi	(revision 231234)
+++ doc/invoke.texi	(working copy)
@@ -6135,6 +6135,11 @@
 @code{libubsan} library is not needed and is not linked in, so this
 is usable even in freestanding environments.
 
+@item -fsanitize-coverage=trace-pc
+@opindex fsanitize-coverage=trace-pc
+Enable coverage-guided fuzzing code instrumentation.
+Inserts call to __sanitizer_cov_trace_pc into every basic block.
+
 @item -fcheck-pointer-bounds
 @opindex fcheck-pointer-bounds
 @opindex fno-check-pointer-bounds
Index: passes.def
===
--- passes.def	(revision 231234)
+++ passes.def	(working copy)
@@ -237,6 +237,7 @@
   NEXT_PASS (pass_split_crit_edges);
   NEXT_PASS (pass_pre);
   NEXT_PASS (pass_sink_code);
+  NEXT_PASS (pass_sancov);
   NEXT_PASS (pass_asan);
   NEXT_PASS (pass_tsan);
   /* Pass group that runs when 1) enabled, 2) there are loops
@@ -346,6 +347,7 @@
  to forward object-size and builtin folding results properly.  */
   NEXT_PASS (pass_copy_prop);
   NEXT_PASS (pass_dce);
+  NEXT_PASS (pass_sancov);
   NEXT_PASS (pass_asan);
   NEXT_PASS (pass_tsan);
   /* ???  We do want some kind of loop invariant motion, but we possibly
@@ -369,6 +371,7 @@
   NEXT_PASS (pass_lower_vaarg);
   NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_lower_complex_O0);
+  NEXT_PASS (pass_sancov_O0);
   NEXT_PASS (pass_asan_O0);
   NEXT_PASS (pass_tsan_O0);
   NEXT_PASS (pass_sanopt);
Index: sancov.c
=

[hsa] Make copy_gimple_seq_and_replace_locals copy seqs in omp clauses

2015-12-03 Thread Martin Jambor
Hi,

this is a fix to the last "last" ICE of the hsa branch.  The problem
turned out not to be in the gridification itself but, depending on your
point of view, in the gimple and tree walking infrastructure or in
function copy_gimple_seq_and_replace_locals from tree-inline.c on
which hsa gridification relies.

The issue is that in between gimplification and omplow pass, there can
be gimple sequences attached to OMP_CLAUSE trees that are attached to
omp statements and that are neither copied by gimple_seq_copy nor
walked by walk_gimple_seq.

While the correct solution would probably be to extend tree and gimple
walkers to handle them, that would be a big change.  I have talked
with Jakub about this yesterday on the IRC and he suggested that I
enhance the internal walkers of copy_gimple_seq_and_replace_locals
to deal with this situation.  Even though that leaves gimple_seq_copy,
walk_gimple_seq and others to be technically incorrect, that is what I
have done in the patch below, which fixes my last ICEs and which I
have already committed to the branch.

Any feedback is of course very much appreciated,

Martin


2015-12-03  Martin Jambor  

* tree-inline.c (duplicate_remap_omp_clause_seq): New function.
(replace_locals_op): Duplicate gimple sequences in OMP clauses.

---
 gcc/tree-inline.c | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index ebab189..15141dc 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -5116,6 +5116,8 @@ mark_local_labels_stmt (gimple_stmt_iterator *gsip,
   return NULL_TREE;
 }
 
+static gimple_seq duplicate_remap_omp_clause_seq (gimple_seq seq,
+ struct walk_stmt_info *wi);
 
 /* Called via walk_gimple_seq by copy_gimple_seq_and_replace_local.
Using the splay_tree pointed to by ST (which is really a `splay_tree'),
@@ -5160,6 +5162,35 @@ replace_locals_op (tree *tp, int *walk_subtrees, void 
*data)
  TREE_OPERAND (expr, 3) = NULL_TREE;
}
 }
+  else if (TREE_CODE (expr) == OMP_CLAUSE)
+{
+  /* Before the omplower pass completes, some OMP clauses can contain
+sequences that are neither copied by gimple_seq_copy nor walked by
+walk_gimple_seq.  To make copy_gimple_seq_and_replace_locals work even
+in those situations, we have to copy and process them explicitely.  */
+
+  if (OMP_CLAUSE_CODE (expr) == OMP_CLAUSE_LASTPRIVATE)
+   {
+ gimple_seq seq = OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (expr);
+ seq = duplicate_remap_omp_clause_seq (seq, wi);
+ OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (expr) = seq;
+   }
+  else if (OMP_CLAUSE_CODE (expr) == OMP_CLAUSE_LINEAR)
+   {
+ gimple_seq seq = OMP_CLAUSE_LINEAR_GIMPLE_SEQ (expr);
+ seq = duplicate_remap_omp_clause_seq (seq, wi);
+ OMP_CLAUSE_LINEAR_GIMPLE_SEQ (expr) = seq;
+   }
+  else if (OMP_CLAUSE_CODE (expr) == OMP_CLAUSE_REDUCTION)
+   {
+ gimple_seq seq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (expr);
+ seq = duplicate_remap_omp_clause_seq (seq, wi);
+ OMP_CLAUSE_REDUCTION_GIMPLE_INIT (expr) = seq;
+ seq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (expr);
+ seq = duplicate_remap_omp_clause_seq (seq, wi);
+ OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (expr) = seq;
+   }
+}
 
   /* Keep iterating.  */
   return NULL_TREE;
@@ -5200,6 +5231,18 @@ replace_locals_stmt (gimple_stmt_iterator *gsip,
   return NULL_TREE;
 }
 
+/* Create a copy of SEQ and remap all decls in it.  */
+
+static gimple_seq
+duplicate_remap_omp_clause_seq (gimple_seq seq, struct walk_stmt_info *wi)
+{
+  /* If there are any labels in OMP sequences, they can be only referred to in
+ the sequence itself and therefore we can do both here.  */
+  walk_gimple_seq (seq, mark_local_labels_stmt, NULL, wi);
+  gimple_seq copy = gimple_seq_copy (seq);
+  walk_gimple_seq (copy, replace_locals_stmt, replace_locals_op, wi);
+  return copy;
+}
 
 /* Copies everything in SEQ and replaces variables and labels local to
current_function_decl.  */
-- 
2.6.3



Re: [PATCH] Add options -finstrument-functions-include-{file,function}-list

2015-12-03 Thread Bert Wesarg
Hi,

better write your own instrumentation plug-in and do the filtering on
your own. The plug-in interface exists since 4.5 so you have a much
greater version base that can support your feature already, than some
future version of GCC which may have this patch. While we didn't
announce it here on GCC, we maintain such a plug-in already in Score-P
[1], and the overhead is also much lower (we also have a runtime
filter), we do not instrument inlined functions and functions from
system headers by default, and we do not need debug symbols to get
function names.

Best,
Bert

[1] www.score-p.org

On Thu, Dec 3, 2015 at 7:06 PM, Andi Drebes  wrote:
> By default -finstrument-functions instruments all functions. To limit
> instrumentation to certain functions or files it is necessary to
> specify the complement using -finstrument-functions-exclude-file-list
> or -finstrument-functions-exclude-function-list.
>
> The new options -finstrument-functions-include-file-list and
> -finstrument-functions-include-function-list make the specification of
> the complement unnecessary by allowing the user to limit
> instrumentation to a set of file names and functions.
> ---
>  gcc/common.opt   | 16 ++-
>  gcc/doc/invoke.texi  | 52 
> 
>  gcc/gimplify.c   | 51 ---
>  gcc/opts.c   | 10 +++
>  gcc/testsuite/ChangeLog  | 10 +++
>  gcc/testsuite/gcc.dg/instrument-10.c |  7 +
>  gcc/testsuite/gcc.dg/instrument-4.c  |  7 +
>  gcc/testsuite/gcc.dg/instrument-5.c  |  7 +
>  gcc/testsuite/gcc.dg/instrument-6.c  |  7 +
>  gcc/testsuite/gcc.dg/instrument-7.c  |  7 +
>  gcc/testsuite/gcc.dg/instrument-8.c  |  7 +
>  gcc/testsuite/gcc.dg/instrument-9.c  |  7 +
>  12 files changed, 172 insertions(+), 16 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-10.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-5.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-6.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-7.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-8.c
>  create mode 100644 gcc/testsuite/gcc.dg/instrument-9.c
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 3eb520e..ac797b3 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -97,7 +97,7 @@ int flag_gen_aux_info = 0
>  Variable
>  int flag_shlib
>
> -; These two are really VEC(char_p,heap) *.
> +; These are really VEC(char_p,heap) *.
>
>  Variable
>  void *flag_instrument_functions_exclude_functions
> @@ -105,6 +105,12 @@ void *flag_instrument_functions_exclude_functions
>  Variable
>  void *flag_instrument_functions_exclude_files
>
> +Variable
> +void *flag_instrument_functions_include_functions
> +
> +Variable
> +void *flag_instrument_functions_include_files
> +
>  ; Generic structs (e.g. templates not explicitly specialized)
>  ; may not have a compilation unit associated with them, and so
>  ; may need to be treated differently from ordinary structs.
> @@ -1477,6 +1483,14 @@ finstrument-functions-exclude-file-list=
>  Common RejectNegative Joined
>  -finstrument-functions-exclude-file-list=filename,...  Do not instrument 
> functions listed in files.
>
> +finstrument-functions-include-function-list=
> +Common RejectNegative Joined
> +-finstrument-functions-include-function-list=name,...  Only instrument 
> listed functions.
> +
> +finstrument-functions-include-file-list=
> +Common RejectNegative Joined
> +-finstrument-functions-include-file-list=filename,...  Only instrument 
> functions listed in files.
> +
>  fipa-cp
>  Common Report Var(flag_ipa_cp) Optimization
>  Perform interprocedural constant propagation.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 53f1fe2..ba9a3bd 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1150,6 +1150,8 @@ See S/390 and zSeries Options.
>  -finhibit-size-directive  -finstrument-functions @gol
>  -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
>  -finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol
> +-finstrument-functions-include-function-list=@var{sym},@var{sym},@dots{} @gol
> +-finstrument-functions-include-file-list=@var{file},@var{file},@dots{} @gol
>  -fno-common  -fno-ident @gol
>  -fpcc-struct-return  -fpic  -fPIC -fpie -fPIE -fno-plt @gol
>  -fno-jump-tables @gol
> @@ -24529,6 +24531,56 @@ of the function name, it is considered to be a 
> match.  For C99 and C++
>  extended identifiers, the function name must be given in UTF-8, not
>  using universal character names.
>
> +@item -finstrument-functions-include-file-list=@var{file},@var{file},@dots{}
> +@opindex finstrument-functions-include-file-list
> +
> +Limit function instrumentation to functions from files specified in
> +the list. The matching of file names is identical to the matchi

[hsa] Useful checking assert in scan_omp_1_op

2015-12-03 Thread Martin Jambor
Hi,

I have found the following checking assert very useful
when debugging omp lowering issues, so I have added it to the hsa
branch.  I hope that nobody will mind, but it of course is not an
essential thing to have if someone does.

Thanks,

Martin

2015-12-03  Martin Jambor  

* omp-low.c (scan_omp_1_op): Add checking assert that we are not
re-mapping to ERROR_MARK.
---
 gcc/omp-low.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 8854df7..05d8901 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3731,7 +3731,11 @@ scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
 case LABEL_DECL:
 case RESULT_DECL:
   if (ctx)
-   *tp = remap_decl (t, &ctx->cb);
+   {
+ tree repl = remap_decl (t, &ctx->cb);
+ gcc_checking_assert (TREE_CODE (repl) != ERROR_MARK);
+ *tp = repl;
+   }
   break;
 
 default:
-- 
2.6.3



Re: C PATCH for c/68668 (grokdeclarator and wrong type of PARM_DECL)

2015-12-03 Thread Joseph Myers
On Thu, 3 Dec 2015, Marek Polacek wrote:

> This ought to fix the fallout from PR c/68162 fix.  Here the problem is that
> grokdeclarator created a wrong type for PARM_DECL "p".  It created this decl
> with type "const int[] *" while it should be "const int *".
> 
> I think the problem is that we weren't using TREE_TYPE on orig_qual_type and
> thus c_build_qualified_type and subsequent c_build_pointer_type might create
> a bogus type.  So when we're transferring const-ness of an array into that of
> type pointed to, use TREE_TYPE not only of "type", but even of the orig qual
> type.

I think you also need to decrement orig_qual_indirect, which counts the 
number of levels of array type derivation from orig_qual_type.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Add options -finstrument-functions-include-{file,function}-list

2015-12-03 Thread Andi Drebes
By default -finstrument-functions instruments all functions. To limit
instrumentation to certain functions or files it is necessary to
specify the complement using -finstrument-functions-exclude-file-list
or -finstrument-functions-exclude-function-list.

The new options -finstrument-functions-include-file-list and
-finstrument-functions-include-function-list make the specification of
the complement unnecessary by allowing the user to limit
instrumentation to a set of file names and functions.
---
 gcc/common.opt   | 16 ++-
 gcc/doc/invoke.texi  | 52 
 gcc/gimplify.c   | 51 ---
 gcc/opts.c   | 10 +++
 gcc/testsuite/ChangeLog  | 10 +++
 gcc/testsuite/gcc.dg/instrument-10.c |  7 +
 gcc/testsuite/gcc.dg/instrument-4.c  |  7 +
 gcc/testsuite/gcc.dg/instrument-5.c  |  7 +
 gcc/testsuite/gcc.dg/instrument-6.c  |  7 +
 gcc/testsuite/gcc.dg/instrument-7.c  |  7 +
 gcc/testsuite/gcc.dg/instrument-8.c  |  7 +
 gcc/testsuite/gcc.dg/instrument-9.c  |  7 +
 12 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/instrument-10.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-4.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-5.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-6.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-7.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-8.c
 create mode 100644 gcc/testsuite/gcc.dg/instrument-9.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 3eb520e..ac797b3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -97,7 +97,7 @@ int flag_gen_aux_info = 0
 Variable
 int flag_shlib
 
-; These two are really VEC(char_p,heap) *.
+; These are really VEC(char_p,heap) *.
 
 Variable
 void *flag_instrument_functions_exclude_functions
@@ -105,6 +105,12 @@ void *flag_instrument_functions_exclude_functions
 Variable
 void *flag_instrument_functions_exclude_files
 
+Variable
+void *flag_instrument_functions_include_functions
+
+Variable
+void *flag_instrument_functions_include_files
+
 ; Generic structs (e.g. templates not explicitly specialized)
 ; may not have a compilation unit associated with them, and so
 ; may need to be treated differently from ordinary structs.
@@ -1477,6 +1483,14 @@ finstrument-functions-exclude-file-list=
 Common RejectNegative Joined
 -finstrument-functions-exclude-file-list=filename,...  Do not instrument 
functions listed in files.
 
+finstrument-functions-include-function-list=
+Common RejectNegative Joined
+-finstrument-functions-include-function-list=name,...  Only instrument listed 
functions.
+
+finstrument-functions-include-file-list=
+Common RejectNegative Joined
+-finstrument-functions-include-file-list=filename,...  Only instrument 
functions listed in files.
+
 fipa-cp
 Common Report Var(flag_ipa_cp) Optimization
 Perform interprocedural constant propagation.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 53f1fe2..ba9a3bd 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1150,6 +1150,8 @@ See S/390 and zSeries Options.
 -finhibit-size-directive  -finstrument-functions @gol
 -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
 -finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol
+-finstrument-functions-include-function-list=@var{sym},@var{sym},@dots{} @gol
+-finstrument-functions-include-file-list=@var{file},@var{file},@dots{} @gol
 -fno-common  -fno-ident @gol
 -fpcc-struct-return  -fpic  -fPIC -fpie -fPIE -fno-plt @gol
 -fno-jump-tables @gol
@@ -24529,6 +24531,56 @@ of the function name, it is considered to be a match.  
For C99 and C++
 extended identifiers, the function name must be given in UTF-8, not
 using universal character names.
 
+@item -finstrument-functions-include-file-list=@var{file},@var{file},@dots{}
+@opindex finstrument-functions-include-file-list
+
+Limit function instrumentation to functions from files specified in
+the list. The matching of file names is identical to the matching of
+@option{-finstrument-functions-exclude-file-list}. For example
+
+@smallexample
+-finstrument-functions-include-file-list=/foo/bar,baz
+@end smallexample
+
+@noindent
+includes only functions defined in files whose pathnames contain
+@file{/foo/bar} or @file{baz}. Additional functions can be added by
+using the option
+@option{-finstrument-functions-include-function-list}. For example
+
+@smallexample
+-finstrument-functions-include-file-list=/foo/bar,baz
+-finstrument-functions-include-function-list=fn1,fn2
+@end smallexample
+includes functions defined in files whose pathnames contain
+@file{/foo/bar} or @file{baz} as well as functions whose user-readable
+names contain @code{fn1} or @code{fn2}.
+
+The option can also be combined with exclusions, where exclusions take
+precedence. For example
+
+@smallexample
+-fins

C PATCH for c/68668 (grokdeclarator and wrong type of PARM_DECL)

2015-12-03 Thread Marek Polacek
This ought to fix the fallout from PR c/68162 fix.  Here the problem is that
grokdeclarator created a wrong type for PARM_DECL "p".  It created this decl
with type "const int[] *" while it should be "const int *".

I think the problem is that we weren't using TREE_TYPE on orig_qual_type and
thus c_build_qualified_type and subsequent c_build_pointer_type might create
a bogus type.  So when we're transferring const-ness of an array into that of
type pointed to, use TREE_TYPE not only of "type", but even of the orig qual
type.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-12-03  Marek Polacek  

PR c/68668
* c-decl.c (grokdeclarator): When creating a PARM_DECL of ARRAY_TYPE,
use TREE_TYPE of orig_qual_type.

* gcc.dg/pr68668.c: New test.

diff --git gcc/c/c-decl.c gcc/c/c-decl.c
index 9ad8219..0edff2a 100644
--- gcc/c/c-decl.c
+++ gcc/c/c-decl.c
@@ -6417,6 +6417,8 @@ grokdeclarator (const struct c_declarator *declarator,
  {
/* Transfer const-ness of array into that of type pointed to.  */
type = TREE_TYPE (type);
+   if (orig_qual_type != NULL_TREE)
+ orig_qual_type = TREE_TYPE (orig_qual_type);
if (type_quals)
  type = c_build_qualified_type (type, type_quals, orig_qual_type,
 orig_qual_indirect);
diff --git gcc/testsuite/gcc.dg/pr68668.c gcc/testsuite/gcc.dg/pr68668.c
index e69de29..d144fb6 100644
--- gcc/testsuite/gcc.dg/pr68668.c
+++ gcc/testsuite/gcc.dg/pr68668.c
@@ -0,0 +1,10 @@
+/* PR c/68668 */
+/* { dg-do compile } */
+
+typedef const int T[];
+
+int
+fn1 (T p)
+{
+  return p[0];
+}

Marek


[RFA][PATCH] Run CFG cleanups after reassociation as needed

2015-12-03 Thread Jeff Law

This is something I noticed while working on fixing 67816.

Essentially I was seeing trivially true or trivially false conditionals 
left in the IL for DOM to clean up.


While DOM can and will clean that crud up, a trivially true or 
trivially false conditional ought to be detected and cleaned up by 
cleanup_cfg.


It turns out the reassociation pass does not schedule a CFG cleanup even 
in cases where it optimizes a conditional to TRUE or FALSE.


Bubbling up an indicator that we optimized away a conditional and using 
that to trigger a CFG cleanup is trivial.


While I have a slight preference to see this fix in GCC 6, if folks 
object and want this to wait for GCC 7 stage1, I'd understand.


Bootstrapped and regression tested on x86_64-linux-gnu.

OK for the trunk?

Thanks,
Jeff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 04dbcb0..61a5e54 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-12-03  Jeff Law  
+
+   * tree-ssa-reassoc.c (maybe_optimize_range_tests): Return boolean
+   indicating if a gimple conditional was optimized to true/false.
+   (reassociate_bb): Bubble up return value from
+   maybe_optimize_range_tests.
+   (do_reassoc): Similarly, but for reassociate_bb.
+   (execute_reassoc): Return TODO_cleanup_cfg as needed.
+
 2015-11-27  Jiri Engelthaler  
 
PR driver/68029
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4e62a06..893aab1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-12-02  Jeff Law  
+
+   * gcc.dg/tree-ssa/reassoc-43.c: New test.
+
 2015-12-02  Andreas Krebbel  
 
* gcc.dg/optimize-bswapdi-1.c: Force using -mzarch on s390 and
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-43.c 
b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-43.c
new file mode 100644
index 000..ea44f30
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-43.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc -w" } */
+
+typedef union tree_node *tree;
+enum cpp_ttype { CPP_COLON, CPP_SEMICOLON, CPP_CLOSE_BRACE, CPP_COMMA };
+enum rid { RID_STATIC = 0, RID_ATTRIBUTE, };
+typedef struct c_token
+{
+  enum cpp_ttype type:8;
+}
+c_token;
+typedef struct c_parser
+{
+  c_token tokens[2];
+  short tokens_avail;
+}
+c_parser;
+__inline__ c_token *
+c_parser_peek_token (c_parser * parser)
+{
+  if (parser->tokens_avail == 0)
+{
+  parser->tokens_avail = 1;
+}
+  return &parser->tokens[0];
+}
+
+__inline__ unsigned char
+c_parser_next_token_is (c_parser * parser, enum cpp_ttype type)
+{
+  return c_parser_peek_token (parser)->type == type;
+}
+
+void
+c_parser_translation_unit (c_parser * parser)
+{
+  tree prefix_attrs;
+  tree all_prefix_attrs;
+  while (1)
+{
+  if (c_parser_next_token_is (parser, CPP_COLON)
+ || c_parser_next_token_is (parser, CPP_COMMA)
+ || c_parser_next_token_is (parser, CPP_SEMICOLON)
+ || c_parser_next_token_is (parser, CPP_CLOSE_BRACE)
+ || c_parser_next_token_is_keyword (parser, RID_ATTRIBUTE))
+   {
+ if (c_parser_next_token_is_keyword (parser, RID_ATTRIBUTE))
+   all_prefix_attrs =
+ chainon (c_parser_attributes (parser), prefix_attrs);
+   }
+}
+}
+/* { dg-final { scan-tree-dump-not "0 != 0" "reassoc2"} } */
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index dfd0da1..315b0bf 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -2976,9 +2976,15 @@ struct inter_bb_range_test_entry
   unsigned int first_idx, last_idx;
 };
 
-/* Inter-bb range test optimization.  */
+/* Inter-bb range test optimization.
 
-static void
+   Returns TRUE if a gimple conditional is optimized to a true/false,
+   otherwise return FALSE.
+
+   This indicates to the caller that it should run a CFG cleanup pass
+   once reassociation is completed.  */
+
+static bool
 maybe_optimize_range_tests (gimple *stmt)
 {
   basic_block first_bb = gimple_bb (stmt);
@@ -2990,6 +2996,7 @@ maybe_optimize_range_tests (gimple *stmt)
   auto_vec ops;
   auto_vec bbinfo;
   bool any_changes = false;
+  bool cfg_cleanup_needed = false;
 
   /* Consider only basic blocks that end with GIMPLE_COND or
  a cast statement satisfying final_range_test_p.  All
@@ -2998,15 +3005,15 @@ maybe_optimize_range_tests (gimple *stmt)
   if (gimple_code (stmt) == GIMPLE_COND)
 {
   if (EDGE_COUNT (first_bb->succs) != 2)
-   return;
+   return cfg_cleanup_needed;
 }
   else if (final_range_test_p (stmt))
 other_bb = single_succ (first_bb);
   else
-return;
+return cfg_cleanup_needed;
 
   if (stmt_could_throw_p (stmt))
-return;
+return cfg_cleanup_needed;
 
   /* As relative ordering of post-dominator sons isn't fixed,
  maybe_optimize_range_tests can be called first on any
@@ -3030,14 +3037,14 @@ maybe_optimize_range_tests (gimple *stmt)
   /* As non-GIMPLE_COND last stmt always terminates the range,
   

[gomp4] backport fortran array reduction changes

2015-12-03 Thread Cesar Philippidis
This patch backports the recent array reduction changes in trunk to
gomp-4_0-branch. It's mostly straightforward, except I couldn't include
changes to reduction-2.f95 because the gimplifier is reordering the loop
clauses slightly differently in trunk and gomp4. I'm not sure why. Thomas,
that's something to keep in mind next time you do a trunk merge.

I've applied this patch to gomp-4_0-branch.

Cesar
2015-12-03  Cesar Philippidis  

	gcc/fortran/
	* openmp.c (gfc_match_omp_clauses): Allow subarrays for acc reductions.
	(resolve_omp_clauses): Error on any acc reductions on arrays.

	gcc/testsuite/
	* gfortran.dg/goacc/array-reduction.f90: New test.
	* gfortran.dg/goacc/assumed.f95: Update expected diagnostics.
	* gfortran.dg/goacc/coarray.f95: Likewise.
	* gfortran.dg/goacc/coarray_2.f90: Likewise.
	* gfortran.dg/goacc/reduction.f95: Likewise.

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 0e87f54..e7f61f2 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -997,7 +997,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask,
 
 	  if (gfc_match_omp_variable_list (" :",
 	   &c->lists[OMP_LIST_REDUCTION],
-	   false, NULL, &head) == MATCH_YES)
+	   false, NULL, &head, openacc)
+	  == MATCH_YES)
 	{
 	  gfc_omp_namelist *n;
 	  if (rop == OMP_REDUCTION_NONE)
@@ -3429,6 +3430,11 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 		   n->sym->name, &n->where);
 	  else
 	n->sym->mark = 1;
+
+	  /* OpenACC does not support reductions on arrays.  */
+	  if (n->sym->as)
+	gfc_error ("Array %qs is not permitted in reduction at %L",
+		   n->sym->name, &n->where);
 	}
 }
   
diff --git a/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90 b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90
new file mode 100644
index 000..d71c400
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90
@@ -0,0 +1,74 @@
+program test
+  implicit none
+  integer a(10), i
+
+  a(:) = 0
+  
+  ! Array reductions.
+  
+  !$acc parallel reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end kernels
+
+  ! Subarray reductions.
+  
+  !$acc parallel reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a = a + 1
+  end do
+  !$acc end kernels
+
+  ! Reductions on array elements.
+  
+  !$acc parallel reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a(1) = a(1) + 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel
+  !$acc loop reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a(1) = a(1) + 1
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  !$acc loop reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" }
+  do i = 1, 10
+ a(1) = a(1) + 1
+  end do
+  !$acc end kernels
+  
+  print *, a
+end program test
diff --git a/gcc/testsuite/gfortran.dg/goacc/assumed.f95 b/gcc/testsuite/gfortran.dg/goacc/assumed.f95
index 3287241..4efe5a2 100644
--- a/gcc/testsuite/gfortran.dg/goacc/assumed.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/assumed.f95
@@ -45,3 +45,6 @@ contains
 !$acc update self (a) ! { dg-error "Assumed rank" }
   end subroutine assumed_rank
 end module test
+
+! { dg-error "Array 'a' is not permitted in reduction" "" { target "*-*-*" } 18 }
+! { dg-error "Array 'a' is not permitted in reduction" "" { target "*-*-*" } 39 }
diff --git a/gcc/testsuite/gfortran.dg/goacc/coarray.f95 b/gcc/testsuite/gfortran.dg/goacc/coarray.f95
index d2f10d5..932e1f7 100644
--- a/gcc/testsuite/gfortran.dg/goacc/coarray.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/coarray.f95
@@ -2,8 +2,6 @@
 ! { dg-additional-options "-fcoarray=single" }
 !
 ! PR fortran/63861
-! { dg-xfail-if "" { *-*-* } }
-! { dg-excess-errors "TODO" }
 
 module test
 contains
@@ -20,7 +18,7 @@ contains
 !$acc end parallel
 !$acc host_data use_device (a)
 !$acc end host_data
-!$acc parallel loop reduction(+:a)
+!$acc parallel loop reduction(+:a) ! { dg-error "Array 'a' is not permitted in reduction" }
 do i = 1,5
 enddo
 !$acc end parallel loop
diff --git a/gcc/testsuite/gfortran.dg/goacc/

Re: [PATCH] Handle OBJ_TYPE_REF in FRE

2015-12-03 Thread Jan Hubicka
> 
> The following patch handles CSEing OBJ_TYPE_REF which was omitted
> because it is a GENERIC expression even on GIMPLE (for whatever

Why is it generic? It is part of the gimple grammar :)

> reason...).  Rather than changing this now the following patch
> simply treats it properly as such.

Thanks for working on this! Will this do code motion, too?
I think you may want to compare the ODR type of obj_type_ref_class
otherwise two otherwise equivalent OBJ_TYPE_REFs may lead to different
optimizations later.  I suppose we can have code of form

if (test)
  OBJ_TYPE_REF1
  ...
else
  OBJ_TYPE_REF2
  ..
where each invoke method of different class type but would otherwise
match as equivalent for tree-ssa-sccvn because we ignore pointed-to types.

so doing

OBJ_TYPE_REF1
if (test)
  ...
else
  ...

may lead to wrong code.

Or do you just substitute the operands of OBJ_TYPE_REF? 
> 
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
> 
> Note that this does not (yet) substitute OBJ_TYPE_REFs in calls
> with SSA names that have the same value - not sure if that would
> be desired generally (does the devirt machinery cope with that?).

This should work fine.
> 
> Thanks,
> Richard.
> 
> 2015-12-03  Richard Biener  
> 
>   PR tree-optimization/64812
>   * tree-ssa-sccvn.c (vn_get_stmt_kind): Handle OBJ_TYPE_REF.
>   (vn_nary_length_from_stmt): Likewise.
>   (init_vn_nary_op_from_stmt): Likewise.
>   * gimple-match-head.c (maybe_build_generic_op): Likewise.
>   * gimple-pretty-print.c (dump_unary_rhs): Likewise.
> 
>   * g++.dg/tree-ssa/ssa-fre-1.C: New testcase.
> 
> Index: gcc/tree-ssa-sccvn.c
> ===
> *** gcc/tree-ssa-sccvn.c  (revision 231221)
> --- gcc/tree-ssa-sccvn.c  (working copy)
> *** vn_get_stmt_kind (gimple *stmt)
> *** 460,465 
> --- 460,467 
> ? VN_CONSTANT : VN_REFERENCE);
>   else if (code == CONSTRUCTOR)
> return VN_NARY;
> + else if (code == OBJ_TYPE_REF)
> +   return VN_NARY;
>   return VN_NONE;
> }
> default:
> *** vn_nary_length_from_stmt (gimple *stmt)
> *** 2479,2484 
> --- 2481,2487 
> return 1;
>   
>   case BIT_FIELD_REF:
> + case OBJ_TYPE_REF:
> return 3;
>   
>   case CONSTRUCTOR:
> *** init_vn_nary_op_from_stmt (vn_nary_op_t
> *** 2508,2513 
> --- 2511,2517 
> break;
>   
>   case BIT_FIELD_REF:
> + case OBJ_TYPE_REF:
> vno->length = 3;
> vno->op[0] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
> vno->op[1] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 1);
> Index: gcc/gimple-match-head.c
> ===
> *** gcc/gimple-match-head.c   (revision 231221)
> --- gcc/gimple-match-head.c   (working copy)
> *** maybe_build_generic_op (enum tree_code c
> *** 243,248 
> --- 243,249 
> *op0 = build1 (code, type, *op0);
> break;
>   case BIT_FIELD_REF:
> + case OBJ_TYPE_REF:
> *op0 = build3 (code, type, *op0, op1, op2);
> break;
>   default:;
> Index: gcc/gimple-pretty-print.c
> ===
> *** gcc/gimple-pretty-print.c (revision 231221)
> --- gcc/gimple-pretty-print.c (working copy)
> *** dump_unary_rhs (pretty_printer *buffer,
> *** 302,308 
> || TREE_CODE_CLASS (rhs_code) == tcc_reference
> || rhs_code == SSA_NAME
> || rhs_code == ADDR_EXPR
> !   || rhs_code == CONSTRUCTOR)
>   {
> dump_generic_node (buffer, rhs, spc, flags, false);
> break;
> --- 302,309 
> || TREE_CODE_CLASS (rhs_code) == tcc_reference
> || rhs_code == SSA_NAME
> || rhs_code == ADDR_EXPR
> !   || rhs_code == CONSTRUCTOR
> !   || rhs_code == OBJ_TYPE_REF)
>   {
> dump_generic_node (buffer, rhs, spc, flags, false);
> break;
> Index: gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C
> ===
> *** gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C (revision 0)
> --- gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C (working copy)
> ***
> *** 0 
> --- 1,44 
> + /* { dg-do compile } */
> + /* { dg-options "-O2 -fdump-tree-fre2" } */
> + 
> + template  class A
> + {
> +   T *p;
> + 
> + public:
> +   A (T *p1) : p (p1) { p->acquire (); }
> + };
> + 
> + class B
> + {
> + public:
> + virtual void acquire ();
> + };
> + class D : public B
> + {
> + };
> + class F : B
> + {
> +   int mrContext;
> + };
> + class WindowListenerMultiplexer : F, public D
> + {
> +   void acquire () { acquire (); }
> + };
> + class C
> + {
> +   void createPeer () throw ();
> +   WindowListenerMultiplexer maWindowListeners;
> + };
> + class FmXGridPeer
> + {
> + public:
> +

RE: [PATCH] MIPS/GCC/doc: Reorder `-mcompact-branches='

2015-12-03 Thread Matthew Fortune
Maciej Rozycki  writes:
> Move the `-mcompact-branches=' option out of the middle of a block of
> floating-point options.  The option is not related to FP in any way.
> Place it immediately below other branch instruction selection options.
> 
>   gcc/
>   * doc/invoke.texi (Option Summary) : Reorder
>   `-mcompact-branches='.
>   (MIPS Options): Likewise.
> ---
> 
>  OK to apply?

OK, thanks.

Matthew


Re: [PATCH][install.texi] Add note against GNAT 4.8 on ARM targets.

2015-12-03 Thread Gerald Pfeifer
On Thu, 3 Dec 2015, Alan Lawrence wrote:
>>  doc/install.texi: Add note against GNAT 4.8 on ARM targets.

This looks fine (provided it builds and looks okay).

Just...

>> +Building the Ada frontend commonly fails (an infinite loop executing
>> @code{xsinfo}) if the host compiler is GNAT 4.8.  Host compilers built from
>> the GNAT 4.6, 4.9 or 5 release branches are known to succeed.

...if this is only just one long line, can you please wrap?

Gerald


Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-12-03 Thread Jeff Law

On 12/03/2015 07:38 AM, Richard Biener wrote:


This pass is now enabled by default with -Os but has no limits on the amount of
stmts it copies.  It also will make all loops with this shape have at least two
exits (if the resulting loop will be disambiguated the inner loop will
have two exits).
Having more than one exit will disable almost all loop optimizations after it.

[ ... ]
split-paths in the queue -- behind addressing a couple of correctness 
issues that are on my plate (not split-paths related).  I'll respond 
fully.  FWIW, I wouldn't lose much sleep if this were disabled by 
default -- without the "sink-common-code-past-phi" stuff we've discussed 
in the past it's fairly hard to justify path-splitting this aggressively.


jeff



Re: [PATCH][install.texi] Add note against GNAT 4.8 on ARM targets.

2015-12-03 Thread Alan Lawrence

On 16/11/15 15:08, Alan Lawrence wrote:

This follows from the discussion here: 
https://gcc.gnu.org/ml/gcc/2015-10/msg00082.html .

OK for trunk?

--Alan

gcc/ChangeLog:

doc/install.texi: Add note against GNAT 4.8 on ARM targets.
---
  gcc/doc/install.texi | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 1fd773e..1ce93d4 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -3481,6 +3481,8 @@ require GNU binutils 2.13 or newer.  Such subtargets 
include:
  @code{arm-*-netbsdelf}, @code{arm-*-*linux-*}
  and @code{arm-*-rtemseabi}.

+Building the Ada frontend commonly fails (an infinite loop executing 
@code{xsinfo}) if the host compiler is GNAT 4.8.  Host compilers built from the 
GNAT 4.6, 4.9 or 5 release branches are known to succeed.
+
  @html
  
  @end html



Ping.



Re: [RFA] [PR tree-optimization/68599] Avoid over-zealous optimization with -funsafe-loop-optimizations

2015-12-03 Thread Jeff Law

On 12/03/2015 02:36 AM, Richard Biener wrote:

On Wed, Dec 2, 2015 at 5:27 PM, Jeff Law  wrote:



I strongly recommend reading the analysis in pr45122 since pr68599 uses the
same testcase and just triggers the same bug in the RTL optimizers instead
of the tree optimizers.

As noted in 45122, with -funsafe-loop-optimizations, we may exit the loop an
iteration too early.  The loop in question is finite and the counter does
not overflow.  Yet -funsafe-loop-optimizations munges it badly.

As is noted in c#6 and patched in c#8, when there's more than one exit from
the loop, simply discarding the assumptions for the trip count is "a bit too
unsafe".  Richi & Zdenek agreed that disabling the optimization when the
loop has > 1 exit was the preferred approach. Alex's patch did just that,
but only for the tree optimizers.

This patch does essentially the same thing for the RTL loop optimizer. If
the candidate loop has > 1 exit, then we don't allow
-funsafe-loop-optimizations to drop the assumptions/infinite notes for the
RTL loop.

This required ensuring that LOOPS_HAVE_RECORDED_EXITS when initializing the
loop optimizer.

Bootstrapped and regression tested on x86_64-linux-gnu and
powerpc64-linux-gnu.  For the latter, pr45122.c flips to a pass.  Given this
is covered by the pr45122 testcase, I didn't add a new one.

OK for the trunk?


Ok.

Note that I believe we should dump -funsafe-loop-optimizations in
favor of a per-loop
#pragma now that we can properly track such.  Globally it's known to miscompile
SPEC at least.
Yea, I saw that on IRC and almost went down that path.  Certainly 
wouldn't get any argument from me if we were to remove that option. 
Sounds like Bin might want to do that and he'll have my full support.


Jeff



Re: [PATCH] Fix shrink-wrap bug with anticipating into loops (PR67778, PR68634)

2015-12-03 Thread Richard Sandiford
Segher Boessenkool  writes:
> On Wed, Dec 02, 2015 at 08:19:05PM +0100, Jakub Jelinek wrote:
>> On Wed, Dec 02, 2015 at 06:21:47PM +, Segher Boessenkool wrote:
>> > --- a/gcc/shrink-wrap.c
>> > +++ b/gcc/shrink-wrap.c
>> > @@ -752,7 +752,11 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head 
>> > *bb_with,
>> >  
>> >/* If we can move PRO back without having to duplicate more blocks, do 
>> > so.
>> >   We can move back to a block PRE if every path from PRE will 
>> > eventually
>> > - need a prologue, that is, PRO is a post-dominator of PRE.  */
>> > + need a prologue, that is, PRO is a post-dominator of PRE.  We might
>> > + need to duplicate PRE if there is any path from a successor of PRE 
>> > back
>> > + to PRE, so don't allow that either (but self-loops are fine, as are 
>> > any
>> > + other loops entirely dominated by PRE; this in general seems too
>> > + expensive to check for, for such an uncommon case).  */
>> 
>> So, what will happen if PRE self-loops?
>
> The prologue is put in a new block before the chosen one (as always).
>
>> It would be nice to have it covered by a testcase.
>
> If I knew how to prepare one, that stayed stable for more than about
> two weeks, yes :-/
>
>> > +bool ok = true;
>> > +
>> > +if (!can_get_prologue (pre, prologue_clobbered))
>> > +  ok = false;
>> > +
>> > +FOR_EACH_EDGE (e, ei, pre->succs)
>> > +  if (e->dest != pre
>> > +  && dominated_by_p (CDI_POST_DOMINATORS, e->dest, pre))
>> > +ok = false;
>> 
>> I wonder if it wouldn't be better to:
>> 
>>  if (!can_get_prologue (pre, prologue_clobbered))
>>ok = false;
>>  else
>>FOR_EACH_EDGE (e, ei, pre->succs)
>>  if (e->dest != pre
>>  && dominated_by_p (CDI_POST_DOMINATORS, e->dest, pre))
>>{
>>  ok = false;
>>  break;
>>}
>> 
>> so that it doesn't walk or continue walking the edges if not needed.
>
> If the compiler is any good, neither does my code, right?  :-)
>
> I think it is more important to have this code readable than a teeny
> tiny bit faster.  It is all linear (assuming dominator lookups are O(1)),
> which isn't too hard to ascertain (yeah, famous last words).

Maybe the clearest thing is to split it out into a function that returns
false as soon as it finds a reason why the transform is not OK.
The "decent compiler" ought to inline that function.

Thanks,
Richard



Re: [RTL] canonical form of AND-immediate within COMPARE?

2015-12-03 Thread Eric Botcazou
> Some ISAs have instructions to perform a bitwise AND operation with an
> immediate and compare the result with zero.

Many of them I'd say.

> Is there a good way to fix this? It would seem rather weird to have extra MD
> patterns to match the zero_extract forms explicitly. Maybe teaching the
> aarch64 implementation of SELECT_CC_MODE to handle ZERO_EXTRACTS the same
> as AND-immediates? Or is there something that can be done in combine
> itself?

ARM, i386, MIPS, PA, SPARC, etc have ZERO_EXTRACT-based patterns though.

-- 
Eric Botcazou


Re: Documentation tweaks for internal-fn-related optabs

2015-12-03 Thread Richard Sandiford
Bernd Schmidt  writes:
> On 12/03/2015 02:06 PM, Richard Sandiford wrote:
>> As Bernd requested, this patch adds "This pattern cannot FAIL" to the
>> documentation of optabs that came to be mapped to interal functions.
>> For consistency I did the same for optabs that were already being
>> used for internal functions.
>>
>> Many of the optabs weren't documented in the first place, so I added
>> entries for the missing ones.  Also, there were some inaccuracies in
>> the documentation of the rounding optabs.  The bitcount optabs said
>> that operand 0 has mode @var{m} and that operand 1 is under target
>> control, whereas it should be the other way around.
>
> That actually goes beyond what I imagined. I was looking at the top part 
> of md.texi (line 87), where there is a brief discussion of what is 
> allowed to FAIL and what isn't. Also, there is "@item FAIL":
>
>"Failure is currently supported only for binary (addition,
> multiplication, shifting, etc.) and bit-field (@code{extv},
> @code{extzv}, and @code{insv}) operations."
>
> That's pretty outdated. I think unary operations are probably missing by 
> accident, and from what my grep showed there are also conditional moves, 
> atomic operations, certain vec_ patterns that can all fail. As a minimum 
> this paragraph should also mention internal functions.

I don't think that quote means that FAIL is supported for _all_ optabs
with two inputs and one output.  What "etc." includes is left vague.

A blanket statement about internal functions is likely to get out of
date, since there's no reason in principle why optabs used for future
internal functions couldn't have fallbacks.  Also, "internal-function
optabs" aren't self-describing: no-one's going to know what an internal
function optab is without looking at the source.

I'd rather keep it as the patch has it and say for each relevant optab
that the expander can't fail.

Richard



Re: [PATCH] Empty redirect_edge_var_map after each pass and function

2015-12-03 Thread Richard Biener
On Thu, 3 Dec 2015, Alan Lawrence wrote:

> On 03/12/15 12:58, Richard Biener wrote:
> > On Thu, 3 Dec 2015, Alan Lawrence wrote:
> > 
> > > On 02/12/15 14:13, Jeff Law wrote:
> > > > On 12/02/2015 01:33 AM, Richard Biener wrote:
> > > > > > Right.  So the question I have is how/why did DOM leave anything in
> > > > > > the
> > > > > > map.
> > > > > > And if DOM is fixed to not leave stuff lying around, can we then
> > > > > > assert
> > > > > > that
> > > > > > nothing is ever left in those maps between passes?  There's
> > > > > > certainly no
> > > > > > good
> > > > > > reason I'm aware of why DOM would leave things in this state.
> > > > > 
> > > > > It happens not only with DOM but with all passes doing edge
> > > > > redirection.
> > > > > This is because the map is populated by GIMPLE cfg hooks just in case
> > > > > it might be used.  But there is no such thing as a "start CFG manip"
> > > > > and "end CFG manip" to cleanup such dead state.
> > > > Sigh.
> > > > 
> > > > > 
> > > > > IMHO the redirect-edge-var-map stuff is just the very most possible
> > > > > unclean implementation possible. :(  (see how remove_edge "clears"
> > > > > stale info from the map to avoid even more "interesting" stale
> > > > > data)
> > > > > 
> > > > > Ideally we could assert the map is empty whenever we leave a pass,
> > > > > but as said it triggers all over the place.  Even cfg-cleanup causes
> > > > > such stale data.
> > > > > 
> > > > > I agree that the patch is only a half-way "solution", but a full
> > > > > solution would require sth more explicit, like we do with
> > > > > initialize_original_copy_tables/free_original_copy_tables.  Thus
> > > > > require passes to explicitely request the edge data to be preserved
> > > > > with a initialize_edge_var_map/free_edge_var_map call pair.
> > > > > 
> > > > > Not appropriate at this stage IMHO (well, unless it turns out to be
> > > > > a very localized patch).
> > > > So maybe as a follow-up to aid folks in the future, how about a
> > > > debugging
> > > > verify_whatever function that we can call manually if debugging a
> > > > problem in
> > > > this space.  With a comment indicating why we can't call it
> > > > unconditionally
> > > > (yet).
> > > > 
> > > > 
> > > > jeff
> > > 
> > > I did a (fwiw disable bootstrap) build with the map-emptying code in
> > > passes.c
> > > (not functions.c), printing out passes after which the map was non-empty
> > > (before emptying it, to make sure passes weren't just carrying through
> > > stale
> > > data from earlier). My (non-exhaustive!) list of passes after which the
> > > edge_var_redirect_map can be non-empty stands at...
> > > 
> > > aprefetch ccp cddce ch ch_vect copyprop crited crited cselim cunroll
> > > cunrolli
> > > dce dom ehcleanup einline esra fab fnsplit forwprop fre graphite ifcvt
> > > isolate-paths ldist lim local-pure-const mergephi oaccdevlow ompexpssa
> > > optimized parloops pcom phicprop phiopt phiprop pre profile
> > > profile_estimate
> > > sccp sink slsr split-paths sra switchconv tailc tailr tracer unswitch
> > > veclower2 vect vrm vrp whole-program
> > 
> > Yeah, exactly my findings...  note that most of the above are likely
> > due to cfgcleanup even though it already does sth like
> > 
> >e = redirect_edge_and_branch (e, dest);
> >redirect_edge_var_map_clear (e);
> > 
> > so eventually placing a redirect_edge_var_map_empty () at the end
> > of the cleanup_tree_cfg function should prune down the above list
> > considerably (well, then assert the map is empty on entry to that
> > function of course)
> > 
> > > FWIW, the route by which dom added the edge to the redirect map was:
> > > #0  redirect_edge_var_map_add (e=e@entry=0x7fb7a5f508,
> > > result=0x7fb725a000,
> > >  def=0x7fb78eaea0, locus=2147483884) at ../../gcc/gcc/tree-ssa.c:54
> > > #1  0x00cccf58 in ssa_redirect_edge (e=e@entry=0x7fb7a5f508,
> > >  dest=dest@entry=0x7fb79cc680) at ../../gcc/gcc/tree-ssa.c:158
> > > #2  0x00b00738 in gimple_redirect_edge_and_branch (e=0x7fb7a5f508,
> > >  dest=0x7fb79cc680) at ../../gcc/gcc/tree-cfg.c:5662
> > > #3  0x006ec678 in redirect_edge_and_branch
> > > (e=e@entry=0x7fb7a5f508,
> > >  dest=) at ../../gcc/gcc/cfghooks.c:356
> > > #4  0x00cb4530 in ssa_fix_duplicate_block_edges (rd=0x1a29f10,
> > >  local_info=local_info@entry=0x7fed40)
> > >  at ../../gcc/gcc/tree-ssa-threadupdate.c:1184
> > > #5  0x00cb5520 in ssa_fixup_template_block (slot=,
> > >  local_info=0x7fed40) at
> > > ../../gcc/gcc/tree-ssa-threadupdate.c:1369
> > > #6  traverse_noresize (
> > >  argument=0x7fed40, this=0x1a21a00) at
> > > ../../gcc/gcc/hash-table.h:911
> > > #7  traverse (
> > >  argument=0x7fed40, this=0x1a21a00) at
> > > ../../gcc/gcc/hash-table.h:933
> > > #8  thread_block_1 (bb=bb@entry=0x7fb7485bc8,
> > >  noloop_only=noloop_only@entry=true, joiners=joiners@entry=true)
> > >  at 

[PATCH 01/10] C++ FE: expression ranges v4

2015-12-03 Thread David Malcolm
Changes in this version:
- removal of gcc_assert (m_loc != UNKNOWN_LOCATION) from cp_expr ctor
- uses protected_set_expr_location or cp_expr::set_location/set_range,
  rather than attempting to add location_t arguments
- adds location support and test coverage based on issues seen in
  the analogous work on the C FE (see r230497 and r230775).
  Specifically:
  - various Objective C++ constructs (creating obj-c++.dg/plugin in
order to unit-test these, analogous to changes for C FE)
  - braced initializers
  - statement expressions
  - address of label
  - transaction expressions
  - __FUNCTION__ et al
  - __builtin_va_arg and __builtin_offsetof
- handle locations of functional casts and _Cilk_spawn
- fixes locations of negative numeric literals
- various other bugfixes and additional test coverage

gcc/ChangeLog:
* convert.c (convert_to_real_1): When converting from a
REAL_TYPE, preserve the location of EXPR in the result.
* tree.c (get_pure_location): Make non-static.
(set_source_range): Return the resulting location_t.
(make_location): New function.
* tree.h (get_pure_location): New decl.
(get_finish): New inline function.
(set_source_range): Convert return type from void to location_t.
(make_location): New decl.

gcc/cp/ChangeLog:
* cp-tree.h (class cp_expr): New class.
(finish_parenthesized_expr): Convert return type and param to
cp_expr.
(perform_koenig_lookup): Convert return type and param from tree
to cp_expr.
(finish_increment_expr): Likewise.
(finish_unary_op_expr): Likewise.
(finish_id_expression): Likewise for return type.
(build_class_member_access_expr): Likewise for param.
(finish_class_member_access_expr): Likewise.
(build_x_unary_op): Likewise.
(build_c_cast): New decl.
(build_x_modify_expr): Convert return type from tree to cp_expr.
* name-lookup.c (lookup_arg_dependent_1): Likewise.
(lookup_arg_dependent): Likewise; also for local "ret".
* name-lookup.h (lookup_arg_dependent): Likewise for return type.
* parser.c (struct cp_parser_expression_stack_entry): Likewise
for field "lhs".
(cp_parser_identifier): Likewise for return type.  Use cp_expr
ctor to preserve the token's location.
(cp_parser_string_literal): Likewise, building up a meaningful
location for the case where a compound string literal is built by
concatenation.
(cp_parser_userdef_char_literal): Likewise for return type.
(cp_parser_userdef_numeric_literal): Likewise.
(cp_parser_statement_expr): Convert return type to cp_expr.
Generate a suitable location for the expr and return it via the
cp_expr ctor.
(cp_parser_fold_expression): Convert return type to cp_expr.
(cp_parser_primary_expression): Likewise, and for locals "expr",
"lam", "id_expression", "decl".
Use cp_expr ctor when parsing literals, to preserve the spelling
location of the token.  Preserve the locations of parentheses.
Preserve location when calling objc_lookup_ivar.
Preserve the location for "this" tokens.  Generate suitable
locations for "__builtin_va_arg" constructs and for
Objective C 2.0 dot-syntax.  Set the location for the result of
finish_id_expression.
(cp_parser_primary_expression): Convert return type from tree to
cp_expr.
(cp_parser_id_expression): Likewise.
(cp_parser_unqualified_id): Likewise.  Also for local "id".
(cp_parser_postfix_expression): Likewise, also for local
"postfix_expression".  Generate suitable locations for
C++-style casts, "_Cilk_spawn" constructs.  Convert local
"initializer" to cp_expr and use it to preserve the location of
compound literals.  Capture the location of the closing
parenthesis of a call site via
cp_parser_parenthesized_expression_list, and use it to build
a source range for a call.  Use cp_expr in ternary expression.
(cp_parser_postfix_dot_deref_expression): Convert param from tree to
cp_expr.  Generate and set a location.
(cp_parser_parenthesized_expression_list): Add "close_paren_loc"
out-param, and write back to it.
(cp_parser_unary_expression): Convert return type from tree to
cp_expr.  Also for locals "cast_expression" and "expression".
Generate and use suitable locations for addresses of
labels and for cast expressions.  Call cp_expr::set_location where
necessary.  Preserve the locations of negated numeric literals.
(cp_parser_new_expression): Generate meaningful locations/ranges.
(cp_parser_cast_expression): Convert return type from tree to
cp_expr; also for local "expr".  Use the paren location to generate a
meaningful range for the express

Re: [PATCH] Empty redirect_edge_var_map after each pass and function

2015-12-03 Thread Alan Lawrence

On 03/12/15 12:58, Richard Biener wrote:

On Thu, 3 Dec 2015, Alan Lawrence wrote:


On 02/12/15 14:13, Jeff Law wrote:

On 12/02/2015 01:33 AM, Richard Biener wrote:

Right.  So the question I have is how/why did DOM leave anything in the
map.
And if DOM is fixed to not leave stuff lying around, can we then assert
that
nothing is ever left in those maps between passes?  There's certainly no
good
reason I'm aware of why DOM would leave things in this state.


It happens not only with DOM but with all passes doing edge redirection.
This is because the map is populated by GIMPLE cfg hooks just in case
it might be used.  But there is no such thing as a "start CFG manip"
and "end CFG manip" to cleanup such dead state.

Sigh.



IMHO the redirect-edge-var-map stuff is just the very most possible
unclean implementation possible. :(  (see how remove_edge "clears"
stale info from the map to avoid even more "interesting" stale
data)

Ideally we could assert the map is empty whenever we leave a pass,
but as said it triggers all over the place.  Even cfg-cleanup causes
such stale data.

I agree that the patch is only a half-way "solution", but a full
solution would require sth more explicit, like we do with
initialize_original_copy_tables/free_original_copy_tables.  Thus
require passes to explicitly request the edge data to be preserved
with an initialize_edge_var_map/free_edge_var_map call pair.

Not appropriate at this stage IMHO (well, unless it turns out to be
a very localized patch).

So maybe as a follow-up to aid folks in the future, how about a debugging
verify_whatever function that we can call manually if debugging a problem in
this space.  With a comment indicating why we can't call it unconditionally
(yet).


jeff


I did a (fwiw disable bootstrap) build with the map-emptying code in passes.c
(not functions.c), printing out passes after which the map was non-empty
(before emptying it, to make sure passes weren't just carrying through stale
data from earlier). My (non-exhaustive!) list of passes after which the
edge_var_redirect_map can be non-empty stands at...

aprefetch ccp cddce ch ch_vect copyprop crited crited cselim cunroll cunrolli
dce dom ehcleanup einline esra fab fnsplit forwprop fre graphite ifcvt
isolate-paths ldist lim local-pure-const mergephi oaccdevlow ompexpssa
optimized parloops pcom phicprop phiopt phiprop pre profile profile_estimate
sccp sink slsr split-paths sra switchconv tailc tailr tracer unswitch
veclower2 vect vrm vrp whole-program


Yeah, exactly my findings...  note that most of the above are likely
due to cfgcleanup even though it already does sth like

   e = redirect_edge_and_branch (e, dest);
   redirect_edge_var_map_clear (e);

so eventually placing a redirect_edge_var_map_empty () at the end
of the cleanup_tree_cfg function should prune down the above list
considerably (well, then assert the map is empty on entry to that
function of course)


FWIW, the route by which dom added the edge to the redirect map was:
#0  redirect_edge_var_map_add (e=e@entry=0x7fb7a5f508, result=0x7fb725a000,
 def=0x7fb78eaea0, locus=2147483884) at ../../gcc/gcc/tree-ssa.c:54
#1  0x00cccf58 in ssa_redirect_edge (e=e@entry=0x7fb7a5f508,
 dest=dest@entry=0x7fb79cc680) at ../../gcc/gcc/tree-ssa.c:158
#2  0x00b00738 in gimple_redirect_edge_and_branch (e=0x7fb7a5f508,
 dest=0x7fb79cc680) at ../../gcc/gcc/tree-cfg.c:5662
#3  0x006ec678 in redirect_edge_and_branch (e=e@entry=0x7fb7a5f508,
 dest=) at ../../gcc/gcc/cfghooks.c:356
#4  0x00cb4530 in ssa_fix_duplicate_block_edges (rd=0x1a29f10,
 local_info=local_info@entry=0x7fed40)
 at ../../gcc/gcc/tree-ssa-threadupdate.c:1184
#5  0x00cb5520 in ssa_fixup_template_block (slot=,
 local_info=0x7fed40) at ../../gcc/gcc/tree-ssa-threadupdate.c:1369
#6  traverse_noresize (
 argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:911
#7  traverse (
 argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:933
#8  thread_block_1 (bb=bb@entry=0x7fb7485bc8,
 noloop_only=noloop_only@entry=true, joiners=joiners@entry=true)
 at ../../gcc/gcc/tree-ssa-threadupdate.c:1592
#9  0x00cb5a40 in thread_block (bb=0x7fb7485bc8,
 noloop_only=noloop_only@entry=true)
 at ../../gcc/gcc/tree-ssa-threadupdate.c:1629
---Type  to continue, or q  to quit---
#10 0x00cb6bf8 in thread_through_all_blocks (
 may_peel_loop_headers=true) at ../../gcc/gcc/tree-ssa-threadupdate.c:2736
#11 0x00becf6c in (anonymous namespace)::pass_dominator::execute (
 this=, fun=0x7fb77d1b28)
 at ../../gcc/gcc/tree-ssa-dom.c:622
#12 0x009feef4 in execute_one_pass (pass=pass@entry=0x16d1a80)
 at ../../gcc/gcc/passes.c:2311

The edge is then deleted much later:
#3  0x00f858e4 in free_edge (fn=, e=)
 at ../../gcc/gcc/cfg.c:91
#4  remove_edge_raw (e=) at ../../gcc/gcc/cfg.c:350
#5  0x006ec814 in remove

[C] Issue an error on scalar va_list with reverse storage order

2015-12-03 Thread Eric Botcazou
Hi,

further testing revealed an issue with va_arg handling and reverse scalar 
storage order on some platforms: when va_list is scalar, passing a field of a 
structure with reverse SSO as first argument to va_start/va_arg/va_end doesn't 
work because the machinery takes its address and this is not allowed for such 
a field (it's really a corner case but gcc.c-torture/execute/stdarg-2.c does 
exercise it).  Hence the attached patch which issues an error in this case.

Tested on x86_64-suse-linux, OK for the mainline?


2015-12-03  Eric Botcazou  

* c-tree.h (c_build_va_arg): Adjust prototype.
* c-parser.c (c_parser_postfix_expression): Adjust call to above.
* c-typeck.c (c_build_va_arg): Rename LOC parameter to LOC2, add LOC1
parameter, adjust throughout and issue an error if EXPR is a component
with reverse storage order.


2015-12-03  Eric Botcazou  

* gcc.dg/sso-9.c: New test.

-- 
Eric BotcazouIndex: c-parser.c
===
--- c-parser.c	(revision 231206)
+++ c-parser.c	(working copy)
@@ -7485,7 +7485,7 @@ c_parser_postfix_expression (c_parser *p
 	else
 	  {
 		tree type_expr = NULL_TREE;
-		expr.value = c_build_va_arg (loc, e1.value,
+		expr.value = c_build_va_arg (start_loc, e1.value, loc,
 	 groktypename (t1, &type_expr, NULL));
 		if (type_expr)
 		  {
Index: c-tree.h
===
--- c-tree.h	(revision 231206)
+++ c-tree.h	(working copy)
@@ -661,7 +661,7 @@ extern tree c_finish_omp_task (location_
 extern void c_finish_omp_cancel (location_t, tree);
 extern void c_finish_omp_cancellation_point (location_t, tree);
 extern tree c_finish_omp_clauses (tree, bool, bool = false);
-extern tree c_build_va_arg (location_t, tree, tree);
+extern tree c_build_va_arg (location_t, tree, location_t, tree);
 extern tree c_finish_transaction (location_t, tree, int);
 extern bool c_tree_equal (tree, tree);
 extern tree c_build_function_call_vec (location_t, vec, tree,
Index: c-typeck.c
===
--- c-typeck.c	(revision 231206)
+++ c-typeck.c	(working copy)
@@ -13426,20 +13426,28 @@ c_build_qualified_type (tree type, int t
 /* Build a VA_ARG_EXPR for the C parser.  */
 
 tree
-c_build_va_arg (location_t loc, tree expr, tree type)
+c_build_va_arg (location_t loc1, tree expr, location_t loc2, tree type)
 {
   if (error_operand_p (type))
 return error_mark_node;
+  /* VA_ARG_EXPR cannot be used for a scalar va_list with reverse storage
+ order because it takes the address of the expression.  */
+  else if (handled_component_p (expr)
+	   && reverse_storage_order_for_component_p (expr))
+{
+  error_at (loc1, "cannot use % with reverse storage order");
+  return error_mark_node;
+}
   else if (!COMPLETE_TYPE_P (type))
 {
-  error_at (loc, "second argument to % is of incomplete "
+  error_at (loc2, "second argument to % is of incomplete "
 		"type %qT", type);
   return error_mark_node;
 }
   else if (warn_cxx_compat && TREE_CODE (type) == ENUMERAL_TYPE)
-warning_at (loc, OPT_Wc___compat,
+warning_at (loc2, OPT_Wc___compat,
 		"C++ requires promoted type, not enum type, in %");
-  return build_va_arg (loc, expr, type);
+  return build_va_arg (loc2, expr, type);
 }
 
 /* Return truthvalue of whether T1 is the same tree structure as T2.
/* Test support of scalar_storage_order attribute */

/* { dg-do compile } */

#include <stdarg.h>

int x;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
struct __attribute__((scalar_storage_order("big-endian"))) Rec
{
  va_list v;
};
#else
struct __attribute__((scalar_storage_order("little-endian"))) Rec
{
  va_list v;
};
#endif

void foo (int i, ...)
{
  struct Rec a;
  va_start (a.v, i);
  a.v = a.v, x = va_arg (a.v, int); /* { dg-error "array type|reverse storage order" } */
  va_end (a.v);
}


[PATCH 06/10] Fix g++.dg/template/pseudodtor3.C

2015-12-03 Thread David Malcolm
gcc/testsuite/ChangeLog:
* g++.dg/template/pseudodtor3.C: Update column numbers in dg-error
directives.
---
 gcc/testsuite/g++.dg/template/pseudodtor3.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/template/pseudodtor3.C 
b/gcc/testsuite/g++.dg/template/pseudodtor3.C
index 202182f..8700bb9 100644
--- a/gcc/testsuite/g++.dg/template/pseudodtor3.C
+++ b/gcc/testsuite/g++.dg/template/pseudodtor3.C
@@ -11,7 +11,7 @@ struct A
 template  struct B
 {
   T &foo ();
-  B () { foo.~T (); }  // { dg-error "10:invalid use of member" }
+  B () { foo.~T (); }  // { dg-error "15:invalid use of member" }
 };
 
 B b;
@@ -19,7 +19,7 @@ B b;
 template  struct C
 {
   T t;
-  C () { t.~S (); }// { dg-error "10:is not of type" }
+  C () { t.~S (); }// { dg-error "13:is not of type" }
 };
 
 C c;
-- 
1.8.5.3



[RTL] canonical form of AND-immediate within COMPARE?

2015-12-03 Thread Kyrill Tkachov

Hi all,

Some ISAs have instructions to perform a bitwise AND operation with an 
immediate and compare
the result with zero. For example, the aarch64 TST instruction.
This is represented naturally in the MD file as:

(define_insn "*and3nr_compare0"
  [(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
 (and:GPI (match_operand:GPI 0 "register_operand" "%r,r")
  (match_operand:GPI 1 "aarch64_logical_operand" "r,"))
 (const_int 0)))]
  ""
  "tst\\t%0, %1"
  [(set_attr "type" "logics_reg,logics_imm")]
)

However, when the immediate operand of the AND is all ones, combine transforms 
that
into a zero_extract.
For example, the testcase is on aarch64:
void g ();

void
f1 (int x)
{
  if (x & 15)
g ();
}

We're trying to combine the insns:
(insn 6 3 7 2 (set (reg:SI 75)
(and:SI (reg/v:SI 74 [ x ])
(const_int 15 [0xf]))) cbz.c:7 460 {andsi3}
 (expr_list:REG_DEAD (reg/v:SI 74 [ x ])
(nil)))
(insn 7 6 8 2 (set (reg:CC 66 cc)
(compare:CC (reg:SI 75)
(const_int 0 [0]))) cbz.c:7 385 {*cmpsi}
 (expr_list:REG_DEAD (reg:SI 75)
(nil)))

followed by a conditional branch:
(jump_insn 8 7 9 2 (set (pc)
(if_then_else (eq (reg:CC 66 cc)
(const_int 0 [0]))
(label_ref:DI 14)
(pc))) cbz.c:7 7 {condjump}

combine attempts to match the pattern:
(set (reg:CC 66 cc)
(compare:CC (zero_extract:DI (reg:DI 0 x0 [ x ])
(const_int 4 [0x4])
(const_int 0 [0]))
(const_int 0 [0])))


and fails. This fails, first because our pattern matches the and-immediate 
form, not the zero_extract form.
The change_zero_ext step at the end of combine can't fix the damage because 
earlier in simplify_set when we
called SELECT_CC_MODE on the COMPARE and its use in the conditional branch the 
aarch64 implementation of SELECT_CC_MODE
doesn't handle the ZERO_EXTRACT form to return the CC_NZ form that the pattern 
needs, so the change_zero_ext code can
transform the zero_extract back into the AND-immediate but it doesn't have the 
context to fix the CC mode.

Is there a good way to fix this? It would seem rather weird to have extra MD 
patterns to match the zero_extract forms
explicitly. Maybe teaching the aarch64 implementation of SELECT_CC_MODE to 
handle ZERO_EXTRACTS the same as AND-immediates?
Or is there something that can be done in combine itself?

Thanks,
Kyrill





Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-12-03 Thread Richard Biener
On Thu, Dec 3, 2015 at 3:38 PM, Richard Biener
 wrote:
> On Sat, Nov 14, 2015 at 12:35 AM, Jeff Law  wrote:
>> On 11/13/2015 01:23 PM, Jeff Law wrote:
>>>
>>> On 11/13/2015 11:09 AM, Richard Biener wrote:
>>>
>
> BTW Do we have an API for indicating that new blocks have been added to
>
> a loop?  If so, then we can likely drop the LOOPS_NEED_FIXUP.


 Please. It's called add_to_loop or so.
>>>
>>> Haha, the block duplication code was handling this already.  So in
>>> theory I can just drop the LOOPS_NEED_FIXUP completely.  Testing now.
>>>
>>> jeff
>>>
>> Attached is the committed patch for path splitting.  As noted above, we
>> didn't need the LOOPS_NEED_FIXUP in the final version, so that wart is gone
>> :-)
>>
>> I do find myself wondering if this can/should be generalized beyond just
>> paths heading to loop backedges.  However to do so I think we'd need to be
>> able to undo this transformation reliably and we'd need some heuristics when
>> to duplicate to expose the redundancy vs rely on PRE techniques and jump
>> threading.  I vaguely remember a paper which touched on these topics, but I
>> can't seem to find it.
>>
>> Anyway, bootstrapped and regression tested on x86_64-linux-gnu. Installed on
>> the trunk.
>
> This pass is now enabled by default with -Os but has no limits on the amount 
> of
> stmts it copies.  It also will make all loops with this shape have at least 
> two
> exits (if the resulting loop will be disambiguated the inner loop will
> have two exits).
> Having more than one exit will disable almost all loop optimizations after it.
>
> The pass itself documents the transform it does but does zero to motivate it.
>
> What's the benefit of this pass (apart from disrupting further optimizations)?
>
> I can see a _single_ case where duplicating the latch will allow threading
> one of the paths through the loop header to eliminate the original exit.  Then
> disambiguation may create a nice nested loop out of this.  Of course that
> is only profitable again if you know the remaining single exit of the inner
> loop (exiting to the outer one) is executed infrequently (thus the inner loop
> actually loops).
>
> But no checks other than on the CFG shape exist (oh, it checks it will
> at _least_ copy two stmts!).
>
> Given the profitability constraints above (well, correct me if I am
> wrong on these)
> it looks like the whole transform should be done within the FSM threading
> code which might be able to compute whether there will be an inner loop
> with a single exit only.
>
> I'm inclined to request the pass to be removed again or at least disabled by
> default.
>
> What closed source benchmark was this transform invented for?

Ah, some EEMBC one.

Btw, the testcase that was added shows

   if (xc < xm)
 {
   xk = (unsigned char) (xc < xy ? xc : xy);
 }
   else
{
  xk = (unsigned char) (xm < xy ? xm : xy);
}

which might be better handled by phiopt transforming it into

xk = MIN (xc, MIN (xm, xy))

phiopt1 sees (hooray to GENERIC folding)

  xc_26 = ~xr_21;
  xm_27 = ~xg_23;
  xy_28 = ~xb_25;
  if (xr_21 > xg_23)
goto ;
  else
goto ;

  :
  xk_29 = MIN_EXPR ;
  goto ;

  :
  xk_30 = MIN_EXPR ;

  :
  # xk_4 = PHI 

btw, see PR67438 for a similar testcase and the above pattern.

Richard.

> Richard.
>
>>
>>
>>
>> commit c1891376e5dcc99ad8be2d22f9551c03f9bb2729
>> Author: Jeff Law 
>> Date:   Fri Nov 13 16:29:34 2015 -0700
>>
>> [Patch,tree-optimization]: Add new path Splitting pass on tree ssa
>> representation
>>
>> * Makefile.in (OBJS): Add gimple-ssa-split-paths.o
>> * common.opt (-fsplit-paths): New flag controlling path splitting.
>> * doc/invoke.texi (fsplit-paths): Document.
>> * opts.c (default_options_table): Add -fsplit-paths to -O2.
>> * passes.def: Add split_paths pass.
>> * timevar.def (TV_SPLIT_PATHS): New timevar.
>> * tracer.c: Include "tracer.h"
>> (ignore_bb_p): No longer static.
>> (transform_duplicate): New function, broken out of tail_duplicate.
>> (tail_duplicate): Use transform_duplicate.
>> * tracer.h (ignore_bb_p): Declare
>> (transform_duplicate): Likewise.
>> * tree-pass.h (make_pass_split_paths): Declare.
>> * gimple-ssa-split-paths.c: New file.
>>
>> * gcc.dg/tree-ssa/split-path-1.c: New test.
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index dde2695..a7abe37 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,3 +1,21 @@
>> +2015-11-13  Ajit Agarwal  
>> +   Jeff Law  
>> +
>> +   * Makefile.in (OBJS): Add gimple-ssa-split-paths.o
>> +   * common.opt (-fsplit-paths): New flag controlling path splitting.
>> +   * doc/invoke.texi (fsplit-paths): Document.
>> +   * opts.c (default_options_table): Add -fsplit-paths to -O2.
>> +   * passes.def: Add split_paths pass.
>> +   * timevar.def (TV_SPLIT_PATHS): 

[PATCH 05/10] Fix location of dg-error within g++.dg/template/pr64100.C

2015-12-03 Thread David Malcolm
Here's what it now emits (if caret-printing were enabled):

g++.dg/template/pr64100.C: In instantiation of ‘class foo’:
g++.dg/template/pr64100.C:8:16:   required from here
g++.dg/template/pr64100.C:5:41: error: invalid use of incomplete type ‘class 
foo’
 static_assert(noexcept(((foo *)1)->~foo()), ""); // { dg-error "incomplete 
type" }
~^~~

g++.dg/template/pr64100.C:3:27: note: definition of ‘class foo’ is not 
complete until the closing brace
 template struct foo // { dg-message "note" }
   ^~~

gcc/testsuite/ChangeLog:
* g++.dg/template/pr64100.C: Update location of dg-error
directive.
---
 gcc/testsuite/g++.dg/template/pr64100.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/template/pr64100.C 
b/gcc/testsuite/g++.dg/template/pr64100.C
index 493849f..051800c 100644
--- a/gcc/testsuite/g++.dg/template/pr64100.C
+++ b/gcc/testsuite/g++.dg/template/pr64100.C
@@ -1,8 +1,8 @@
 // { dg-do compile { target c++11 } }
 
 template struct foo // { dg-message "note" }
-{ // { dg-error "incomplete type" }
-static_assert(noexcept(((foo *)1)->~foo()), "");
+{
+static_assert(noexcept(((foo *)1)->~foo()), ""); // { dg-error "incomplete 
type" }
 }; 
 
 template class foo;
-- 
1.8.5.3



[PATCH 00/10] C++ expression ranges v4

2015-12-03 Thread David Malcolm
On Wed, 2015-11-25 at 16:26 -0500, Jason Merrill wrote:
> > It's not clear to me whether I should be passing in UNKNOWN_LOCATION
> > or input_location to the various functions.
> >
> > cp_build_unary_op used input_location in various places internally,
> > so I've passed that in wherever there isn't a better value.
> 
> Rather than try to get this right now I'm inclined to save it for the 
> next stage 1 and go back to protected_set_expr_location for GCC 6.

Thanks; I've reworked the patch based on that idea.  I found whilst
bugfixing that in general it was better to use
cp_expr::set_location, which calls protected_set_expr_location,
since the former sets both the location in the tree node (if any)
*and* the shadow copy in the cp_expr (thus ensuring that compound
expressions use the correct location_t).

I've also done a lot of bugfixing, and rebased
from r230562 (Nov 18th) to r231208 (Dec 2nd).

> > Bootstraps (on x86_64-pc-linux-gnu), but regresses some tests, due to
> > changes in locations at which diagnostics are emitted:
> >
> >   c-c++-common/cilk-plus/CK/cilk_for_errors.c
> >   c-c++-common/cilk-plus/PS/for1.c
> >   c-c++-common/gomp/pr59073.c
> >   g++.dg/cpp0x/nsdmi-template14.C
> >   g++.dg/gomp/for-1.C
> >   g++.dg/gomp/pr39495-2.C
> >   g++.dg/init/new38.C
> >   g++.dg/warn/Wconversion-real-integer2.C
> >   g++.dg/warn/pr35635.C
> 
> Are the changes good or bad?

Some were bad, which I've fixed in the code.  Others were
improvements, requiring tweaks/movement of dg- directives.
I've broken out any such changes I needed to make to
specific test cases as separate patches in the kit, with notes
on each, in the hope it will make review easier.  (The kit would be
applied as a single commit; I've been testing it as one).

The following 10-patch kit bootstraps&regrtests successfully on
x86_64-pc-linux-gnu.

It adds 213 new PASS results to g++.sum, and changes the location
of 154 PASS results there.

It adds 16 new PASS results to obj-c++.sum.

OK for trunk for gcc 6?


David Malcolm (10):
  C++ FE: expression ranges v4
  Fix g++.dg/cpp0x/nsdmi-template14.C
  Fix g++.dg/gomp/loop-1.C
  Fix g++.dg/template/crash55.C
  Fix location of dg-error within g++.dg/template/pr64100.C
  Fix g++.dg/template/pseudodtor3.C
  Fix g++.dg/template/ref3.C
  Fix g++.dg/ubsan/pr63956.C
  Fix g++.dg/warn/pr35635.C
  Fix g++.dg/warn/Wconversion-real-integer2.C

 gcc/convert.c  |   9 +-
 gcc/cp/cp-tree.h   |  86 ++-
 gcc/cp/cvt.c   |   4 +-
 gcc/cp/name-lookup.c   |   6 +-
 gcc/cp/name-lookup.h   |   2 +-
 gcc/cp/parser.c| 576 +++
 gcc/cp/semantics.c |  53 +-
 gcc/cp/typeck.c|  42 +-
 gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C  |   4 +-
 gcc/testsuite/g++.dg/gomp/loop-1.C |  32 +-
 .../g++.dg/plugin/diagnostic-test-expressions-1.C  | 775 +
 gcc/testsuite/g++.dg/plugin/plugin.exp |   5 +-
 gcc/testsuite/g++.dg/template/crash55.C|   3 +-
 gcc/testsuite/g++.dg/template/pr64100.C|   4 +-
 gcc/testsuite/g++.dg/template/pseudodtor3.C|   4 +-
 gcc/testsuite/g++.dg/template/ref3.C   |   6 +-
 gcc/testsuite/g++.dg/ubsan/pr63956.C   |  28 +-
 .../g++.dg/warn/Wconversion-real-integer2.C|   4 +-
 gcc/testsuite/g++.dg/warn/pr35635.C|   6 +-
 .../plugin/diagnostic-test-expressions-1.mm|  94 +++
 gcc/testsuite/obj-c++.dg/plugin/plugin.exp |  90 +++
 gcc/tree.c |  25 +-
 gcc/tree.h |  17 +-
 23 files changed, 1632 insertions(+), 243 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/plugin/diagnostic-test-expressions-1.C
 create mode 100644 
gcc/testsuite/obj-c++.dg/plugin/diagnostic-test-expressions-1.mm
 create mode 100644 gcc/testsuite/obj-c++.dg/plugin/plugin.exp

-- 
1.8.5.3



[PATCH 09/10] Fix g++.dg/warn/pr35635.C

2015-12-03 Thread David Malcolm
This testcase was broken by the patch kit; upon investigation
the best fix is to try to use the location of the relevant
expression when warning about conversions, rather than
input_location, falling back to the latter via EXPR_LOC_OR_LOC.

One dg-warning needed moving, since the caret is on the "?" of the
conditional here:

   uchar_x = bar != 0
 
 ? (unsigned char) 1024
 ^~
 : -1;
 

gcc/cp/ChangeLog:
* cvt.c (cp_convert_and_check): When warning about conversions,
attempt to use the location of "expr" if available, otherwise
falling back to the old behavior of using input_location.

gcc/testsuite/ChangeLog:
* g++.dg/warn/pr35635.C (func3): Update location of a
dg-warning.
---
 gcc/cp/cvt.c| 4 ++--
 gcc/testsuite/g++.dg/warn/pr35635.C | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c
index ebca004..f24f280 100644
--- a/gcc/cp/cvt.c
+++ b/gcc/cp/cvt.c
@@ -650,8 +650,8 @@ cp_convert_and_check (tree type, tree expr, tsubst_flags_t 
complain)
   folded_result = fold_simple (folded_result);
   if (!TREE_OVERFLOW_P (folded)
  && folded_result != error_mark_node)
-   warnings_for_convert_and_check (input_location, type, folded,
-   folded_result);
+   warnings_for_convert_and_check (EXPR_LOC_OR_LOC (expr, input_location),
+   type, folded, folded_result);
 }
 
   return result;
diff --git a/gcc/testsuite/g++.dg/warn/pr35635.C 
b/gcc/testsuite/g++.dg/warn/pr35635.C
index de68ceb..19345c5 100644
--- a/gcc/testsuite/g++.dg/warn/pr35635.C
+++ b/gcc/testsuite/g++.dg/warn/pr35635.C
@@ -62,9 +62,9 @@ void func3()
   /* At least one branch of ? does not fit in the destination, thus
  warn.  */
   uchar_x = bar != 0 ? 2.1 : 10; /* { dg-warning "conversion" } */
-  uchar_x = bar != 0  /* { dg-warning "negative integer implicitly converted 
to unsigned type" } */
-? (unsigned char) 1024 
-: -1; 
+  uchar_x = bar != 0
+? (unsigned char) 1024 /* { dg-warning "negative integer implicitly 
converted to unsigned type" } */
+: -1;
 }
 
 void func4()
-- 
1.8.5.3



[PATCH 02/10] Fix g++.dg/cpp0x/nsdmi-template14.C

2015-12-03 Thread David Malcolm
When building new-expressions, we use cp_lexer_previous_token
and access its location to get the final position in the source
range.

Within g++.dg/cpp0x/nsdmi-template14.C, the previous token
within a new expr can have been purged, leading to UNKNOWN_LOCATION.

  g++.dg/cpp0x/nsdmi-template14.C:11:10: error: recursive instantiation of 
non-static data member initializer for ‘B<1>::p’
  B* p = new B;

(note the lack of caret)

(gdb) p *end_tok
$54 = {type = CPP_GREATER, keyword = RID_MAX, flags = 0 '\000', pragma_kind = 
PRAGMA_NONE, implicit_extern_c = 0,
error_reported = 0, purged_p = 1, location = 0, u = {tree_check_value = 0x0, 
value = }}

This patch adds bulletproofing to detect purged tokens, and avoid using
them.

Alternatively, is it OK to access purged tokens for this kind of thing?
If so, would it make more sense to instead leave their locations untouched
when purging them?

The patch also updates the location of a dg-error directive in the
testcase to reflect improved location information.

gcc/cp/ChangeLog:
* parser.c (cp_parser_new_expression): Avoid accessing purged
tokens when getting end of location range.

gcc/testsuite/ChangeLog:
* g++.dg/cpp0x/nsdmi-template14.C: Move dg-error directive.
---
 gcc/cp/parser.c   | 10 +++---
 gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C |  4 ++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d859a89..f3d406e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -7957,9 +7957,13 @@ cp_parser_new_expression (cp_parser* parser)
  with caret == start at the start of the "new" token, and the end
  at the end of the final token we consumed.  */
   cp_token *end_tok = cp_lexer_previous_token (parser->lexer);
-  location_t end_loc = get_finish (end_tok->location);
-  location_t combined_loc = make_location (start_loc, start_loc, end_loc);
-
+  location_t combined_loc = start_loc;
+  if (!end_tok->purged_p)
+{
+  location_t end_loc = get_finish (end_tok->location);
+  gcc_assert (end_loc);
+  combined_loc = make_location (start_loc, start_loc, end_loc);
+}
   /* Create a representation of the new-expression.  */
   ret = build_new (&placement, type, nelts, &initializer, global_scope_p,
   tf_warning_or_error);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C 
b/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C
index 9cb01f1..47f5b63 100644
--- a/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C
+++ b/gcc/testsuite/g++.dg/cpp0x/nsdmi-template14.C
@@ -8,10 +8,10 @@ template struct A // { dg-error "has been parsed" }
 
 template struct B
 {
-  B* p = new B;
+  B* p = new B; // { dg-error "recursive instantiation of non-static data" }
 };
 
-B<1> x; // { dg-error "recursive instantiation of non-static data" }
+B<1> x;
 
 struct C
 {
-- 
1.8.5.3



[PATCH 07/10] Fix g++.dg/template/ref3.C

2015-12-03 Thread David Malcolm
Testcase g++.dg/template/ref3.C:

 1  // PR c++/28341
 2
 3  template struct A {};
 4
 5  template struct B
 6  {
 7A<(T)0> b; // { dg-error "constant|not a valid" }
 8A a; // { dg-error "constant|not a valid" }
 9  };
10
11  B b;

The output of this test for both c++11 and c++14 is unaffected
by the patch kit:
 g++.dg/template/ref3.C: In instantiation of 'struct B':
 g++.dg/template/ref3.C:11:15:   required from here
 g++.dg/template/ref3.C:7:11: error: '0' is not a valid template argument for 
type 'const int&' because it is not an lvalue
 g++.dg/template/ref3.C:8:11: error: '0' is not a valid template argument for 
type 'const int&' because it is not an lvalue

However, the c++98 output is changed:

Status quo for c++98:
g++.dg/template/ref3.C: In instantiation of 'struct B':
g++.dg/template/ref3.C:11:15:   required from here
g++.dg/template/ref3.C:7:11: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression
g++.dg/template/ref3.C:8:11: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression

(lines 7 and 8 are at the closing semicolon for fields b and a)

With the patchkit for c++98:
g++.dg/template/ref3.C: In instantiation of 'struct B':
g++.dg/template/ref3.C:11:15:   required from here
g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression
g++.dg/template/ref3.C:7:5: error: a cast to a type other than an integral or 
enumeration type cannot appear in a constant-expression

So the 2nd:
  "error: a cast to a type other than an integral or enumeration type cannot 
appear in a constant-expression"
moves from line 8 to line 7 (and both errors move to an earlier column,
now that they have ranges)

What's happening is that cp_parser_enclosed_template_argument_list
builds a CAST_EXPR, the first time from cp_parser_cast_expression,
the second time from cp_parser_functional_cast; these have locations
representing the correct respective caret&ranges, i.e.:

   A<(T)0> b;
 ^~~~

and:

   A a;
 ^~~~

Eventually finish_template_type is called for each, to build a RECORD_TYPE,
and we get a cache hit the 2nd time through here in pt.c:
8281  hash = spec_hasher::hash (&elt);
8282  entry = type_specializations->find_with_hash (&elt, hash);
8283
8284  if (entry)
8285return entry->spec;

due to:
  template_args_equal (ot=, nt=) at ../../src/gcc/cp/pt.c:7778
which calls:
  cp_tree_equal (t1=, t2=) 
at ../../src/gcc/cp/tree.c:2833
and returns equality.

Hence we get a single RECORD_TYPE for the type A<(T)(0)>, and hence
when issuing the errors it uses the TREE_VEC for the first one,
using the location of the first line.

I'm not sure what the ideal fix for this is; for now I've worked
around it by updating the dg directives to reflect the new output.

gcc/testsuite/ChangeLog:
* g++.dg/template/ref3.C: Update locations of dg directives.
---
 gcc/testsuite/g++.dg/template/ref3.C | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/template/ref3.C 
b/gcc/testsuite/g++.dg/template/ref3.C
index 976c093..6e568c3 100644
--- a/gcc/testsuite/g++.dg/template/ref3.C
+++ b/gcc/testsuite/g++.dg/template/ref3.C
@@ -4,8 +4,10 @@ template struct A {};
 
 template struct B
 {
-  A<(T)0> b; // { dg-error "constant|not a valid" }
-  A a; // { dg-error "constant|not a valid" }
+  A<(T)0> b; // { dg-error "constant" "" { target c++98_only } }
+  // { dg-error "not a valid" "" { target c++11 } 7 }
+
+  A a; // { dg-error "not a valid" "" { target c++11 } }
 };
 
 B b;
-- 
1.8.5.3



[PATCH 03/10] Fix g++.dg/gomp/loop-1.C

2015-12-03 Thread David Malcolm
The patch kit affects the locations of the errors reported by
g++.dg/gomp/loop-1.C.

I reviewed the new locations, and they seemed sane.

This patch updates the locations of omp_for_cond to use the location of
the cond if available, falling back to the existing behavior of using
input_location otherwise.  This improves the reported locations.

The patch also updates the testcase to reflect the various changes
to the locations.

For reference, here's the updated output from the testcase (with
caret-printing enabled):

g++.dg/gomp/loop-1.C: In function ‘void f1(int)’:
g++.dg/gomp/loop-1.C:21:3: error: initializer expression refers to iteration 
variable ‘i’
   for (i = i; i < 16; i++) /* { dg-error "initializer expression refers to 
iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:24:14: error: initializer expression refers to iteration 
variable ‘i’
   for (i = 2 * (i & x); i < 16; i++) /* { dg-error "initializer expression 
refers to iteration variable" } */
~~^

g++.dg/gomp/loop-1.C:27:3: error: initializer expression refers to iteration 
variable ‘i’
   for (i = bar (i); i < 16; i++) /* { dg-error "initializer expression refers 
to iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:30:3: error: initializer expression refers to iteration 
variable ‘i’
   for (i = baz (&i); i < 16; i++) /* { dg-error "initializer expression refers 
to iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:33:17: error: condition expression refers to iteration 
variable ‘i’
   for (i = 5; i < 2 * i + 17; i++) /* { dg-error "condition expression refers 
to iteration variable" } */
   ~~^~~~

g++.dg/gomp/loop-1.C:36:26: error: condition expression refers to iteration 
variable ‘i’
   for (i = 5; 2 * i + 17 > i; i++) /* { dg-error "condition expression refers 
to iteration variable" } */
   ~~~^~~

g++.dg/gomp/loop-1.C:39:23: error: condition expression refers to iteration 
variable ‘i’
   for (i = 5; bar (i) > i; i++) /* { dg-error "condition expression refers to 
iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:42:17: error: condition expression refers to iteration 
variable ‘i’
   for (i = 5; i <= baz (&i); i++) /* { dg-error "condition expression refers 
to iteration variable" } */
   ~~^~~

g++.dg/gomp/loop-1.C:45:17: error: condition expression refers to iteration 
variable ‘i’
   for (i = 5; i <= i; i++) /* { dg-error "invalid controlling 
predicate|condition expression refers to iteration variable" } */
   ~~^~~~

g++.dg/gomp/loop-1.C:48:3: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i += i) /* { dg-error "increment expression refers to 
iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:51:33: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i = i + 2 * i) /* { dg-error "invalid increment 
expression|increment expression refers to iteration variable" } */
   ~~^~~

g++.dg/gomp/loop-1.C:54:3: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i = i + i) /* { dg-error "increment expression refers to 
iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:57:35: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i = i + bar (i)) /* { dg-error "increment expression 
refers to iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:60:31: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i = baz (&i) + i) /* { dg-error "increment expression 
refers to iteration variable" } */
   ^~~~

g++.dg/gomp/loop-1.C:63:32: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i += bar (i)) /* { dg-error "increment expression refers 
to iteration variable" } */
^~~

g++.dg/gomp/loop-1.C:66:32: error: increment expression refers to iteration 
variable ‘i’
   for (i = 5; i < 16; i += baz (&i)) /* { dg-error "increment expression 
refers to iteration variable" } */
^~~~

g++.dg/gomp/loop-1.C:73:3: error: initializer expression refers to iteration 
variable ‘j’
   for (i = j; i < 16; i = i + 2) /* { dg-error "initializer expression refers 
to iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:77:3: error: initializer expression refers to iteration 
variable ‘i’
   for (i = 0; i < 16; i = i + 2) /* { dg-error "initializer expression refers 
to iteration variable" } */
   ^~~

g++.dg/gomp/loop-1.C:82:16: error: initializer expression refers to iteration 
variable ‘i’
 for (j = i + 3; j < 16; j += 2) /* { dg-error "initializer expression 
refers to iteration variable" } */
  ~~^~~

g++.dg/gomp/loop-1.C:85:3: error: initializer expression refers to iteration 
variable ‘i’
   for (i = 0; i < 16; i++) /* { dg-error "initializer expression refers to

Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-12-03 Thread Richard Biener
On Sat, Nov 14, 2015 at 12:35 AM, Jeff Law  wrote:
> On 11/13/2015 01:23 PM, Jeff Law wrote:
>>
>> On 11/13/2015 11:09 AM, Richard Biener wrote:
>>

 BTW Do we have an API for indicating that new blocks have been added to

 a loop?  If so, then we can likely drop the LOOPS_NEED_FIXUP.
>>>
>>>
>>> Please. It's called add_to_loop or so.
>>
>> Haha, the block duplication code was handling this already.  So in
>> theory I can just drop the LOOPS_NEED_FIXUP completely.  Testing now.
>>
>> jeff
>>
> Attached is the committed patch for path splitting.  As noted above, we
> didn't need the LOOPS_NEED_FIXUP in the final version, so that wart is gone
> :-)
>
> I do find myself wondering if this can/should be generalized beyond just
> paths heading to loop backedges.  However to do so I think we'd need to be
> able to undo this transformation reliably and we'd need some heuristics when
> to duplicate to expose the redundancy vs rely on PRE techniques and jump
> threading.  I vaguely remember a paper which touched on these topics, but I
> can't seem to find it.
>
> Anyway, bootstrapped and regression tested on x86_64-linux-gnu. Installed on
> the trunk.

This pass is now enabled by default with -Os but has no limits on the amount of
stmts it copies.  It also will make all loops with this shape have at least two
exits (if the resulting loop will be disambiguated the inner loop will
have two exits).
Having more than one exit will disable almost all loop optimizations after it.

The pass itself documents the transform it does but does zero to motivate it.

What's the benefit of this pass (apart from disrupting further optimizations)?

I can see a _single_ case where duplicating the latch will allow threading
one of the paths through the loop header to eliminate the original exit.  Then
disambiguation may create a nice nested loop out of this.  Of course that
is only profitable again if you know the remaining single exit of the inner
loop (exiting to the outer one) is executed infrequently (thus the inner loop
actually loops).

But no checks other than on the CFG shape exist (oh, it checks it will
at _least_ copy two stmts!).

Given the profitability constraints above (well, correct me if I am
wrong on these)
it looks like the whole transform should be done within the FSM threading
code which might be able to compute whether there will be an inner loop
with a single exit only.

I'm inclined to request the pass to be removed again or at least disabled by
default.

What closed source benchmark was this transform invented for?

Richard.

>
>
>
> commit c1891376e5dcc99ad8be2d22f9551c03f9bb2729
> Author: Jeff Law 
> Date:   Fri Nov 13 16:29:34 2015 -0700
>
> [Patch,tree-optimization]: Add new path Splitting pass on tree ssa
> representation
>
> * Makefile.in (OBJS): Add gimple-ssa-split-paths.o
> * common.opt (-fsplit-paths): New flag controlling path splitting.
> * doc/invoke.texi (fsplit-paths): Document.
> * opts.c (default_options_table): Add -fsplit-paths to -O2.
> * passes.def: Add split_paths pass.
> * timevar.def (TV_SPLIT_PATHS): New timevar.
> * tracer.c: Include "tracer.h"
> (ignore_bb_p): No longer static.
> (transform_duplicate): New function, broken out of tail_duplicate.
> (tail_duplicate): Use transform_duplicate.
> * tracer.h (ignore_bb_p): Declare
> (transform_duplicate): Likewise.
> * tree-pass.h (make_pass_split_paths): Declare.
> * gimple-ssa-split-paths.c: New file.
>
> * gcc.dg/tree-ssa/split-path-1.c: New test.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index dde2695..a7abe37 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,21 @@
> +2015-11-13  Ajit Agarwal  
> +   Jeff Law  
> +
> +   * Makefile.in (OBJS): Add gimple-ssa-split-paths.o
> +   * common.opt (-fsplit-paths): New flag controlling path splitting.
> +   * doc/invoke.texi (fsplit-paths): Document.
> +   * opts.c (default_options_table): Add -fsplit-paths to -O2.
> +   * passes.def: Add split_paths pass.
> +   * timevar.def (TV_SPLIT_PATHS): New timevar.
> +   * tracer.c: Include "tracer.h"
> +   (ignore_bb_p): No longer static.
> +   (transform_duplicate): New function, broken out of tail_duplicate.
> +   (tail_duplicate): Use transform_duplicate.
> +   * tracer.h (ignore_bb_p): Declare
> +   (transform_duplicate): Likewise.
> +   * tree-pass.h (make_pass_split_paths): Declare.
> +   * gimple-ssa-split-paths.c: New file.
> +
>  2015-11-13  Kai Tietz  
> Marek Polacek  
> Jason Merrill  
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index d3fd5e9..5c294df 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1277,6 +1277,7 @@ OBJS = \
> gimple-pretty-print.o \
> gimple-ssa-backprop.o \
> gimple-ssa-isolate-paths.o \
> +   gimple-ssa-split-paths.o \
>

[PATCH 08/10] Fix g++.dg/ubsan/pr63956.C

2015-12-03 Thread David Malcolm
With the location patch, various errors in g++.dg/ubsan/pr63956.C
change:

 8  constexpr int
 9  fn1 (int a, int b)
10  {
11if (b != 2)
12  a <<= b;
13return a;
14  }
15
16  constexpr int i1 = fn1 (5, 3);
17  constexpr int i2 = fn1 (5, -2); // { dg-error "is negative" }

Here's the first error as printed by the status quo:
g++.dg/ubsan/pr63956.C:17:24:   in constexpr expansion of ‘fn1(5, -2)’
g++.dg/ubsan/pr63956.C:17:30: error: right operand of shift expression ‘(5 << 
-2)’ is negative
 constexpr int i2 = fn1 (5, -2); // { dg-error "is negative" }
  ^

...and with the location patch:
g++.dg/ubsan/pr63956.C:17:24:   in constexpr expansion of ‘fn1(5, -2)’
g++.dg/ubsan/pr63956.C:12:11: error: right operand of shift expression ‘(5 << 
-2)’ is negative
 a <<= b;
   ^
I believe this is an improvement: we're now identifying both relevant
places, rather than just one, and clearly highlighting the exact
subexpression of interest.

Hence this patch updates the testcase to reflect the improved
location information.

gcc/testsuite/ChangeLog:
* g++.dg/ubsan/pr63956.C: Update dg directives to reflect
improved location information.
---
 gcc/testsuite/g++.dg/ubsan/pr63956.C | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/gcc/testsuite/g++.dg/ubsan/pr63956.C 
b/gcc/testsuite/g++.dg/ubsan/pr63956.C
index 185a719..b265631 100644
--- a/gcc/testsuite/g++.dg/ubsan/pr63956.C
+++ b/gcc/testsuite/g++.dg/ubsan/pr63956.C
@@ -10,15 +10,18 @@ fn1 (int a, int b)
 {
   if (b != 2)
 a <<= b;
+// { dg-error "5 << -2.. is negative" "" { target *-*-* } 12 }
+// { dg-error "is >= than the precision of the left operand" "" { target 
*-*-* } 12 }
+// { dg-error "-2 << 4.. is negative" "" { target *-*-* } 12 }
   return a;
 }
 
 constexpr int i1 = fn1 (5, 3);
-constexpr int i2 = fn1 (5, -2); // { dg-error "is negative" }
-constexpr int i3 = fn1 (5, sizeof (int) * __CHAR_BIT__); // { dg-error "is >= 
than the precision of the left operand" }
-constexpr int i4 = fn1 (5, 256); // { dg-error "is >= than the precision of 
the left operand" }
+constexpr int i2 = fn1 (5, -2); // { dg-message "in constexpr expansion" }
+constexpr int i3 = fn1 (5, sizeof (int) * __CHAR_BIT__); // { dg-message "in 
constexpr expansion" }
+constexpr int i4 = fn1 (5, 256); // { dg-message "in constexpr expansion" }
 constexpr int i5 = fn1 (5, 2);
-constexpr int i6 = fn1 (-2, 4); // { dg-error "is negative" }
+constexpr int i6 = fn1 (-2, 4); // { dg-message "in constexpr expansion" }
 constexpr int i7 = fn1 (0, 2);
 
 SA (i1 == 40);
@@ -30,13 +33,16 @@ fn2 (int a, int b)
 {
   if (b != 2)
 a >>= b;
+// { dg-error "4 >> -1.. is negative" "" { target *-*-* } 35 }
+// { dg-error "is >= than the precision of the left operand" "" { target 
*-*-* } 35 }
+
   return a;
 }
 
 constexpr int j1 = fn2 (4, 1);
-constexpr int j2 = fn2 (4, -1); // { dg-error "is negative" }
-constexpr int j3 = fn2 (10, sizeof (int) * __CHAR_BIT__); // { dg-error "is >= 
than the precision of the left operand" }
-constexpr int j4 = fn2 (1, 256); // { dg-error "is >= than the precision of 
the left operand" }
+constexpr int j2 = fn2 (4, -1); // { dg-message "in constexpr expansion" }
+constexpr int j3 = fn2 (10, sizeof (int) * __CHAR_BIT__); // { dg-message "in 
constexpr expansion" }
+constexpr int j4 = fn2 (1, 256); // { dg-message "in constexpr expansion" }
 constexpr int j5 = fn2 (5, 2);
 constexpr int j6 = fn2 (-2, 4);
 constexpr int j7 = fn2 (0, 4);
@@ -49,12 +55,12 @@ constexpr int
 fn3 (int a, int b)
 {
   if (b != 2)
-a = a / b;
+a = a / b; // { dg-error "..7 / 0.. is not a constant expression" }
   return a;
 }
 
 constexpr int k1 = fn3 (8, 4);
-constexpr int k2 = fn3 (7, 0); // { dg-error "is not a constant expression" }
+constexpr int k2 = fn3 (7, 0); // { dg-message "in constexpr expansion" }
 constexpr int k3 = fn3 (INT_MIN, -1); // { dg-error "overflow in constant 
expression" }
 
 SA (k1 == 2);
@@ -63,12 +69,12 @@ constexpr float
 fn4 (float a, float b)
 {
   if (b != 2.0)
-a = a / b;
+a = a / b; // { dg-error "is not a constant expression" }
   return a;
 }
 
 constexpr float l1 = fn4 (5.0, 3.0);
-constexpr float l2 = fn4 (7.0, 0.0); // { dg-error "is not a constant 
expression" }
+constexpr float l2 = fn4 (7.0, 0.0); // { dg-message "in constexpr expansion" }
 
 constexpr int
 fn5 (const int *a, int b)
-- 
1.8.5.3



[PATCH 10/10] Fix g++.dg/warn/Wconversion-real-integer2.C

2015-12-03 Thread David Malcolm
This testcase's output is changed by the patchkit from printing at the "=":

BEFORE:
g++.dg/warn/Wconversion-real-integer2.C: In function 'void h()':
g++.dg/warn/Wconversion-real-integer2.C:32:12: warning: conversion to 'float' 
alters 'int' constant value [-Wfloat-conversion]
 vfloat = INT_MAX; // { dg-warning "conversion to .float. alters .int. 
constant value" }
^
to showing the token of interest and its macro expansion:

AFTER:
g++.dg/warn/Wconversion-real-integer2.C: In function ‘void h()’:
g++.dg/warn/Wconversion-real-integer2.C:26:17: warning: conversion to ‘float’ 
alters ‘int’ constant value [-Wfloat-conversion]
 #define INT_MAX __INT_MAX__
 ^

g++.dg/warn/Wconversion-real-integer2.C:32:14: note: in expansion of macro 
‘INT_MAX’
 vfloat = INT_MAX; // { dg-warning "conversion to .float. alters .int. 
constant value" }
  ^~~

This is an improvement, so this patch updates the test case accordingly.

gcc/testsuite/ChangeLog:
* g++.dg/warn/Wconversion-real-integer2.C: Update location of
dg-warning; add a dg-message.
---
 gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C 
b/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
index 0494588..7e39d5f 100644
--- a/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
+++ b/gcc/testsuite/g++.dg/warn/Wconversion-real-integer2.C
@@ -23,11 +23,11 @@
 //
 // That is more useful.
 
-#define INT_MAX __INT_MAX__ 
+#define INT_MAX __INT_MAX__ // { dg-warning "17: conversion to .float. alters 
.int. constant value" }
 
 float  vfloat;
 
 void h (void)
 {
-vfloat = INT_MAX; // { dg-warning "conversion to .float. alters .int. 
constant value" }
+vfloat = INT_MAX; // { dg-message "14: in expansion of macro .INT_MAX." }
 }
-- 
1.8.5.3



[PATCH 04/10] Fix g++.dg/template/crash55.C

2015-12-03 Thread David Malcolm
The patch kit changes the output of this case:

  1  //PR c++/27668
  2
  3  template // { dg-error 
"nested-name-specifier|two or more|valid type" }
  4  struct A {};
  5
  6  template void foo(A);  // { dg-error "cast|argument" "" { target 
c++98_only } }

but only for c++98, from:
  g++.dg/template/crash55.C:3:19: error: expected nested-name-specifier before 
'class'
  g++.dg/template/crash55.C:3:25: error: two or more data types in declaration 
of 'parameter'
  g++.dg/template/crash55.C:3:34: error: 'class T' is not a valid type for a 
template non-type parameter
  g++.dg/template/crash55.C:6:29: error: a cast to a type other than an 
integral or enumeration type cannot appear in a constant-expression
  g++.dg/template/crash55.C:6:29: error: template argument 2 is invalid
to:
  g++.dg/template/crash55.C:3:19: error: expected nested-name-specifier before 
'class'
  g++.dg/template/crash55.C:3:25: error: two or more data types in declaration 
of 'parameter'
  g++.dg/template/crash55.C:3:34: error: 'class T' is not a valid type for a 
template non-type parameter
  g++.dg/template/crash55.C:3:32: error: a cast to a type other than an 
integral or enumeration type cannot appear in a constant-expression
  g++.dg/template/crash55.C:6:29: error: template argument 2 is invalid

i.e. the 4th error moves from line 6 to line 3
("a cast to a type other than an integral or enumeration type cannot appear in 
a constant-expression")

This change is reasonable, so the patch updates the dg-error
directives accordingly.

gcc/testsuite/ChangeLog:
* g++.dg/template/crash55.C: Update dg-error directives.
---
 gcc/testsuite/g++.dg/template/crash55.C | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/template/crash55.C 
b/gcc/testsuite/g++.dg/template/crash55.C
index 9b80fd1..b9b29f7 100644
--- a/gcc/testsuite/g++.dg/template/crash55.C
+++ b/gcc/testsuite/g++.dg/template/crash55.C
@@ -1,6 +1,7 @@
 //PR c++/27668
 
 template<typename class T, T = T()> // { dg-error "nested-name-specifier|two 
or more|valid type" }
+// { dg-error "cast" "" { target c++98_only } 3 }
 struct A {};
 
-template<int> void foo(A<int>);// { dg-error "cast|argument" "" { 
target c++98_only } }
+template<int> void foo(A<int>);// { dg-error "template argument 2" "" 
{ target c++98_only } }
-- 
1.8.5.3



Re: [PR67383][ARM][4.9]Backport of "Allow any register for DImode values in Thumb2"

2015-12-03 Thread Christophe Lyon
On 27 November 2015 at 12:26, Ramana Radhakrishnan
 wrote:
>
>
> On 27/11/15 09:40, Renlin Li wrote:
>> Hi Ramana,
>>
>> On 16/10/15 14:54, Renlin Li wrote:
>>>
>>>
 The command line implies we remove r7 (frame pointer in Thumb2 - 
 historical accident, fno-omit-frame-pointer), r9 (ffixed-r9), r10 
 (-mpic-register) which
 leaves us with:

 * r0, r1
 * r2, r3
 * r4, r5

 as the only free registers available for DImode values for the whole 
 compilation.

 We then have r0, r1 and r2 live across the insn which means that there are 
 no free registers to handle DImode values
 under the constraints provided unless LRA / reload can spill the argument 
 registers which it doesn't seem to be able to do
 in this particular testcase. Vlad, is that correct ?
>>> According to the logic, conflict hard register are excluded from spill 
>>> candidate. That's why, in this case, r0, r1, r2 cannot be used.
>>
>>
>> In the test case, there are code structure like this.
>>
>>
>> uint64_t callee (int a, int b, int c, int d);
>> uint64_t caller (int a, int b, int c, int d)
>> {
>>   uint64_t res;
>> /*
>> single BB contains complicated data processing which requires register pair
>> */
>>
>>   res = callee (tmp, b ,c, d);
>>   return res;
>> }
>>
>> CES pass in this case will extend the hard register live range across the 
>> whole BB until the callee. In this case, r1, r2, r3 are excluded from 
>> allocatable registers.
>>
>> There are places in CES which prevent extending the hard register's live 
>> range, for example for hard registers which fulfill 
>> small_register_classes_for_mode_p(), class_likely_spilled_p(). However, 
>> argument registers belong to neither of them.
>>
>> I tried to stop CES from extending argument registers live range. However, 
>> later, scheduler jumps in and re-orders the instruction to reduce the pseudo 
>> register pressure, which in effect extend the argument register live again.
>
> Thanks for digging further and trying to figure out what the solution was. I 
> can't think of a less risky fix than what you have proposed, thus Ok if no 
> regressions.
>
>

Hi,

I have noticed regressions after this commit to the 4.9 branch:
Passed now fails  [PASS => FAIL]:
  gcc.c-torture/compile/pr34856.c  -O3 -fomit-frame-pointer
-funroll-all-loops -finline-functions  (test for excess errors)
  gcc.c-torture/compile/pr34856.c  -O3 -fomit-frame-pointer
-funroll-loops  (test for excess errors)
Pass disappears   [PASS => ]:
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects
Fail appears  [ => FAIL]:
  gcc.c-torture/compile/pr34856.c  -O3 -fomit-frame-pointer
-funroll-all-loops -finline-functions  (internal compiler error)
  gcc.c-torture/compile/pr34856.c  -O3 -fomit-frame-pointer
-funroll-loops  (internal compiler error)
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2  (internal
compiler error)
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (internal compiler error)
  gcc.c-torture/execute/scal-to-vec1.c compilation,  -O2 -flto
-fuse-linker-plugin -fno-fat-lto-objects  (internal compiler error)

See the red links in
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/gcc-4_9-branch/231177/report-build-info.html

Christophe.

> regards
> Ramana
>
>
>
>
>
>>
>> Regards,
>>
>> Renlin Li
>>
>>
>>


Re: [PATCH] S/390: Fix warning in "*movstr" pattern.

2015-12-03 Thread Dominik Vogt
Version 5 with two fixes to the test case.  :-/  (ChangeLog is the
same.)

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
>From 5965f62501b271285bacb90b11ef3f748338d1cf Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 3 Nov 2015 18:03:02 +0100
Subject: [PATCH] S/390: Fix warning in "*movstr" pattern.

---
 gcc/config/s390/s390.md | 20 ---
 gcc/testsuite/gcc.target/s390/md/movstr-1.c | 30 +
 gcc/testsuite/gcc.target/s390/s390.exp  | 25 +++-
 3 files changed, 67 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/md/movstr-1.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index e5db537..7eca315 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -2910,13 +2910,27 @@
 ;
 
 (define_expand "movstr"
+  ;; The pattern is never generated.
+  [(match_operand 0 "" "")
+   (match_operand 1 "" "")
+   (match_operand 2 "" "")]
+  ""
+{
+  if (TARGET_64BIT)
+emit_insn (gen_movstrdi (operands[0], operands[1], operands[2]));
+  else
+emit_insn (gen_movstrsi (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "movstr"
   [(set (reg:SI 0) (const_int 0))
(parallel
 [(clobber (match_dup 3))
  (set (match_operand:BLK 1 "memory_operand" "")
 	  (match_operand:BLK 2 "memory_operand" ""))
- (set (match_operand 0 "register_operand" "")
-	  (unspec [(match_dup 1)
+ (set (match_operand:P 0 "register_operand" "")
+	  (unspec:P [(match_dup 1)
 		   (match_dup 2)
 		   (reg:SI 0)] UNSPEC_MVST))
  (clobber (reg:CC CC_REGNUM))])]
@@ -2937,7 +2951,7 @@
(set (mem:BLK (match_operand:P 1 "register_operand" "0"))
 	(mem:BLK (match_operand:P 3 "register_operand" "2")))
(set (match_operand:P 0 "register_operand" "=d")
-	(unspec [(mem:BLK (match_dup 1))
+	(unspec:P [(mem:BLK (match_dup 1))
 		 (mem:BLK (match_dup 3))
 		 (reg:SI 0)] UNSPEC_MVST))
(clobber (reg:CC CC_REGNUM))]
diff --git a/gcc/testsuite/gcc.target/s390/md/movstr-1.c b/gcc/testsuite/gcc.target/s390/md/movstr-1.c
new file mode 100644
index 000..6ab0050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/md/movstr-1.c
@@ -0,0 +1,30 @@
+/* Machine description pattern tests.  */
+
+/* { dg-do run } */
+/* { dg-options "-dP -save-temps" } */
+
+__attribute__ ((noinline))
+void test(char *dest, const char *src)
+{
+  __builtin_stpcpy (dest, src);
+}
+
+/* { dg-final { scan-assembler-times {{[*]movstr}} 1 } } */
+
+#include <stdio.h>
+#include <string.h>
+
+#define LEN 200
+char buf[LEN];
+
+int main(void)
+{
+  memset(buf, 0, LEN);
+  test(buf, "hello world!");
+  if (strcmp(buf, "hello world!") != 0)
+{
+  fprintf(stderr, "error: test() failed\n");
+  return 1;
+}
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
index 0b8f80ed..0d7a7eb 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -61,20 +61,35 @@ if ![info exists DEFAULT_CFLAGS] then {
 # Initialize `dg'.
 dg-init
 
-set hotpatch_tests $srcdir/$subdir/hotpatch-\[0-9\]*.c
+set md_tests $srcdir/$subdir/md/*.c
 
 # Main loop.
 dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.\[cS\]] \
-			 $hotpatch_tests]] "" $DEFAULT_CFLAGS
+			 $md_tests]] "" $DEFAULT_CFLAGS
 
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \
 	"" $DEFAULT_CFLAGS
 
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/md/*.\[cS\]]] \
+	"" $DEFAULT_CFLAGS
+
 # Additional hotpatch torture tests.
 torture-init
-set HOTPATCH_TEST_OPTS [list -Os -O0 -O1 -O2 -O3]
-set-torture-options $HOTPATCH_TEST_OPTS
-gcc-dg-runtest [lsort [glob -nocomplain $hotpatch_tests]] "" $DEFAULT_CFLAGS
+set-torture-options [list -Os -O0 -O1 -O2 -O3]
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/hotpatch-\[0-9\]*.c]] \
+	"" $DEFAULT_CFLAGS
+torture-finish
+
+# Additional md torture tests.
+torture-init
+set MD_TEST_OPTS [list \
+	{-Os -march=z900} {-Os -march=z13} \
+	{-O0 -march=z900} {-O0 -march=z13} \
+	{-O1 -march=z900} {-O1 -march=z13} \
+	{-O2 -march=z900} {-O2 -march=z13} \
+	{-O3 -march=z900} {-O3 -march=z13}]
+set-torture-options $MD_TEST_OPTS
+gcc-dg-runtest [lsort [glob -nocomplain $md_tests]] "" $DEFAULT_CFLAGS
 torture-finish
 
 # All done.
-- 
2.3.0



Re: [PATCH,RFC] Introduce RUN_UNDER_VALGRIND in test-suite

2015-12-03 Thread Bernd Schmidt

On 11/23/2015 10:34 AM, Martin Liška wrote:

On 11/21/2015 05:26 AM, Hans-Peter Nilsson wrote:

IIRC you can replace the actual dg-runtest proc with your own
(implementing a wrapper).  Grep around, I think we do that
already.  That's certainly preferable instead of touching all
callers.


You are right, the suggested patch was over-kill, wrapper should be fine for 
that.
Currently I've been playing with a bit different approach (suggested by Markus),
where I would like to enable valgrind in gcc.c using an environmental variable.

Question is if it should replace existing ENABLE_VALGRIND_CHECKING and how to
integrate it with a valgrind suppressions file?


This patch still seems to be in the queue. I've been looking at it every 
now and then, without really forming an opinion. In any case, I think 
we'll need to postpone this to stage1 at this point.


Wouldn't it be better to fix issues first and only then enable running 
the testsuite with valgrind, rather than make a suppression file?


Your latest patch seems to add the option of running the compiler 
without ENABLE_VALGRIND_CHECKING being defined. Doesn't this run into 
problems when the support in ggc isn't compiled in?



Bernd


[PTX] reorganize data space handling

2015-12-03 Thread Nathan Sidwell
The PTX backend superficially looks like it is using the address space extension 
mechanism to handle the various PTX data areas.  However, it is not really doing 
that -- the ADDR_SPACE #define values are not registered with the address space 
handling.  The addr_space_t enumeration is used to hold values not of that type.


GCC already has a mechanism to handle symbols that need special addressing 
instructions -- SYMBOL_REF_FLAGS & the TARGET_ENCODE_SECTION_INFO hook.


This patch uses those to mark SYMBOL_REFs with the PTX section they are placed 
in and then uses those same flags when emitting the cvta insn to get the 
address, the load/store directly accessing them, and the object emission code.


We still have a single unspec 'UNSPEC_TO_GENERIC' to move a SYMBOL_REF into a 
register.  You'll probably notice this is really just a fancy mov insn.  I'm 
sure with a little tinkering with the move insn predicates and constraints, that 
unspec can go away too, but I didn't want to tackle that in this patch.


nathan
2015-12-03  Nathan Sidwell  

	gcc/
	* config/nvptx/nvptx-protos.h (nvptx_section_from_addr_space):	Delete.
	* config/nvptx/nvptx.c (enum nvptx_data_area): New.
	(SYMBOL_DATA_AREA, SET_SYMBOL_DATA_AREA): New defines.
	(nvptx_option_override): Set data areas for worker vars.
	(nvptx_addr_space_from_sym): Delete.
	(nvptx_encode_section_info): New.
	(section_for_sym, section_for_decl): New.
	(nvptx_maybe_convert_symbolic_operand): Get data area from symbol
	flags.
	(nvptx_section_from_addr_space): Delete.
	(nvptx_section_for_decl): Delete.
	(nvptx_output_aligned, nvptx_declare_object_name,
	nvptx_assemble_undefined_decl): Use section_for_decl, remove
	unnecessary checks.
	(nvptx_print_operand): Add 'D', adjust 'A'.
	(nvptx_expand_worker_addr): Adjust unspec generation.
	(TARGET_ENCODE_SECTION_INFO): Override.
	* config/nvptx/nvptx.h (ADDR_SPACE_GLOBAL, ADDR_SPACE_SHARED,
	ADDR_SPACE_CONST, ADDR_SPACE_LOCAL, ADDR_SPACE_PARAM): Delete.
	* config/nvptx/nvptx.md (UNSPEC_FROM_GLOBAL, UNSPEC_FROM_LOCAL,
	UNSPEC_FROM_PARAM, UNSPEC_FROM_SHARED, UNSPEC_FROM_CONST,
	UNSPEC_TO_GLOBAL, UNSPEC_TO_LOCAL, UNSPEC_TO_PARAM,
	UNSPEC_TO_SHARED, UNSPEC_TO_CONST): Delete.
	(UNSPEC_TO_GENERIC): New.
	(nvptx_register_or_symbolic_operand): Delete.
	(cvt_code, cvt_name, cvt_str): Delete.
	(convaddr_ [P]): Delete.
	(convaddr_ [P]): New.

	gcc/testsuite/
	* gcc.target/nvptx/decl.c: New.
	* gcc.target/nvptx/uninit-decl.c: Robustify regexps.

Index: gcc/config/nvptx/nvptx-protos.h
===
--- gcc/config/nvptx/nvptx-protos.h	(revision 231226)
+++ gcc/config/nvptx/nvptx-protos.h	(working copy)
@@ -41,7 +41,6 @@ extern const char *nvptx_ptx_type_from_m
 extern const char *nvptx_output_mov_insn (rtx, rtx);
 extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
 extern const char *nvptx_output_return (void);
-extern const char *nvptx_section_from_addr_space (addr_space_t);
 extern bool nvptx_hard_regno_mode_ok (int, machine_mode);
 extern rtx nvptx_maybe_convert_symbolic_operand (rtx);
 #endif
Index: gcc/config/nvptx/nvptx.c
===
--- gcc/config/nvptx/nvptx.c	(revision 231226)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -80,6 +80,25 @@ enum nvptx_shuffle_kind
   SHUFFLE_MAX
 };
 
+/* The various PTX memory areas an object might reside in.  */
+enum nvptx_data_area
+{
+  DATA_AREA_GENERIC,
+  DATA_AREA_GLOBAL,
+  DATA_AREA_SHARED,
+  DATA_AREA_LOCAL,
+  DATA_AREA_CONST,
+  DATA_AREA_PARAM,
+  DATA_AREA_MAX
+};
+
+/*  We record the data area in the target symbol flags.  */
+#define SYMBOL_DATA_AREA(SYM) \
+  (nvptx_data_area)((SYMBOL_REF_FLAGS (SYM) >> SYMBOL_FLAG_MACH_DEP_SHIFT) \
+		& 7)
+#define SET_SYMBOL_DATA_AREA(SYM,AREA) \
+  (SYMBOL_REF_FLAGS (SYM) |= (AREA) << SYMBOL_FLAG_MACH_DEP_SHIFT)
+
 /* Record the function decls we've written, and the libfuncs and function
decls corresponding to them.  */
 static std::stringstream func_decls;
@@ -154,9 +173,11 @@ nvptx_option_override (void)
 = hash_table::create_ggc (17);
 
   worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, worker_bcast_name);
+  SET_SYMBOL_DATA_AREA (worker_bcast_sym, DATA_AREA_SHARED);
   worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 
   worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, worker_red_name);
+  SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
   worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 }
 
@@ -194,22 +215,49 @@ nvptx_ptx_type_from_mode (machine_mode m
 }
 }
 
-/* Determine the address space to use for SYMBOL_REF SYM.  */
+/* Encode the PTX data area that DECL (which might not actually be a
+   _DECL) should reside in.  */
 
-static addr_space_t
-nvptx_addr_space_from_sym (rtx sym)
+static void
+nvptx_encode_section_info (tree decl, rtx rtl, int first)
 {
-  tree decl = SYMBOL_REF_DECL (sym);
-  if (decl == NULL_TREE || TREE_CODE (decl) == FUNCTION_DECL)
-

Re: [1/2] OpenACC routine support

2015-12-03 Thread Cesar Philippidis
On 12/03/2015 12:36 AM, Thomas Schwinge wrote:

>> Here's the updated patch.
> 
> ENOPATCH.

Here it is.

>> The test cases were written in a way such that
>> none of them needed to be updated with these changes.
> 
> ... which potentially means they'd match for all kinds of "random"
> diagnostics.  ;-)

They were supposed to be generic enough so that they work both in c and
c++. But, yeah, that randomness is likely.

>> I'm tempted to commit this as obvious, but I want to make sure you're ok
>> with these new messages.
> 
> I don't care very much, as long as it's understandable for a user.  I
> just tripped over this because of mismatches between C and C++ as well as
> different C++ diagnostic variants.
> 
>> The major change is to report these errors as
>> "pragma acc routine not followed by a function declaration or
>> definition". I think that's more descriptive than "not followed by a
>> single function". That said, it looks like the c front end uses the
>> latter error message.
> 
> (In the C front end, the "a" is missing: "not followed by single
> function"; that should be fixed up as well.)
> 
>> Is this OK or do you prefer the "not followed by a single function" message?
> 
> "not followed by a function declaration or definition" sounds good to me.

Ok, I'll apply this patch in a couple of hours.

Cesar
2015-12-02  Cesar Philippidis  

	gcc/cp/
	* parser.c (cp_ensure_no_oacc_routine): Update error message.
	(cp_parser_oacc_routine): Likewise.
	(cp_parser_late_parsing_oacc_routine): Likewise.  Update comment
	describing this function.
	(cp_finalize_oacc_routine): Update error message.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index b4ecac7..1c14354 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -1329,7 +1329,7 @@ cp_ensure_no_oacc_routine (cp_parser *parser)
   tree clauses = parser->oacc_routine->clauses;
   location_t loc = OMP_CLAUSE_LOCATION (TREE_PURPOSE (clauses));
 
-  error_at (loc, "%<#pragma oacc routine%> not followed by function "
+  error_at (loc, "%<#pragma acc routine%> not followed by a function "
 		"declaration or definition");
   parser->oacc_routine = NULL;
 }
@@ -35857,7 +35857,7 @@ cp_parser_oacc_routine (cp_parser *parser, cp_token *pragma_tok,
 	  cp_parser_require_pragma_eol (parser, pragma_tok);
 
 	  error_at (OMP_CLAUSE_LOCATION (parser->oacc_routine->clauses),
-		"%<#pragma oacc routine%> not followed by a single "
+		"%<#pragma acc routine%> not followed by a "
 		"function declaration or definition");
 
 	  parser->oacc_routine->error_seen = true;
@@ -35962,7 +35962,7 @@ cp_parser_oacc_routine (cp_parser *parser, cp_token *pragma_tok,
 	  if (parser->oacc_routine
 	  && !parser->oacc_routine->error_seen
 	  && !parser->oacc_routine->fndecl_seen)
-	error_at (loc, "%<#pragma acc routine%> not followed by "
+	error_at (loc, "%<#pragma acc routine%> not followed by a "
 		  "function declaration or definition");
 
 	  data.tokens.release ();
@@ -35972,7 +35972,7 @@ cp_parser_oacc_routine (cp_parser *parser, cp_token *pragma_tok,
 }
 
 /* Finalize #pragma acc routine clauses after direct declarator has
-   been parsed, and put that into "oacc routine" attribute.  */
+   been parsed, and put that into "oacc function" attribute.  */
 
 static tree
 cp_parser_late_parsing_oacc_routine (cp_parser *parser, tree attrs)
@@ -35987,7 +35987,7 @@ cp_parser_late_parsing_oacc_routine (cp_parser *parser, tree attrs)
   if ((!data->error_seen && data->fndecl_seen)
   || data->tokens.length () != 1)
 {
-  error_at (loc, "%<#pragma oacc routine%> not followed by a single "
+  error_at (loc, "%<#pragma acc routine%> not followed by a "
 		"function declaration or definition");
   data->error_seen = true;
   return attrs;
@@ -36003,7 +36003,7 @@ cp_parser_late_parsing_oacc_routine (cp_parser *parser, tree attrs)
 
   cp_token *pragma_tok = cp_lexer_consume_token (parser->lexer);
   cl = cp_parser_oacc_all_clauses (parser, OACC_ROUTINE_CLAUSE_MASK,
-  "#pragma oacc routine", pragma_tok);
+  "#pragma acc routine", pragma_tok);
   cp_parser_pop_lexer (parser);
 
   tree c_head = build_omp_clause (loc, OMP_CLAUSE_SEQ);
@@ -36044,7 +36044,8 @@ cp_finalize_oacc_routine (cp_parser *parser, tree fndecl, bool is_defn)
   if (!fndecl || TREE_CODE (fndecl) != FUNCTION_DECL)
 	{
 	  error_at (loc,
-		"%<#pragma acc routine%> not followed by single function");
+		"%<#pragma acc routine%> not followed by a function "
+		"declaration or definition");
 	  parser->oacc_routine = NULL;
 	}
 	  


Re: [gomp-nvptx 2/9] nvptx backend: new "uniform SIMT" codegen variant

2015-12-03 Thread Alexander Monakov
On Wed, 2 Dec 2015, Nathan Sidwell wrote:
> On 12/02/15 12:09, Alexander Monakov wrote:
> 
> > I meant the PTX linked (post PTX-JIT link) image, so regardless of support,
> > it's not an issue.  E.g. check early in gomp_nvptx_main if .weak
> > __nvptx_has_simd != 0.  It would only break if there was dlopen on PTX.
> 
> Note I found a bug in .weak support.  See the comment in
> gcc.dg/special/weak-2.c
> 
> /* NVPTX's implementation of weak is broken when a strong symbol is in
>a later object file than the weak definition.   */

Thanks for the warning.  However, the issue seems limited to function symbols:
I've made a test for data symbols, and they appear to work fine -- which
suffices in this context.

Alexander


Re: [Patch,microblaze]: Instruction prefetch optimization for microblaze.

2015-12-03 Thread Michael Eager

On 12/01/2015 12:49 AM, Ajit Kumar Agarwal wrote:

The changes are made in this patch for the instruction prefetch optimizations 
for Microblaze.

Reg tested for Microblaze target.

The changes are made for instruction prefetch optimizations for Microblaze. The 
"wic" microblaze instruction is the
instruction prefetch instruction. The instruction prefetch optimization is done 
to generate the iprefetch instruction
at the call site fall through path. This optimization is enabled with  
microblaze target flag mxl-prefetch. The purpose
of adding the flags is that selection of "wic" instruction should be enabled in 
the reconfigurable design and the
selection is not enabled by default.

ChangeLog:
2015-12-01  Ajit Agarwal  

* config/microblaze/microblaze.c
(get_branch_target): New.
(insert_wic_for_ilb_runout): New.
(insert_wic): New.
(microblaze_machine_dependent_reorg): New.
(TARGET_MACHINE_DEPENDENT_REORG): Define macro.
* config/microblaze/microblaze.md
(UNSPEC_IPREFETCH): Define.
(iprefetch): New pattern
* config/microblaze/microblaze.opt
(mxl-prefetch): New flag.

Signed-off-by:Ajit Agarwal ajit...@xilinx.com


Thanks & Regards
Ajit



+  rtx_insn *insn, *before_4 = 0, *before_16 = 0;
+  int addr = 0, length, first_addr = -1;
+  int wic_addr0 = 128 * 4, wic_addr1 = 128 * 4;

Especially when there are initializers, I prefer to see each variable declared
on a separate line.  If the meaning of a variable is not clear (and most of
these are not), include a comment before the declaration.

+if (first_addr == -1)
+  first_addr = INSN_ADDRESSES (INSN_UID (insn));

Can be moved to initialize first_addr.

+addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;

Is "addr" an address or offset?  If the latter, use a more descriptive name.

+if (before_4 == 0 && addr + length >= 4 * 4)
+  before_4 = insn;
...

Please add comments to describe what you are doing here.  What are before_4
and before_16?  What are all these conditions testing?

+  loop_optimizer_finalize();

Space before parens.

--
Michael Eagerea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077


Re: [PATCH] RFC: Use Levenshtein spelling suggestions in Fortran FE

2015-12-03 Thread Mikael Morin

Le 03/12/2015 10:29, Janne Blomqvist a écrit :

On Tue, Dec 1, 2015 at 7:51 PM, Bernhard Reutner-Fischer
 wrote:

As said, we could as well use a list of candidates with NULL as record marker.
Implementation cosmetics. Steve seems to not be thrilled by the
overall idea in the first place, so unless there is clear support by
somebody else i won't pursue this any further, it's not that i'm bored
or ran out of stuff i should do.. ;)


FWIW, I think the idea of this patch is quite nice, and I'd like to
see it in the compiler.


I like this feature as well.


I'm personally Ok with "C++-isms", but nowadays my contributions are
so minor that my opinion shouldn't carry that much weight on this
matter.


Same here.
David Malcolm suggested to move the candidate selection code to the 
common middle-end infrastructure, which would move half of the so-called 
"bloat" there.  Steve, would that work for you?


It seems to me that the remaining C++-isms are rather acceptable.
I do agree that the vec implementation details seem overly complex for 
something whose job is just the memory management of a growing (or 
shrinking) vector.  However, the API is consistent and self-explanatory, 
and the usage of it that is made here (just a few "safe_push") is not 
more complex than what would be done with a C-only API.


Mikael


Re: Documentation tweaks for internal-fn-related optabs

2015-12-03 Thread Bernd Schmidt

On 12/03/2015 02:06 PM, Richard Sandiford wrote:

As Bernd requested, this patch adds "This pattern cannot FAIL" to the
documentation of optabs that came to be mapped to internal functions.
For consistency I did the same for optabs that were already being
used for internal functions.

Many of the optabs weren't documented in the first place, so I added
entries for the missing ones.  Also, there were some inaccuracies in
the documentation of the rounding optabs.  The bitcount optabs said
that operand 0 has mode @var{m} and that operand 1 is under target
control, whereas it should be the other way around.


That actually goes beyond what I imagined. I was looking at the top part 
of md.texi (line 87), where there is a brief discussion of what is 
allowed to FAIL and what isn't. Also, there is "@item FAIL":


  "Failure is currently supported only for binary (addition,
   multiplication, shifting, etc.) and bit-field (@code{extv},
   @code{extzv}, and @code{insv}) operations."

That's pretty outdated. I think unary operations are probably missing by 
accident, and from what my grep showed there are also conditional moves, 
atomic operations, certain vec_ patterns that can all fail. As a minimum 
this paragraph should also mention internal functions.


Thank you for this patch, it is OK, but we probably ought to tweak at 
least the @item FAIL sections as well.



Bernd


Re: [PATCH][RTL-ifcvt] PR rtl-optimization/68624: Clean up logic that checks for clobbering conflicts across basic blocks

2015-12-03 Thread Bernd Schmidt

On 12/03/2015 10:33 AM, Kyrill Tkachov wrote:

 PR rtl-optimization/68624
 * ifcvt.c (noce_try_cmove_arith): Check clobbers of temp regs in both
 blocks if they exist and simplify the logic choosing the order to emit
 them in.

2015-12-03  Kyrylo Tkachov  

 PR rtl-optimization/68624
 * gcc.c-torture/execute/pr68624.c: New test.


I think this is good. OK.


Bernd


Re: [PATCH] Empty redirect_edge_var_map after each pass and function

2015-12-03 Thread Richard Biener
On Thu, 3 Dec 2015, Richard Biener wrote:

> On Thu, 3 Dec 2015, Alan Lawrence wrote:
> 
> > On 02/12/15 14:13, Jeff Law wrote:
> > > On 12/02/2015 01:33 AM, Richard Biener wrote:
> > > > > Right.  So the question I have is how/why did DOM leave anything in 
> > > > > the
> > > > > map.
> > > > > And if DOM is fixed to not leave stuff lying around, can we then 
> > > > > assert
> > > > > that
> > > > > nothing is ever left in those maps between passes?  There's certainly 
> > > > > no
> > > > > good
> > > > > reason I'm aware of why DOM would leave things in this state.
> > > > 
> > > > It happens not only with DOM but with all passes doing edge redirection.
> > > > This is because the map is populated by GIMPLE cfg hooks just in case
> > > > it might be used.  But there is no such thing as a "start CFG manip"
> > > > and "end CFG manip" to cleanup such dead state.
> > > Sigh.
> > > 
> > > > 
> > > > IMHO the redirect-edge-var-map stuff is just the very most possible
> > > > unclean implementation possible. :(  (see how remove_edge "clears"
> > > > stale info from the map to avoid even more "interesting" stale
> > > > data)
> > > > 
> > > > Ideally we could assert the map is empty whenever we leave a pass,
> > > > but as said it triggers all over the place.  Even cfg-cleanup causes
> > > > such stale data.
> > > > 
> > > > I agree that the patch is only a half-way "solution", but a full
> > > > solution would require sth more explicit, like we do with
> > > > initialize_original_copy_tables/free_original_copy_tables.  Thus
> > > > require passes to explicitly request the edge data to be preserved
> > > > with a initialize_edge_var_map/free_edge_var_map call pair.
> > > > 
> > > > Not appropriate at this stage IMHO (well, unless it turns out to be
> > > > a very localized patch).
> > > So maybe as a follow-up to aid folks in the future, how about a debugging
> > > verify_whatever function that we can call manually if debugging a problem 
> > > in
> > > this space.  With a comment indicating why we can't call it 
> > > unconditionally
> > > (yet).
> > > 
> > > 
> > > jeff
> > 
> > I did a (fwiw disable bootstrap) build with the map-emptying code in 
> > passes.c
> > (not functions.c), printing out passes after which the map was non-empty
> > (before emptying it, to make sure passes weren't just carrying through stale
> > data from earlier). My (non-exhaustive!) list of passes after which the
> > edge_var_redirect_map can be non-empty stands at...
> > 
> > aprefetch ccp cddce ch ch_vect copyprop crited crited cselim cunroll 
> > cunrolli
> > dce dom ehcleanup einline esra fab fnsplit forwprop fre graphite ifcvt
> > isolate-paths ldist lim local-pure-const mergephi oaccdevlow ompexpssa
> > optimized parloops pcom phicprop phiopt phiprop pre profile profile_estimate
> > sccp sink slsr split-paths sra switchconv tailc tailr tracer unswitch
> > veclower2 vect vrm vrp whole-program
> 
> Yeah, exactly my findings...  note that most of the above are likely
> due to cfgcleanup even though it already does sth like
> 
>   e = redirect_edge_and_branch (e, dest);
>   redirect_edge_var_map_clear (e);
> 
> so eventually placing a redirect_edge_var_map_empty () at the end
> of the cleanup_tree_cfg function should prune down the above list
> considerably (well, then assert the map is empty on entry to that
> function of course)

Maybe

Index: gcc/tree-cfgcleanup.c
===
--- gcc/tree-cfgcleanup.c   (revision 231221)
+++ gcc/tree-cfgcleanup.c   (working copy)
@@ -456,6 +456,7 @@ remove_forwarder_block (basic_block bb)
}
   else
s = redirect_edge_and_branch (e, dest);
+  redirect_edge_var_map_clear (s);
 
   if (s == e)
{

also helps...

Richard.

> 
> > FWIW, the route by which dom added the edge to the redirect map was:
> > #0  redirect_edge_var_map_add (e=e@entry=0x7fb7a5f508, result=0x7fb725a000,
> > def=0x7fb78eaea0, locus=2147483884) at ../../gcc/gcc/tree-ssa.c:54
> > #1  0x00cccf58 in ssa_redirect_edge (e=e@entry=0x7fb7a5f508,
> > dest=dest@entry=0x7fb79cc680) at ../../gcc/gcc/tree-ssa.c:158
> > #2  0x00b00738 in gimple_redirect_edge_and_branch (e=0x7fb7a5f508,
> > dest=0x7fb79cc680) at ../../gcc/gcc/tree-cfg.c:5662
> > #3  0x006ec678 in redirect_edge_and_branch (e=e@entry=0x7fb7a5f508,
> > dest=<optimized out>) at ../../gcc/gcc/cfghooks.c:356
> > #4  0x00cb4530 in ssa_fix_duplicate_block_edges (rd=0x1a29f10,
> > local_info=local_info@entry=0x7fed40)
> > at ../../gcc/gcc/tree-ssa-threadupdate.c:1184
> > #5  0x00cb5520 in ssa_fixup_template_block (slot=<optimized out>,
> > local_info=0x7fed40) at ../../gcc/gcc/tree-ssa-threadupdate.c:1369
> > #6  traverse_noresize (
> > argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:911
> > #7  traverse (
> > argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:933
> > 

Documentation tweaks for internal-fn-related optabs

2015-12-03 Thread Richard Sandiford
As Bernd requested, this patch adds "This pattern cannot FAIL" to the
documentation of optabs that came to be mapped to internal functions.
For consistency I did the same for optabs that were already being
used for internal functions.

Many of the optabs weren't documented in the first place, so I added
entries for the missing ones.  Also, there were some inaccuracies in
the documentation of the rounding optabs.  The bitcount optabs said
that operand 0 has mode @var{m} and that operand 1 is under target
control, whereas it should be the other way around.

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
* doc/md.texi (vec_load_lanes@var{m}@var{n}): Document that
the pattern cannot FAIL.
(vec_store_lanes@var{m}@var{n}): Likewise.
(maskload@var{m}@var{n}): Likewise.
(maskstore@var{m}@var{n}): Likewise.  Fix a cut-&-paste error
in the name of the pattern.
(rsqrt@var{m}2): Document that mode m must be a scalar or vector
floating-point mode and that all operands have that mode.
(fmin@var{m}3, fmax@var{m}3): Likewise.  Document that the
pattern cannot FAIL.
(sqrt@var{m}2): Document that mode m must be a scalar or vector
floating-point mode, that all operands have that mode, and that
the patterns cannot FAIL.  Remove previous documentation referring
to @code{double} and @code{float}.
(fmod@var{m}3, remainder@var{m}3, cos@var{m}2, sin@var{m}2)
(sincos@var{m}3, log@var{m}2, pow@var{m}3, atan2@var{m}3)
(copysign@var{m}3): Likewise.
(exp@var{m}2): Likewise.  Explicitly state the base.
(floor@var{m}2): As for sqrt@var{m}2, but also specify the operands.
(btrunc@var{m}2, rint@var{m}2): Likewise.
(round@var{m}2): Likewise.  Fix incorrect description of rounding
effect.
(ceil@var{m}2): As for round@var{m}2.
(nearbyint@var{m}2): As for floor@var{m}2, but also mention that
the instruction must not raise an inexact condition.
(scalb@var{m}3): Document previously-undocumented pattern
(ldexp@var{m}3, tan@var{m}2, asin@var{m}2, acos@var{m}2)
(atan@var{m}2, expm1@var{m}2, exp10@var{m}2, exp2@var{m}2)
(log1p@var{m}2, log10@var{m}2, log2@var{m}2, logb@var{m}2)
(significand@var{m}2): Likewise.
(ffs@var{m}2): Fix the description of the modes, so that operand 1 has
mode m and operand 0 is defined more freely.  Document that @var{m}
can be a scalar or vector integer mode and that the pattern is not
allowed to FAIL.
(clz@var{m}2, ctz@var{m}2, popcount@var{m}2, parity@var{m}2): Likewise.
(clrsb@var{m}2): Likewise, except that the description of the
mode was missing in this case.

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index dcb3ee0..4848e64 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4753,6 +4753,8 @@ is true.  GCC assumes that, if a target supports this 
kind of
 instruction for some mode @var{n}, it also supports unaligned
 loads for vectors of mode @var{n}.
 
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern
 @item @samp{vec_store_lanes@var{m}@var{n}}
 Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory
@@ -4768,6 +4770,8 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
 
 for a memory operand 0 and register operand 1.
 
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
@@ -4822,12 +4826,16 @@ Perform a masked load of vector from memory operand 1 
of mode @var{m}
 into register operand 0.  Mask is provided in register operand 2 of
 mode @var{n}.
 
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{maskstore@var{m}@var{n}} instruction pattern
-@item @samp{maskload@var{m}@var{n}}
+@item @samp{maskstore@var{m}@var{n}}
 Perform a masked store of vector from register operand 1 of mode @var{m}
 into memory operand 0.  Mask is provided in register operand 2 of
 mode @var{n}.
 
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{vec_perm@var{m}} instruction pattern
 @item @samp{vec_perm@var{m}}
 Output a (variable) vector permutation.  Operand 0 is the destination
@@ -4993,6 +5001,9 @@ IEEE-conformant minimum and maximum operations.  If one 
operand is a quiet
 signalling @code{NaN} (-fsignaling-nans) an invalid floating point exception is
 raised and a quiet @code{NaN} is returned.
 
+All operands have mode @var{m}, which is a scalar or vector
+floating-point mode.  These patterns are not allowed to @code{FAIL}.
+
 @cindex @code{reduc_smin_@var{m}} instruction pattern
 @cindex @code{reduc_smax_@var{m}} instruction pattern
 @item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
@@ -5324,16 +5335,17 @@ Store the absolute value of operand 1 into operand 0.
 
 @cind

Re: [PATCH] Empty redirect_edge_var_map after each pass and function

2015-12-03 Thread Richard Biener
On Thu, 3 Dec 2015, Alan Lawrence wrote:

> On 02/12/15 14:13, Jeff Law wrote:
> > On 12/02/2015 01:33 AM, Richard Biener wrote:
> > > > Right.  So the question I have is how/why did DOM leave anything in the
> > > > map.
> > > > And if DOM is fixed to not leave stuff lying around, can we then assert
> > > > that
> > > > nothing is ever left in those maps between passes?  There's certainly no
> > > > good
> > > > reason I'm aware of why DOM would leave things in this state.
> > > 
> > > It happens not only with DOM but with all passes doing edge redirection.
> > > This is because the map is populated by GIMPLE cfg hooks just in case
> > > it might be used.  But there is no such thing as a "start CFG manip"
> > > and "end CFG manip" to cleanup such dead state.
> > Sigh.
> > 
> > > 
> > > IMHO the redirect-edge-var-map stuff is just the very most possible
> > > unclean implementation possible. :(  (see how remove_edge "clears"
> > > stale info from the map to avoid even more "interesting" stale
> > > data)
> > > 
> > > Ideally we could assert the map is empty whenever we leave a pass,
> > > but as said it triggers all over the place.  Even cfg-cleanup causes
> > > such stale data.
> > > 
> > > I agree that the patch is only a half-way "solution", but a full
> > > solution would require sth more explicit, like we do with
> > > initialize_original_copy_tables/free_original_copy_tables.  Thus
> > > > require passes to explicitly request the edge data to be preserved
> > > with a initialize_edge_var_map/free_edge_var_map call pair.
> > > 
> > > Not appropriate at this stage IMHO (well, unless it turns out to be
> > > a very localized patch).
> > So maybe as a follow-up to aid folks in the future, how about a debugging
> > verify_whatever function that we can call manually if debugging a problem in
> > this space.  With a comment indicating why we can't call it unconditionally
> > (yet).
> > 
> > 
> > jeff
> 
> I did a (fwiw disable bootstrap) build with the map-emptying code in passes.c
> (not functions.c), printing out passes after which the map was non-empty
> (before emptying it, to make sure passes weren't just carrying through stale
> data from earlier). My (non-exhaustive!) list of passes after which the
> edge_var_redirect_map can be non-empty stands at...
> 
> aprefetch ccp cddce ch ch_vect copyprop crited crited cselim cunroll cunrolli
> dce dom ehcleanup einline esra fab fnsplit forwprop fre graphite ifcvt
> isolate-paths ldist lim local-pure-const mergephi oaccdevlow ompexpssa
> optimized parloops pcom phicprop phiopt phiprop pre profile profile_estimate
> sccp sink slsr split-paths sra switchconv tailc tailr tracer unswitch
> veclower2 vect vrm vrp whole-program

Yeah, exactly my findings...  note that most of the above are likely
due to cfgcleanup even though it already does sth like

  e = redirect_edge_and_branch (e, dest);
  redirect_edge_var_map_clear (e);

so eventually placing a redirect_edge_var_map_empty () at the end
of the cleanup_tree_cfg function should prune down the above list
considerably (well, then assert the map is empty on entry to that
function of course)

> FWIW, the route by which dom added the edge to the redirect map was:
> #0  redirect_edge_var_map_add (e=e@entry=0x7fb7a5f508, result=0x7fb725a000,
> def=0x7fb78eaea0, locus=2147483884) at ../../gcc/gcc/tree-ssa.c:54
> #1  0x00cccf58 in ssa_redirect_edge (e=e@entry=0x7fb7a5f508,
> dest=dest@entry=0x7fb79cc680) at ../../gcc/gcc/tree-ssa.c:158
> #2  0x00b00738 in gimple_redirect_edge_and_branch (e=0x7fb7a5f508,
> dest=0x7fb79cc680) at ../../gcc/gcc/tree-cfg.c:5662
> #3  0x006ec678 in redirect_edge_and_branch (e=e@entry=0x7fb7a5f508,
> dest=) at ../../gcc/gcc/cfghooks.c:356
> #4  0x00cb4530 in ssa_fix_duplicate_block_edges (rd=0x1a29f10,
> local_info=local_info@entry=0x7fed40)
> at ../../gcc/gcc/tree-ssa-threadupdate.c:1184
> #5  0x00cb5520 in ssa_fixup_template_block (slot=,
> local_info=0x7fed40) at ../../gcc/gcc/tree-ssa-threadupdate.c:1369
> #6  traverse_noresize (
> argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:911
> #7  traverse (
> argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:933
> #8  thread_block_1 (bb=bb@entry=0x7fb7485bc8,
> noloop_only=noloop_only@entry=true, joiners=joiners@entry=true)
> at ../../gcc/gcc/tree-ssa-threadupdate.c:1592
> #9  0x00cb5a40 in thread_block (bb=0x7fb7485bc8,
> noloop_only=noloop_only@entry=true)
> at ../../gcc/gcc/tree-ssa-threadupdate.c:1629
> ---Type <return> to continue, or q <return> to quit---
> #10 0x00cb6bf8 in thread_through_all_blocks (
> may_peel_loop_headers=true) at ../../gcc/gcc/tree-ssa-threadupdate.c:2736
> #11 0x00becf6c in (anonymous namespace)::pass_dominator::execute (
> this=, fun=0x7fb77d1b28)
> at ../../gcc/gcc/tree-ssa-dom.c:622
> #12 0x009feef4 in execute_one_pass (pass=pass

Re: [PATCH] Empty redirect_edge_var_map after each pass and function

2015-12-03 Thread Alan Lawrence

On 02/12/15 14:13, Jeff Law wrote:

On 12/02/2015 01:33 AM, Richard Biener wrote:

Right.  So the question I have is how/why did DOM leave anything in the map.
And if DOM is fixed to not leave stuff lying around, can we then assert that
nothing is ever left in those maps between passes?  There's certainly no good
reason I'm aware of why DOM would leave things in this state.


It happens not only with DOM but with all passes doing edge redirection.
This is because the map is populated by GIMPLE cfg hooks just in case
it might be used.  But there is no such thing as a "start CFG manip"
and "end CFG manip" to cleanup such dead state.

Sigh.



IMHO the redirect-edge-var-map stuff is just the very most possible
unclean implementation possible. :(  (see how remove_edge "clears"
stale info from the map to avoid even more "interesting" stale
data)

Ideally we could assert the map is empty whenever we leave a pass,
but as said it triggers all over the place.  Even cfg-cleanup causes
such stale data.

I agree that the patch is only a half-way "solution", but a full
solution would require sth more explicit, like we do with
initialize_original_copy_tables/free_original_copy_tables.  Thus
require passes to explicitly request the edge data to be preserved
with a initialize_edge_var_map/free_edge_var_map call pair.

Not appropriate at this stage IMHO (well, unless it turns out to be
a very localized patch).

So maybe as a follow-up to aid folks in the future, how about a debugging
verify_whatever function that we can call manually if debugging a problem in
this space.  With a comment indicating why we can't call it unconditionally 
(yet).


jeff


I did a (fwiw disable bootstrap) build with the map-emptying code in passes.c 
(not functions.c), printing out passes after which the map was non-empty (before 
emptying it, to make sure passes weren't just carrying through stale data from 
earlier). My (non-exhaustive!) list of passes after which the 
edge_var_redirect_map can be non-empty stands at...


aprefetch ccp cddce ch ch_vect copyprop crited crited cselim cunroll cunrolli 
dce dom ehcleanup einline esra fab fnsplit forwprop fre graphite ifcvt 
isolate-paths ldist lim local-pure-const mergephi oaccdevlow ompexpssa optimized 
parloops pcom phicprop phiopt phiprop pre profile profile_estimate sccp sink 
slsr split-paths sra switchconv tailc tailr tracer unswitch veclower2 vect vrm 
vrp whole-program


FWIW, the route by which dom added the edge to the redirect map was:
#0  redirect_edge_var_map_add (e=e@entry=0x7fb7a5f508, result=0x7fb725a000,
def=0x7fb78eaea0, locus=2147483884) at ../../gcc/gcc/tree-ssa.c:54
#1  0x00cccf58 in ssa_redirect_edge (e=e@entry=0x7fb7a5f508,
dest=dest@entry=0x7fb79cc680) at ../../gcc/gcc/tree-ssa.c:158
#2  0x00b00738 in gimple_redirect_edge_and_branch (e=0x7fb7a5f508,
dest=0x7fb79cc680) at ../../gcc/gcc/tree-cfg.c:5662
#3  0x006ec678 in redirect_edge_and_branch (e=e@entry=0x7fb7a5f508,
dest=) at ../../gcc/gcc/cfghooks.c:356
#4  0x00cb4530 in ssa_fix_duplicate_block_edges (rd=0x1a29f10,
local_info=local_info@entry=0x7fed40)
at ../../gcc/gcc/tree-ssa-threadupdate.c:1184
#5  0x00cb5520 in ssa_fixup_template_block (slot=,
local_info=0x7fed40) at ../../gcc/gcc/tree-ssa-threadupdate.c:1369
#6  traverse_noresize (
argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:911
#7  traverse (
argument=0x7fed40, this=0x1a21a00) at ../../gcc/gcc/hash-table.h:933
#8  thread_block_1 (bb=bb@entry=0x7fb7485bc8,
noloop_only=noloop_only@entry=true, joiners=joiners@entry=true)
at ../../gcc/gcc/tree-ssa-threadupdate.c:1592
#9  0x00cb5a40 in thread_block (bb=0x7fb7485bc8,
noloop_only=noloop_only@entry=true)
at ../../gcc/gcc/tree-ssa-threadupdate.c:1629
---Type <return> to continue, or q <return> to quit---
#10 0x00cb6bf8 in thread_through_all_blocks (
may_peel_loop_headers=true) at ../../gcc/gcc/tree-ssa-threadupdate.c:2736
#11 0x00becf6c in (anonymous namespace)::pass_dominator::execute (
this=, fun=0x7fb77d1b28)
at ../../gcc/gcc/tree-ssa-dom.c:622
#12 0x009feef4 in execute_one_pass (pass=pass@entry=0x16d1a80)
at ../../gcc/gcc/passes.c:2311

The edge is then deleted much later:
#3  0x00f858e4 in free_edge (fn=, e=)
at ../../gcc/gcc/cfg.c:91
#4  remove_edge_raw (e=) at ../../gcc/gcc/cfg.c:350
#5  0x006ec814 in remove_edge (e=)
at ../../gcc/gcc/cfghooks.c:418
#6  0x006ecaec in delete_basic_block (bb=bb@entry=0x7fb74b3618)
at ../../gcc/gcc/cfghooks.c:597
#7  0x00f8d1d4 in try_optimize_cfg (mode=32)
at ../../gcc/gcc/cfgcleanup.c:2701
#8  cleanup_cfg (mode=mode@entry=32) at ../../gcc/gcc/cfgcleanup.c:3028
#9  0x0070180c in cfg_layout_initialize (flags=flags@entry=0)
at ../../gcc/gcc/cfgrtl.c:4264
#10 0x00f7cdc8 in (anonymous 
namespace)::pass_duplicate_computed_gotos::execute (this=, 
fun=0x7fb77d1b28)


Re: [ARM] Fix PR middle-end/65958

2015-12-03 Thread Eric Botcazou
> I can understand this restriction, but...
> 
> > +  /* See the same assertion on PROBE_INTERVAL above.  */
> > +  gcc_assert ((first % 4096) == 0);
> 
> ... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?

Because that isn't guaranteed, FIRST is related to the size of the protection 
area while PROBE_INTERVAL is related to the page size.

> blank line between declarations and code. Also, can we come up with a
> suitable define for 4096 here that expresses the context and then use
> that consistently through the remainder of this function?

OK, let's use ARITH_BASE.

> > +(define_insn "probe_stack_range"
> > +  [(set (match_operand:DI 0 "register_operand" "=r")
> > +   (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
> > +(match_operand:DI 2 "register_operand" "r")]
> > +UNSPEC_PROBE_STACK_RANGE))]
> 
> I think this should really use PTRmode, so that it's ILP32 ready (I'm
> not going to ask you to make sure that works though, since I suspect
> there are still other issues to resolve with ILP32 at this time).

Done.  Manually tested for now, I'll fully test it if approved.


PR middle-end/65958
* config/aarch64/aarch64-protos.h (aarch64_output_probe_stack_range):
Declare.
* config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
UNSPEC_PROBE_STACK_RANGE.
(blockage): New instruction.
(probe_stack_range_): Likewise.
* config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
function.
(aarch64_output_probe_stack_range): Likewise.
(aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
static builtin stack checking is enabled.
* config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
Define.

-- 
Eric Botcazou
Index: config/aarch64/aarch64-linux.h
===
--- config/aarch64/aarch64-linux.h	(revision 231206)
+++ config/aarch64/aarch64-linux.h	(working copy)
@@ -88,4 +88,7 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Define this to be nonzero if static stack checking is supported.  */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
 #endif  /* GCC_AARCH64_LINUX_H */
Index: config/aarch64/aarch64-protos.h
===
--- config/aarch64/aarch64-protos.h	(revision 231206)
+++ config/aarch64/aarch64-protos.h	(working copy)
@@ -340,6 +340,7 @@ void aarch64_asm_output_labelref (FILE *
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
 const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
+const char * aarch64_output_probe_stack_range (rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
Index: config/aarch64/aarch64.c
===
--- config/aarch64/aarch64.c	(revision 231206)
+++ config/aarch64/aarch64.c	(working copy)
@@ -62,6 +62,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "common/common-target.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -2183,6 +2184,179 @@ aarch64_libgcc_cmp_return_mode (void)
   return SImode;
 }
 
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* We use the 12-bit shifted immediate arithmetic instructions so values
+   must be multiple of (1 << 12), i.e. 4096.  */
+#define ARITH_BASE 4096
+
+#if (PROBE_INTERVAL % ARITH_BASE) != 0
+#error Cannot use simple address calculation for stack probing
+#endif
+
+/* The pair of scratch registers used for stack probing.  */
+#define PROBE_STACK_FIRST_REG  9
+#define PROBE_STACK_SECOND_REG 10
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  rtx reg1 = gen_rtx_REG (ptr_mode, PROBE_STACK_FIRST_REG);
+
+  /* See the same assertion on PROBE_INTERVAL above.  */
+  gcc_assert ((first % ARITH_BASE) == 0);
+
+  /* See if we have a constant small number of probes to generate.  If so,
+ that's the easy case.  */
+  if (size <= PROBE_INTERVAL)
+{
+  const HOST_WIDE_INT base = ROUND_UP (size, ARITH_BASE);
+
+  emit_set_insn (reg1,
+		 plus_constant (ptr_mode,
+stack_pointer_rtx, -(first + base)));
+  emit_stack_probe (plus_constant (ptr_mode, reg1, base - size));
+}
+
+  /* The run-time loop is made up of 8 insns in the generic case while the
+ compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
+  else if (size <= 4 * PROBE_INTERVAL)
+{
+  HOST_WIDE_INT i, rem;
+
+  emit_set_insn (reg1,
+		

Re: [PATCH, 4/16] Implement -foffload-alias

2015-12-03 Thread Tom de Vries

On 11/11/15 12:00, Jakub Jelinek wrote:

On Wed, Nov 11, 2015 at 11:51:02AM +0100, Richard Biener wrote:

The option -foffload-alias=pointer instructs the compiler to assume that
objects references in an offload region do not alias.

The option -foffload-alias=all instructs the compiler to make no
assumptions about aliasing in offload regions.

The default value is -foffload-alias=none.


I think global options for this is nonsense.  Please follow what
we do for #pragma GCC ivdep for example, thus allow the alias
behavior to be specified per "region" (whatever makes sense here
in the context of offloading).


Yeah, completely agreed.  I don't see why the offloaded region would be in
any way special, they are C/C++/Fortran code as any other.
What we can and should improve is teach IPA aliasing/points to analysis
about the way we lower the host vs. offloading region boundary, so that
if alias analysis on the caller of GOMP_target_ext/GOACC_parallel_keyed
determines something it can be used on the offloaded function side and vice
versa, but a switch like the above is just wrong.


Filed the GOMP_target_ext bit as PR 68675 - Handle GOMP_target_ext 
optimally in ipa-pta.


Thanks,
- Tom


Re: [PATCH, PR46032] Handle BUILT_IN_GOMP_PARALLEL in ipa-pta

2015-12-03 Thread Tom de Vries

On 30/11/15 14:32, Jakub Jelinek wrote:

On Mon, Nov 30, 2015 at 02:24:18PM +0100, Richard Biener wrote:

OK for stage3 trunk if bootstrap and reg-test succeeds?


-|| node->address_taken);
+|| (node->address_taken
+&& !node->parallelized_function));

please add a comment here on why this is safe.

Ok with this change.


BTW, __builtin_GOMP_task supposedly can be treated similarly
if the third argument is NULL (if 3rd arg is non-NULL, then
the caller passes a different structure from what the callee receives,
but perhaps it could be emulated as pretending that cpyfn is called first
with address of a temporary var and the data argument and then fn
is called with the address of the temporary var).


Filed as PR68673 - Handle __builtin_GOMP_task optimally in ipa-pta.

Can you provide testcases for both (3rd arg NULL/non-NULL) cases? I'm 
not fluent in openmp.


Thanks,
- Tom


Re: Add fuzzing coverage support

2015-12-03 Thread Bernd Schmidt

On 12/02/2015 06:38 PM, Dmitry Vyukov wrote:

One thing to consider would
be whether you really need this split between O0/optimize versions, or
whether you can find a place in the queue where to insert it
unconditionally. Have you considered this at all or did you just follow
asan/tsan?


I inserted the pass just before asan/tsan because it looks like the
right place for it. If we do it after asan, it will insert coverage
for all asan-emitted BBs which is highly undesirable. I also think it
is a good idea to run a bunch of optimizations before coverage pass to
not emit too many coverage callbacks (but I can't say that I am very
knowledgeable in this area). FWIW clang does the same: coverage passes
run just before asan/tsan.


There's one other thing I want to put out there. Is this kind of thing 
maybe what plugins were invented for? I don't really like the concept of 
plugins, but it seems to me that this sort of thing might be an 
application for them.



+public:
+  static pass_data pd ()
+  {
+static const pass_data data =



I think a static data member would be better than the unnecessary pd ()
function. This is also unlike existing practice, and I wonder how others
think about it. IMO a fairly strong case could be made that if we're using
C++, then this sort of thing ought to be part of the class definition.


I vary name of the pass depending on the O0 template argument (again
following asan):

 O0 ? "sancov_O0" : "sancov", /* name */

If we call it "sancov" always, then I can make it just a global var
(as all other passes in gcc).
Or I can make it a static variable of the template class and move
definition of the class (as you proposed).
What would you prefer?


I think I prefer the static var of the template class. I just wonder why 
we don't have the pass_data for all the existing passes as static data 
members? I'm sure there's some reason.


asan also distinguishes the name between asan/asan0. I'd either follow 
that naming convention, or remove the _O0 variant for all three of them. 
I lean towards the latter.



Bernd


Re: Add an rsqrt_optab and IFN_RSQRT internal function

2015-12-03 Thread Richard Biener
On Thu, Dec 3, 2015 at 10:39 AM, Jakub Jelinek  wrote:
> On Thu, Dec 03, 2015 at 09:21:03AM +, Richard Sandiford wrote:
>>   * internal-fn.def (RSQRT): New function.
>>   * optabs.def (rsqrt_optab): New optab.
>>   * doc/tm.texi (rsqrtM2): Document
>
> Missing full stop.
>
> Otherwise looks to me like a nice cleanup and hopefully fixes the aarch64
> regression.

Looks good to me as well.

Richard.

> Jakub


Re: [PATCH] Fix shrink-wrap bug with anticipating into loops (PR67778, PR68634)

2015-12-03 Thread Bernd Schmidt

On 12/02/2015 07:21 PM, Segher Boessenkool wrote:

After shrink-wrapping has found the "tightest fit" for where to place
the prologue, it tries to move it earlier (so that frame saves are run
earlier) -- but without copying any more basic blocks.


Another question would be - is there really a good reason to do this at all?


Bernd


Re: [PATCH] Fix shrink-wrap bug with anticipating into loops (PR67778, PR68634)

2015-12-03 Thread Bernd Schmidt

On 12/02/2015 07:21 PM, Segher Boessenkool wrote:

After shrink-wrapping has found the "tightest fit" for where to place
the prologue, it tries to move it earlier (so that frame saves are run
earlier) -- but without copying any more basic blocks.

Unfortunately a candidate block we select can be inside a loop, and we
will still allow it (because the loop always exits via our previously
chosen block).



So we need to detect this situation.  We can place the prologue at a
previous block PRE only if PRE dominates every block reachable from
it.  This is a bit hard / expensive to compute, so instead this patch
allows a block PRE only if PRE does not post-dominate any of its
successors (other than itself).


Are the two conditions equivalent though? I'm not fully convinced. Let's 
say the loop has multiple exits, then none of these exit blocks 
postdominate the loop entry block, right?


I think I agree with Jakub that we don't want to do unnecessary work in 
this piece of code.



/* If we can move PRO back without having to duplicate more blocks, do so.
   We can move back to a block PRE if every path from PRE will eventually
- need a prologue, that is, PRO is a post-dominator of PRE.  */
+ need a prologue, that is, PRO is a post-dominator of PRE.  We might
+ need to duplicate PRE if there is any path from a successor of PRE back
+ to PRE, so don't allow that either (but self-loops are fine, as are any
+ other loops entirely dominated by PRE; this in general seems too
+ expensive to check for, for such an uncommon case).  */


The last comment is unclear and I don't know what it wants to tell me.


Bernd


Re: [Patch, fortran] PR68534 - No error on mismatch in number of arguments between submodule and module interface

2015-12-03 Thread Paul Richard Thomas
Dear Steve,

I'll take a look at this this afternoon. Thanks for bringing it to my attention.

Cheers

Paul

On 3 December 2015 at 07:43, Steve Kargl
 wrote:
> On Wed, Dec 02, 2015 at 10:26:30PM -0800, Steve Kargl wrote:
>> On Wed, Dec 02, 2015 at 10:02:33PM -0800, Steve Kargl wrote:
>> > Paul,
>> >
>> > I'm stumped.  Something is broken on i386-*-freebsd. :-(
>> >
>> > Running /mnt/kargl/gcc/gcc/testsuite/gfortran.dg/dg.exp ...
>> > FAIL: gfortran.dg/submodule_10.f08   -O  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_10.f08   -O  (test for excess errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -O0  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -O0  (test for excess errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -O1  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -O1  (test for excess errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -O2  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -O2  (test for excess errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -O3 -fomit-frame-pointer 
>> > -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal 
>> > compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -O3 -fomit-frame-pointer 
>> > -funroll-loops -fpeel-loops -ftracer -finline-functions  (test for excess 
>> > errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -O3 -g  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -O3 -g  (test for excess errors)
>> > FAIL: gfortran.dg/submodule_11.f08   -Os  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_11.f08   -Os  (test for excess errors)
>>
>> Well, if I change the order of the conditionals at decl.c:4831, I
>> can get rid of the above FAILs.
>>
>> Index: decl.c
>> ===
>> --- decl.c  (revision 231219)
>> +++ decl.c  (working copy)
>> @@ -4826,7 +4826,7 @@ ok:
>>
>>/* Abbreviated module procedure declaration is not meant to have any
>>  formal arguments!  */
>> -  if (!sym->abr_modproc_decl && formal && !head)
>> +  if (formal && !head && sym && !sym->abr_modproc_decl)
>> arg_count_mismatch = true;
>>
>>for (p = formal, q = head; p && q; p = p->next, q = q->next)
>>
>> --
>> steve
>>
>> > FAIL: gfortran.dg/submodule_13.f08   -O  (internal compiler error)
>> > FAIL: gfortran.dg/submodule_13.f08   -O   (test for errors, line 29)
>> > FAIL: gfortran.dg/submodule_13.f08   -O  (test for excess errors)
>
> These ICEs persist at line 4831.  In looking at the code, I'm
> now somewhat unsure what it should be doing.  In particular,
> there are 2 gfc_error_now() calls in the below:
>
>
>   for (p = formal, q = head; p && q; p = p->next, q = q->next)
> {
>   if ((p->next != NULL && q->next == NULL)
>   || (p->next == NULL && q->next != NULL))
> arg_count_mismatch = true;
>   else if ((p->sym == NULL && q->sym == NULL)
> || strcmp (p->sym->name, q->sym->name) == 0)
> continue;
>   else
> gfc_error_now ("Mismatch in MODULE PROCEDURE formal "
>"argument names (%s/%s) at %C",
>p->sym->name, q->sym->name);
> }
>
>   if (arg_count_mismatch)
>   gfc_error_now ("Mismatch in number of MODULE PROCEDURE "
>  "formal arguments at %C");
> }
>
>   return MATCH_YES;
>
> cleanup:
>   gfc_free_formal_arglist (head);
>   return m;
>
> But, we return MATCH_YES?  I would expect setting m = MATCH_ERROR
> and jumping to cleanup.  That's ugly.
>
> --
> Steve



-- 
Outside of a dog, a book is a man's best friend. Inside of a dog it's
too dark to read.

Groucho Marx


Re: [PATCH] Handle BUILT_IN_GOACC_PARALLEL in ipa-pta

2015-12-03 Thread Jakub Jelinek
On Thu, Dec 03, 2015 at 12:09:04PM +0100, Tom de Vries wrote:
> The flag is set here in expand_omp_target:
> ...
> 12682 /* Prevent IPA from removing child_fn as unreachable,
>  since there are no
> 12683refs from the parent function to child_fn in offload
>  LTO mode.  */
> 12684 if (ENABLE_OFFLOADING)
> 12685   cgraph_node::get (child_fn)->mark_force_output ();
> ...
> 
> I guess setting forced_by_abi instead would also mean child_fn is not
> removed as unreachable, while still allowing optimizations:
> ...
>   /* Like FORCE_OUTPUT, but in the case it is ABI requiring the symbol
>  to be exported.  Unlike FORCE_OUTPUT this flag gets cleared to
>  symbols promoted to static and it does not inhibit
>  optimization.  */
>   unsigned forced_by_abi : 1;
> ...
> 
> But I suspect that other optimizations (than ipa-pta) might break things.
> 
> Essentially we have two situations:
> - in the host compiler, there is no need for the forced_output flag,
>   and it inhibits optimization
> - in the accelerator compiler, it (or some equivalent) is needed
> 
> I wonder if setting the force_output flag only when streaming the bytecode
> for offloading would work. That way, it wouldn't be set in the host
> compiler, while being set in the accelerator compiler.

I believe that the host and offload func (and var) tables need to be in
sync, so there needs to be something both in the host and accel compilers
that prevents the functions and variables that have their accel or host
counterpart in the tables from being optimized away, or say replaced by
a clone with different arguments etc.

Jakub


Re: [PATCH] Handle BUILT_IN_GOACC_PARALLEL in ipa-pta

2015-12-03 Thread Richard Biener
On Thu, 3 Dec 2015, Tom de Vries wrote:

> On 03/12/15 09:59, Richard Biener wrote:
> > On Thu, 3 Dec 2015, Tom de Vries wrote:
> > 
> > > On 03/12/15 01:10, Tom de Vries wrote:
> > > > 
> > > > I've managed to reproduce it. The difference between pass and fail is
> > > > whether the compiler is configured with or without accelerator.
> > > > 
> > > > I'll look into it.
> > > 
> > > In the configuration with accelerator, the flag node->force_output is on
> > > for
> > > foo._omp.fn.
> > > 
> > > This causes nonlocal_p to be true in ipa_pta_execute, which causes the
> > > optimization to fail.
> > > 
> > > > The flag is described as:
> > > ...
> > >/* The symbol will be assumed to be used in an invisible way (like
> > >   by an toplevel asm statement).  */
> > >   ...
> > > 
> > > Looks like I have to ignore the force_output flag as well in
> > > ipa_pta_execute
> > > for this sort of node.
> > 
> > It rather looks like the flag shouldn't be set.  The fn after all has
> > its address taken!(?)
> > 
> 
> The flag is set here in expand_omp_target:
> ...
> 12682 /* Prevent IPA from removing child_fn as unreachable,
>  since there are no
> 12683refs from the parent function to child_fn in offload
>  LTO mode.  */
> 12684 if (ENABLE_OFFLOADING)
> 12685   cgraph_node::get (child_fn)->mark_force_output ();
> ...
> 

How are there no refs from the "parent"?  Are there not refs from
some kind of descriptor that maps fallback CPU and offloaded variants?

I think the above needs sorting out in some way, making the refs
explicit rather than implicit via force_output.

> I guess setting forced_by_abi instead would also mean child_fn is not removed
> as unreachable, while still allowing optimizations:
> ...
>   /* Like FORCE_OUTPUT, but in the case it is ABI requiring the symbol
>  to be exported.  Unlike FORCE_OUTPUT this flag gets cleared to
>  symbols promoted to static and it does not inhibit
>  optimization.  */
>   unsigned forced_by_abi : 1;
> ...
> 
> But I suspect that other optimizations (than ipa-pta) might break things.

How so?

> Essentially we have two situations:
> - in the host compiler, there is no need for the forced_output flag,
>   and it inhibits optimization
> - in the accelerator compiler, it (or some equivalent) is needed
> 
> I wonder if setting the force_output flag only when streaming the bytecode for
> offloading would work. That way, it wouldn't be set in the host compiler,
> while being set in the accelerator compiler.

Yeah, that was my original thinking btw.

Richard.


Re: [PATCH] Handle BUILT_IN_GOACC_PARALLEL in ipa-pta

2015-12-03 Thread Tom de Vries

On 03/12/15 09:59, Richard Biener wrote:

On Thu, 3 Dec 2015, Tom de Vries wrote:


On 03/12/15 01:10, Tom de Vries wrote:


I've managed to reproduce it. The difference between pass and fail is
whether the compiler is configured with or without accelerator.

I'll look into it.


In the configuration with accelerator, the flag node->force_output is on for
foo._omp.fn.

This causes nonlocal_p to be true in ipa_pta_execute, which causes the
optimization to fail.

The flag is described as:
...
   /* The symbol will be assumed to be used in an invisible way (like
  by an toplevel asm statement).  */
  ...

Looks like I have to ignore the force_output flag as well in ipa_pta_execute
for this sort of node.


It rather looks like the flag shouldn't be set.  The fn after all has
its address taken!(?)



The flag is set here in expand_omp_target:
...
12682 /* Prevent IPA from removing child_fn as unreachable,
 since there are no
12683refs from the parent function to child_fn in offload
 LTO mode.  */
12684 if (ENABLE_OFFLOADING)
12685   cgraph_node::get (child_fn)->mark_force_output ();
...

I guess setting forced_by_abi instead would also mean child_fn is not 
removed as unreachable, while still allowing optimizations:

...
  /* Like FORCE_OUTPUT, but in the case it is ABI requiring the symbol
 to be exported.  Unlike FORCE_OUTPUT this flag gets cleared to
 symbols promoted to static and it does not inhibit
 optimization.  */
  unsigned forced_by_abi : 1;
...

But I suspect that other optimizations (than ipa-pta) might break things.

Essentially we have two situations:
- in the host compiler, there is no need for the forced_output flag,
  and it inhibits optimization
- in the accelerator compiler, it (or some equivalent) is needed

I wonder if setting the force_output flag only when streaming the 
bytecode for offloading would work. That way, it wouldn't be set in the 
host compiler, while being set in the accelerator compiler.


Thanks,
- Tom


Re: [ARM] Fix PR middle-end/65958

2015-12-03 Thread Richard Earnshaw
Sorry for the delay, very busy on other things these days...

On 16/11/15 20:00, Eric Botcazou wrote:
>> More comments inline.
>
> Revised version attached, which addresses all your comments and in particular
> removes the
>
> +#if PROBE_INTERVAL > 4096
> +#error Cannot use indexed addressing mode for stack probing
> +#endif
>
> compile-time assertion.  It generates the same code for PROBE_INTERVAL == 4096
> as before and it generates code that can be assembled for 8192.
>
> Tested on Aarch64/Linux, OK for the mainline?
>

> +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
> +
> +/* We use the 12-bit shifted immediate arithmetic instructions so values
> +   must be multiple of (1 << 12), i.e. 4096.  */
> +#if (PROBE_INTERVAL % 4096) != 0

I can understand this restriction, but...

> +  /* See the same assertion on PROBE_INTERVAL above.  */
> +  gcc_assert ((first % 4096) == 0);

... why isn't this a test that FIRST is aligned to PROBE_INTERVAL?

> +  /* See if we have a constant small number of probes to generate.  If so,
> + that's the easy case.  */
> +  if (size <= PROBE_INTERVAL)
> +{
> +  const HOST_WIDE_INT base = ROUND_UP (size, 4096);
> +  emit_set_insn (reg1,

blank line between declarations and code. Also, can we come up with a
suitable define for 4096 here that expresses the context and then use
that consistently through the remainder of this function?

> +(define_insn "probe_stack_range"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
> +  (match_operand:DI 2 "register_operand" "r")]
> +  UNSPEC_PROBE_STACK_RANGE))]

I think this should really use PTRmode, so that it's ILP32 ready (I'm
not going to ask you to make sure that works though, since I suspect
there are still other issues to resolve with ILP32 at this time).

R.




Re: [UPC 02/22] tree-related changes

2015-12-03 Thread Richard Biener
On Wed, 2 Dec 2015, Gary Funck wrote:

> On 12/01/15 12:26:32, Richard Biener wrote:
> > On Mon, 30 Nov 2015, Gary Funck wrote:
> > > -struct GTY(()) tree_type_common {
> > > +struct GTY((user)) tree_type_common {
> > >struct tree_common common;
> > >tree size;
> > >tree size_unit;
> > > @@ -1441,10 +1458,10 @@ struct GTY(()) tree_type_common {
> > >tree pointer_to;
> > >tree reference_to;
> > >union tree_type_symtab {
> > > -int GTY ((tag ("TYPE_SYMTAB_IS_ADDRESS"))) address;
> > > -const char * GTY ((tag ("TYPE_SYMTAB_IS_POINTER"))) pointer;
> > > -struct die_struct * GTY ((tag ("TYPE_SYMTAB_IS_DIE"))) die;
> > > -  } GTY ((desc ("debug_hooks->tree_type_symtab_field"))) symtab;
> > > +int address;
> > > +const char *pointer;
> > > +struct die_struct *die;
> > > +  } symtab;
> >
> > Err, you don't have debug info for this?  What is address?
> 
> Not sure what you mean.  The 'die' field is retained.
> Is there something in the semantics of "GTY(( ((tag "
> that relates to debug information?

Ah, sorry.  I misread the diff.

> > I do not like the explicit GC of tree_type_common.
> 
> I'm not a fan either.
> 
> The gist is that we needed a map from tree nodes to tree nodes
> to record the "layout qualifier" for layout qualifiers with
> a value greater than one.  But when the garbage collector ran
> over the hash table that maps integer constants to tree nodes,
> it didn't know that the constant was being referenced by the
> layout qualifier tree map.
> 
> We described the issue here:
> https://gcc.gnu.org/ml/gcc-patches/2011-10/msg00800.html
> 
> The conclusion that we reached is that when tree nodes
> were walked, we needed to check if there was a
> tree node -> integer constant mapping, the integer constant map
> (used to make tree nodes used to hold CST's unique)
> needed to be marked to keep the CST mapping from going away.
> 
> This led to the conclusion that a custom GC routine was
> needed for tree nodes.  Maybe that conclusion is wrong or
> there is a better way to do things?

It should simply work as long as the hash-map is properly marked
as GC root.  It might _not_ work (reliably) if the hash-map is
also a "cache" by itself.  But it eventually works now given some
fixes went into the area of collecting/marking caches.

> > > ===
> > > --- gcc/tree-pretty-print.c   (.../trunk) (revision 231059)
> > > +++ gcc/tree-pretty-print.c   (.../branches/gupc) (revision 
> > > 231080)
> > > @@ -1105,6 +1105,25 @@ dump_block_node (pretty_printer *pp, tre
> > >  }
> > >  
> > >  
> > > +static void
> > > +dump_upc_type_quals (pretty_printer *buffer, tree type, int quals)
> >
> > Functions need comments.
> 
> OK.  Missed that one.  Will check on others.
> 
> > > Index: gcc/tree-sra.c
> > > ===
> > > --- gcc/tree-sra.c(.../trunk) (revision 231059)
> > > +++ gcc/tree-sra.c(.../branches/gupc) (revision 231080)
> > > @@ -3882,6 +3882,7 @@ find_param_candidates (void)
> > >  
> > > if (TREE_CODE (type) == FUNCTION_TYPE
> > > || TYPE_VOLATILE (type)
> > > +   || SHARED_TYPE_P (type)
> > 
> > UPC_SHARED_TYPE_P ()
> 
> OK. As I mentioned in a previous reply, originally we prefixed
> all "UPC" specific tree node fields and functions with UPC_ or upc_,
> but as we transitioned away from UPC as a separate language
> (ala ObjC) and made compilation conditional upon -fupc, an
> observation was made off list that since the base tree nodes
> are generic that naming UPC-related fields with "UPC" prefixes
> didn't make sense, so we removed those prefixes.  There might
> be a middle ground, however, where UPC_SHARED_TYPE_P() is preferred
> to SHARED_TYPE_P() because as you/others have mentioned,
> the term "shared" gets used in a lot of contexts.

Yes, specifically for predicates/functions used in the middle-end.

> > > @@ -4381,6 +4422,7 @@ build1_stat (enum tree_code code, tree t
> > >/* Whether a dereference is readonly has nothing to do with whether
> > >its operand is readonly.  */
> > >TREE_READONLY (t) = 0;
> > > +  TREE_SHARED (t) = SHARED_TYPE_P (type);
> > 
> > This is frontend logic and should reside in FEs.
> 
> [... several other similar actions taken contingent
> upon SHARED_TYPE_P() elided ...]
> 
> OK, will take a look.
> 
> > > +  outer_is_pts_p = (POINTER_TYPE_P (outer_type)
> > > +&& SHARED_TYPE_P (TREE_TYPE (outer_type)));
> > > +  inner_is_pts_p = (POINTER_TYPE_P (inner_type)
> > > +&& SHARED_TYPE_P (TREE_TYPE (inner_type)));
> > > +
> > > +  /* Pointer-to-shared types have special
> > > + equivalence rules that must be checked.  */
> > > +  if (outer_is_pts_p && inner_is_pts_p
> > > +  && lang_hooks.types_compatible_p)
> > > +return lang_hooks.types_compatible_p (outer_type, inner_type);
> > 
> > Sorry, but 

[PATCH, CHKP] Fix bounds returned for structures

2015-12-03 Thread Ilya Enkovich
Hi,

Currently multiple return-struct-* tests from MPX testsuite fail.  This patch 
fixes it.  Bootstrapped and tested on x86_64-unknown-linux-gnu.  Applied to 
trunk.  I'm going to port it to GCC5 after 5.3 release.

Thanks,
Ilya
--
gcc/

2015-12-03  Ilya Enkovich  

* cfgexpand.c (expand_gimple_stmt_1): Return statement with
DECL as return value is allowed to have NULL bounds.


diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 1990e10..2c3b23d 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3534,6 +3534,12 @@ expand_gimple_stmt_1 (gimple *stmt)
  {
tree result = DECL_RESULT (current_function_decl);
 
+   /* Mark we have return statement with missing bounds.  */
+   if (!bnd
+   && chkp_function_instrumented_p (cfun->decl)
+   && !DECL_P (op0))
+ bnd = error_mark_node;
+
/* If we are not returning the current function's RESULT_DECL,
   build an assignment to it.  */
if (op0 != result)
@@ -3550,9 +3556,6 @@ expand_gimple_stmt_1 (gimple *stmt)
op0 = build2 (MODIFY_EXPR, TREE_TYPE (result),
  result, op0);
  }
-   /* Mark we have return statement with missing bounds.  */
-   if (!bnd && chkp_function_instrumented_p (cfun->decl))
- bnd = error_mark_node;
  }
 
if (!op0)


[PATCH] Handle OBJ_TYPE_REF in FRE

2015-12-03 Thread Richard Biener

The following patch handles CSEing OBJ_TYPE_REF which was omitted
because it is a GENERIC expression even on GIMPLE (for whatever
reason...).  Rather than changing this now the following patch
simply treats it properly as such.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Note that this does not (yet) substitute OBJ_TYPE_REFs in calls
with SSA names that have the same value - not sure if that would
be desired generally (does the devirt machinery cope with that?).

Thanks,
Richard.

2015-12-03  Richard Biener  

PR tree-optimization/64812
* tree-ssa-sccvn.c (vn_get_stmt_kind): Handle OBJ_TYPE_REF.
(vn_nary_length_from_stmt): Likewise.
(init_vn_nary_op_from_stmt): Likewise.
* gimple-match-head.c (maybe_build_generic_op): Likewise.
* gimple-pretty-print.c (dump_unary_rhs): Likewise.

* g++.dg/tree-ssa/ssa-fre-1.C: New testcase.

Index: gcc/tree-ssa-sccvn.c
===
*** gcc/tree-ssa-sccvn.c(revision 231221)
--- gcc/tree-ssa-sccvn.c(working copy)
*** vn_get_stmt_kind (gimple *stmt)
*** 460,465 
--- 460,467 
  ? VN_CONSTANT : VN_REFERENCE);
else if (code == CONSTRUCTOR)
  return VN_NARY;
+   else if (code == OBJ_TYPE_REF)
+ return VN_NARY;
return VN_NONE;
  }
  default:
*** vn_nary_length_from_stmt (gimple *stmt)
*** 2479,2484 
--- 2481,2487 
return 1;
  
  case BIT_FIELD_REF:
+ case OBJ_TYPE_REF:
return 3;
  
  case CONSTRUCTOR:
*** init_vn_nary_op_from_stmt (vn_nary_op_t
*** 2508,2513 
--- 2511,2517 
break;
  
  case BIT_FIELD_REF:
+ case OBJ_TYPE_REF:
vno->length = 3;
vno->op[0] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
vno->op[1] = TREE_OPERAND (gimple_assign_rhs1 (stmt), 1);
Index: gcc/gimple-match-head.c
===
*** gcc/gimple-match-head.c (revision 231221)
--- gcc/gimple-match-head.c (working copy)
*** maybe_build_generic_op (enum tree_code c
*** 243,248 
--- 243,249 
*op0 = build1 (code, type, *op0);
break;
  case BIT_FIELD_REF:
+ case OBJ_TYPE_REF:
*op0 = build3 (code, type, *op0, op1, op2);
break;
  default:;
Index: gcc/gimple-pretty-print.c
===
*** gcc/gimple-pretty-print.c   (revision 231221)
--- gcc/gimple-pretty-print.c   (working copy)
*** dump_unary_rhs (pretty_printer *buffer,
*** 302,308 
  || TREE_CODE_CLASS (rhs_code) == tcc_reference
  || rhs_code == SSA_NAME
  || rhs_code == ADDR_EXPR
! || rhs_code == CONSTRUCTOR)
{
  dump_generic_node (buffer, rhs, spc, flags, false);
  break;
--- 302,309 
  || TREE_CODE_CLASS (rhs_code) == tcc_reference
  || rhs_code == SSA_NAME
  || rhs_code == ADDR_EXPR
! || rhs_code == CONSTRUCTOR
! || rhs_code == OBJ_TYPE_REF)
{
  dump_generic_node (buffer, rhs, spc, flags, false);
  break;
Index: gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C
===
*** gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C   (revision 0)
--- gcc/testsuite/g++.dg/tree-ssa/ssa-fre-1.C   (working copy)
***
*** 0 
--- 1,44 
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-fre2" } */
+ 
+ template <class T> class A
+ {
+   T *p;
+ 
+ public:
+   A (T *p1) : p (p1) { p->acquire (); }
+ };
+ 
+ class B
+ {
+ public:
+ virtual void acquire ();
+ };
+ class D : public B
+ {
+ };
+ class F : B
+ {
+   int mrContext;
+ };
+ class WindowListenerMultiplexer : F, public D
+ {
+   void acquire () { acquire (); }
+ };
+ class C
+ {
+   void createPeer () throw ();
+   WindowListenerMultiplexer maWindowListeners;
+ };
+ class FmXGridPeer
+ {
+ public:
+ void addWindowListener (A<D>);
+ } a;
+ void
+ C::createPeer () throw ()
+ {
+   a.addWindowListener (&maWindowListeners);
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "= OBJ_TYPE_REF" 1 "fre2" } } */


Re: [gomp-nvptx 4/9] nvptx backend: add -mgomp option and multilib

2015-12-03 Thread Alexander Monakov
On Wed, 2 Dec 2015, Jakub Jelinek wrote:
> Can you post sample code with assembly for -msoft-stack and -muniform-simt
> showing how are short interesting cases expanded?

Here are short examples;  please let me know if I'm misunderstanding and you
wanted something else.

First, -muniform-simt effect on this input:

int f (int *p, int v)
{
  return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);
}

leads to this assembly (showing diff -without/+with option):

 .visible .func (.param.u32 %out_retval)f(.param.u64 %in_ar1, .param.u32 
%in_ar2)
 {
.reg.u64 %ar1;
.reg.u32 %ar2;
.reg.u32 %retval;
.reg.u64 %hr10;
.reg.u32 %r23;
.reg.u64 %r25;
.reg.u32 %r26;
+   .reg.u32 %r28;
+   .reg.pred %r29;
ld.param.u64 %ar1, [%in_ar1];
ld.param.u32 %ar2, [%in_ar2];
+   {
+   .reg.u32 %ustmp0;
+   .reg.u64 %ustmp1;
+   .reg.u64 %ustmp2;
+   mov.u32 %ustmp0, %tid.y;
+   mul.wide.u32 %ustmp1, %ustmp0, 4;
+   mov.u64 %ustmp2, __nvptx_uni;
+   add.u64 %ustmp2, %ustmp2, %ustmp1;
+   ld.shared.u32 %r28, [%ustmp2];
+   mov.u32 %ustmp0, %tid.x;
+   and.b32 %r28, %r28, %ustmp0;
+   setp.eq.u32 %r29, %r28, %ustmp0;
+   }
mov.u64 %r25, %ar1;
mov.u32 %r26, %ar2;
-   atom.exch.b32   %r23, [%r25], %r26;
+   @%r29   atom.exch.b32   %r23, [%r25], %r26;
+   shfl.idx.b32%r23, %r23, %r28, 31;
mov.u32 %retval, %r23;
st.param.u32[%out_retval], %retval;
ret;
}
+// BEGIN GLOBAL VAR DECL: __nvptx_uni
+.extern .shared .u32 __nvptx_uni[32];

And, -msoft-stack for this input:

void g(void *);
void f()
{
  char a[42] __attribute__((aligned(64)));
  g(a);
}

leads to:

 .visible .func f
 {
.reg.u64 %hr10;
.reg.u64 %r22;
.reg.u64 %frame;
-   .local.align 64 .b8 %farray[48];
-   cvta.local.u64 %frame, %farray;
+   .reg.u32 %fstmp0;
+   .reg.u64 %fstmp1;
+   .reg.u64 %fstmp2;
+   mov.u32 %fstmp0, %tid.y;
+   mul.wide.u32 %fstmp1, %fstmp0, 8;
+   mov.u64 %fstmp2, __nvptx_stacks;
+   add.u64 %fstmp2, %fstmp2, %fstmp1;
+   ld.shared.u64 %fstmp1, [%fstmp2];
+   sub.u64 %frame, %fstmp1, 48;
+   and.b64 %frame, %frame, -64;
+   st.shared.u64 [%fstmp2], %frame;
mov.u64 %r22, %frame;
{
.param.u64 %out_arg0;
st.param.u64 [%out_arg0], %r22;
call g, (%out_arg0);
}
+   st.shared.u64 [%fstmp2], %fstmp1;
ret;
}
 // BEGIN GLOBAL FUNCTION DECL: g
 .extern .func g(.param.u64 %in_ar1);
+// BEGIN GLOBAL VAR DECL: __nvptx_stacks
+.extern .shared .u64 __nvptx_stacks[32];


Alexander


  1   2   >