Re: [PATCH] Respin sparc pixel-compare patterns using iterators.

2011-09-29 Thread Uros Bizjak
Hello!

> This is heavily inspired by a proof-of-concept patch David
> Bremner sent to me the other week.
>
>   * config/sparc/sparc.md (UNSPEC_FCMPLE, UNSPEC_FCMPNE,
>   UNSPEC_FCMPGT, UNSPEC_FCMPEQ): Delete and reduce to...
>   (UNSPEC_FCMP): New unspec.
>   (gcond): New code iterator.
>   (gcond_name): New code attr.
>   (GCM): New mode iterator.
>   (gcm_name): New mode attr.
>   (fcmp{le,ne,gt,eq}{16,32}_vis): Reimplement using iterators.

You don't need gcond_name, gcond can be used directly in the pattern:

-(define_insn "fcmpeq32_vis"
+(define_insn "fcmp_vis"

(define_insn "fcmp_vis"

   [(set (match_operand:P 0 "register_operand" "=r")
-   (unspec:P [(match_operand:V2SI 1 "register_operand" "e")
-   (match_operand:V2SI 2 "register_operand" "e")]
-UNSPEC_FCMPEQ))]
+   (unspec:P [(gcond:GCM (match_operand:GCM 1 "register_operand" "e")
+ (match_operand:GCM 2 "register_operand" "e"))]
+UNSPEC_FCMP))]
   "TARGET_VIS"
-  "fcmpeq32\t%1, %2, %0"
+  "fcmp\t%1, %2, %0"

"fcmp..."

   [(set_attr "type" "fpmul")
(set_attr "fptype" "double")])

Uros.


[PATCH] Add sparc 3D array addressing VIS intrinsics.

2011-09-29 Thread David Miller

Commited to trunk.

gcc/

* config/sparc/sparc.md (UNSPEC_ARRAY8, UNSPEC_ARRAY16,
UNSPEC_ARRAY32): New unspec.
(define_attr type): New type 'array'.
(array{8,16,32}_vis): New patterns.
* config/sparc/ultra1_2.md: Add reservations for 'array'.
* config/sparc/ultra3.md: Likewise.
* config/sparc/niagara.md: Likewise.
* config/sparc/niagara2.md: Likewise.
* config/sparc/sparc.c (sparc_vis_init_builtins): Build new
array builtins.
* config/sparc/visintrin.h (__vis_array8, __vis_array16,
__vis_array32): New.
* doc/extend.texi: Document new VIS builtins.

gcc/testsuite/

* gcc.target/sparc/array.c: New test.
---
 gcc/ChangeLog  |   16 
 gcc/config/sparc/niagara.md|2 +-
 gcc/config/sparc/niagara2.md   |4 ++--
 gcc/config/sparc/sparc.c   |   23 +++
 gcc/config/sparc/sparc.md  |   32 +++-
 gcc/config/sparc/ultra1_2.md   |2 +-
 gcc/config/sparc/ultra3.md |5 +
 gcc/config/sparc/visintrin.h   |   21 +
 gcc/doc/extend.texi|4 
 gcc/testsuite/ChangeLog|4 
 gcc/testsuite/gcc.target/sparc/array.c |   21 +
 11 files changed, 129 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/sparc/array.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 42c50e8..f4b9c8a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2011-09-29  David S. Miller  
+
+   * config/sparc/sparc.md (UNSPEC_ARRAY8, UNSPEC_ARRAY16,
+   UNSPEC_ARRAY32): New unspec.
+   (define_attr type): New type 'array'.
+   (array{8,16,32}_vis): New patterns.
+   * config/sparc/ultra1_2.md: Add reservations for 'array'.
+   * config/sparc/ultra3.md: Likewise.
+   * config/sparc/niagara.md: Likewise.
+   * config/sparc/niagara2.md: Likewise.
+   * config/sparc/sparc.c (sparc_vis_init_builtins): Build new
+   array builtins.
+   * config/sparc/visintrin.h (__vis_array8, __vis_array16,
+   __vis_array32): New.
+   * doc/extend.texi: Document new VIS builtins.
+
 2011-09-29  Iain Sandoe  
 
* config/darwin9.h (STACK_CHECK_STATIC_BUILTIN): Enable for
diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md
index 3e5a3e5..a75088b 100644
--- a/gcc/config/sparc/niagara.md
+++ b/gcc/config/sparc/niagara.md
@@ -114,5 +114,5 @@
  */
 (define_insn_reservation "niag_vis" 8
   (and (eq_attr "cpu" "niagara")
-(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,gsr"))
+(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,gsr,array"))
   "niag_pipe*8")
diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md
index 9ea6b04..f261ac1 100644
--- a/gcc/config/sparc/niagara2.md
+++ b/gcc/config/sparc/niagara2.md
@@ -111,10 +111,10 @@
 
 (define_insn_reservation "niag2_vis" 6
   (and (eq_attr "cpu" "niagara2")
-(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,gsr"))
+(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,array,gsr"))
   "niag2_pipe*6")
 
 (define_insn_reservation "niag3_vis" 9
   (and (eq_attr "cpu" "niagara3")
-(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,gsr"))
+(eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist,edge,array,gsr"))
   "niag2_pipe*9")
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 44d7f20..c8c0677 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -9195,6 +9195,9 @@ sparc_vis_init_builtins (void)
   tree di_ftype_di_di = build_function_type_list (intDI_type_node,
  intDI_type_node,
  intDI_type_node, 0);
+  tree si_ftype_si_si = build_function_type_list (intSI_type_node,
+ intSI_type_node,
+ intSI_type_node, 0);
   tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
ptr_type_node,
intSI_type_node, 0);
@@ -9371,6 +9374,26 @@ sparc_vis_init_builtins (void)
 v2si_ftype_v2si_v2si);
   def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subsi3,
 v1si_ftype_v1si_v1si);
+
+  /* Three-dimensional array addressing.  */
+  if (TARGET_ARCH64)
+{
+  def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
+di_ftype_di_di);
+  def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
+di_ftype_di_di);
+  def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
+di_ftype_di_di);
+}
+  els

Re: [PATCH] Respin sparc pixel-compare patterns using iterators.

2011-09-29 Thread David Miller
From: Uros Bizjak 
Date: Thu, 29 Sep 2011 09:04:37 +0200

> Hello!
> 
>> This is heavily inspired by a proof-of-concept patch David
>> Bremner sent to me the other week.
>>
>>  * config/sparc/sparc.md (UNSPEC_FCMPLE, UNSPEC_FCMPNE,
>>  UNSPEC_FCMPGT, UNSPEC_FCMPEQ): Delete and reduce to...
>>  (UNSPEC_FCMP): New unspec.
>>  (gcond): New code iterator.
>>  (gcond_name): New code attr.
>>  (GCM): New mode iterator.
>>  (gcm_name): New mode attr.
>>  (fcmp{le,ne,gt,eq}{16,32}_vis): Reimplement using iterators.
> 
> You don't need gcond_name, gcond can be used directly in the pattern:

Thanks for the tip!


[PATCH] Remove unnecessary sparc code attr.

2011-09-29 Thread David Miller

Thanks to Uros Bizjak for pointing this out.

Committed to trunk.

* config/sparc/sparc.md (gcond_name): Delete unnecessary code attr.
(VIS pixel-compare insn): Just use .

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@179335 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog |3 +++
 gcc/config/sparc/sparc.md |5 ++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f4b9c8a..0e65631 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -14,6 +14,9 @@
__vis_array32): New.
* doc/extend.texi: Document new VIS builtins.
 
+   * config/sparc/sparc.md (gcond_name): Delete unnecessary code attr.
+   (VIS pixel-compare insn): Just use .
+
 2011-09-29  Iain Sandoe  
 
* config/darwin9.h (STACK_CHECK_STATIC_BUILTIN): Enable for
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index ee772d6..d9bcd31 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -8148,17 +8148,16 @@
   [(set_attr "type" "edge")])
 
 (define_code_iterator gcond [le ne gt eq])
-(define_code_attr gcond_name [(le "le") (ne "ne") (gt "gt") (eq "eq")])
 (define_mode_iterator GCM [V4HI V2SI])
 (define_mode_attr gcm_name [(V4HI "16") (V2SI "32")])
 
-(define_insn "fcmp_vis"
+(define_insn "fcmp_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(gcond:GCM (match_operand:GCM 1 "register_operand" "e")
  (match_operand:GCM 2 "register_operand" "e"))]
 UNSPEC_FCMP))]
   "TARGET_VIS"
-  "fcmp\t%1, %2, %0"
+  "fcmp\t%1, %2, %0"
   [(set_attr "type" "fpmul")
(set_attr "fptype" "double")])
 
-- 
1.7.6.401.g6a319



Re: [PATCH] Remove unnecessary sparc code attr.

2011-09-29 Thread Jakub Jelinek
On Thu, Sep 29, 2011 at 04:01:55AM -0400, David Miller wrote:
> --- a/gcc/config/sparc/sparc.md
> +++ b/gcc/config/sparc/sparc.md
> @@ -8148,17 +8148,16 @@
>[(set_attr "type" "edge")])
>  
>  (define_code_iterator gcond [le ne gt eq])
> -(define_code_attr gcond_name [(le "le") (ne "ne") (gt "gt") (eq "eq")])
>  (define_mode_iterator GCM [V4HI V2SI])
>  (define_mode_attr gcm_name [(V4HI "16") (V2SI "32")])
>  
> -(define_insn "fcmp_vis"
> +(define_insn "fcmp_vis"
>[(set (match_operand:P 0 "register_operand" "=r")
>   (unspec:P [(gcond:GCM (match_operand:GCM 1 "register_operand" "e")
> (match_operand:GCM 2 "register_operand" "e"))]
>UNSPEC_FCMP))]
>"TARGET_VIS"
> -  "fcmp\t%1, %2, %0"
> +  "fcmp\t%1, %2, %0"
>[(set_attr "type" "fpmul")
> (set_attr "fptype" "double")])

On the other side, I'm surprised you don't need to prefix gcm_name with GCM:
- when you have more than one mode iterator in a pattern, IMHO you should
make it clear which mode you are talking about.  Maybe it works, but which
one it is?  The first mode_iterator declared in the *.md files (and used in
the pattern), the last one, the first mode_iterator encountered in the
pattern, the last one?
So IMHO it should be  in both cases.

And there is just one code iterator, can't you use just  instead of
?

Jakub


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Tom de Vries
On 09/29/2011 12:21 AM, Tom de Vries wrote:
> On 09/24/2011 11:31 AM, Richard Guenther wrote:
>> On Tue, Sep 20, 2011 at 1:13 PM, Tom de Vries  wrote:
>>> Hi Richard,
>>>
>>> I have a patch for PR43814. It introduces an option that assumes that 
>>> function
>>> arguments of pointer type are aligned, and uses that information in
>>> tree-ssa-ccp. This enables the memcpy in pr43814-2.c to be inlined.
>>>
>>> I tested the patch successfully on-by-default on x86_64 and i686 (both gcc 
>>> only
>>> builds).
>>>
>>> I also tested the patch on-by-default for ARM (gcc/glibc build). The patch
>>> generated wrong code for uselocale.c:
>>> ...
>>> glibc/locale/locale.h:
>>> ...
>>> /* This value can be passed to `uselocale' and may be returned by
>>>   it. Passing this value to any other function has undefined behavior.  */
>>> # define LC_GLOBAL_LOCALE   ((__locale_t) -1L)
>>> ...
>>> glibc/locale/uselocale.c:
>>> ...
>>> locale_t
>>> __uselocale (locale_t newloc)
>>> {
>>>  locale_t oldloc = _NL_CURRENT_LOCALE;
>>>
>>>  if (newloc != NULL)
>>>{
>>>  const locale_t locobj
>>>= newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc;
>>>
>>> ...
>>> The assumption that function arguments of pointer type are aligned, allowed 
>>> the
>>> test 'newloc == LC_GLOBAL_LOCALE' to evaluate to false.
>>> But the usage of ((__locale_t) -1L) as function argument in uselocale 
>>> violates
>>> that assumption.
>>>
>>> Fixing the definition of LC_GLOBAL_LOCALE allowed the gcc tests to run 
>>> without
>>> regressions for ARM.
>>>
>>> Furthermore, the patch fixes ipa-sra-2.c and ipa-sra-6.c regressions on ARM,
>>> discussed here:
>>> - http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00930.html
>>> - http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00459.html
>>>
>>> But, since glibc uses this construct currently, the option is 
>>> off-by-default for
>>> now.
>>>
>>> OK for trunk?
>>
>> No, I don't like to have an option to control this.  And given the issue
>> you spotted it doesn't look like the best idea either.  This area in GCC
>> is simply too fragile right now :/
>>
>> It would be nice if we could extend IPA-CP to propagate alignment
>> information though.
>>
>> And at some point devise a reliable way for frontends to communicate
>> alignment constraints the middle-end can rely on (well, yes, you could
>> argue that's what TYPE_ALIGN is about, and I thought that maybe we
>> can unconditionally rely on TYPE_ALIGN for pointer PARM_DECLs
>> at least - your example shows we can't).
>>
>> In the end I'd probably say the patch is ok without the option (thus
>> turned on by default), but if LC_GLOBAL_LOCALE is part of the
>> glibc ABI then we clearly can't do this.
>>
> 
> I removed the flag, and enabled the optimization now with the aligned 
> attribute.
> 
> bootstrapped and tested on x86_64 (gcc only build), and build and reg-tested 
> on
> arm (gcc + glibc build), no issues found.
> 

Sorry for the EWRONGPATCH. Correct patch this time.

OK for trunk?

Thanks,
- Tom

> 
> I intend to send a follow-up patch that introduces a target hook
> function_pointers_aligned, that is false by default, and on by default for
> -mandroid. I asked Google to test their Android codebase with the optimization
> on by default. Would such a target hook be acceptable?
> 
>> Richard.
>>
> 
> Thanks,
> - Tom
> 
> 2011-09-29  Tom de Vries 
> 
>   PR target/43814
>   * tree-ssa-ccp.c (get_align_value): New function, factored out of
>   get_value_from_alignment.
>   (get_value_from_alignment): Use get_align_value.
>   (get_value_for_expr): Use get_align_value to handle alignment of
>   function argument pointers with TYPE_USER_ALIGN.
> 
>   * gcc/testsuite/gcc.dg/pr43814.c: New test.
>   * gcc/testsuite/gcc.target/arm/pr43814-2.c: New test.

Index: gcc/tree-ssa-ccp.c
===
--- gcc/tree-ssa-ccp.c (revision 179210)
+++ gcc/tree-ssa-ccp.c (working copy)
@@ -500,20 +500,14 @@ value_to_double_int (prop_value_t val)
 return double_int_zero;
 }
 
-/* Return the value for the address expression EXPR based on alignment
-   information.  */
+/* Return the value for an expr of type TYPE with alignment ALIGN and offset
+   BITPOS relative to the alignment.  */
 
 static prop_value_t
-get_value_from_alignment (tree expr)
+get_align_value (unsigned int align, tree type, unsigned HOST_WIDE_INT bitpos)
 {
-  tree type = TREE_TYPE (expr);
   prop_value_t val;
-  unsigned HOST_WIDE_INT bitpos;
-  unsigned int align;
-
-  gcc_assert (TREE_CODE (expr) == ADDR_EXPR);
 
-  align = get_object_alignment_1 (TREE_OPERAND (expr, 0), &bitpos);
   val.mask
 = double_int_and_not (POINTER_TYPE_P (type) || TYPE_UNSIGNED (type)
 			  ? double_int_mask (TYPE_PRECISION (type))
@@ -529,6 +523,21 @@ get_value_from_alignment (tree expr)
   return val;
 }
 
+/* Return the value for the address expression EXPR based on alignment
+   information.  */
+
+static prop_value_t
+get_v

[Patch, Darwin/libiberty] fix target/48108 by adding a section wrapper scheme to the darwin port.

2011-09-29 Thread Iain Sandoe
It appears that the number of sections in a mach-o file is to be  
limited to 256.
Whilst we all (probably) agree that the documentation is not clear  
about this, there is likely little point in jumping up and down...


It's a show-stopper for LTO with current darwin tool-chains where 'as'  
errors-out for section count > 256.


... so here is a patch that implements a wrapper scheme on top of mach- 
o (used presently only for LTO - but in the most general sense  
available elsewhere to mach-o should the problem re-emerge).


The scheme is switched on by specifying a segment name to  
simple_object_mach_o_match () and always for the __GNU_LTO segment in  
darwin.c.


When the scheme is engaged, three sections are expected within the  
specified segment: an index, a name table and a 'sects' section.


All the section data from the segment are concatenated within the  
'sects' section with the offset to each 'input section', its length  
and its name recorded in the index.  The names themselves are stored  
in a separate table, with identical layout to the original scheme that  
allowed arbitrary section name lengths to be used with mach-o.


I guess that one might consider adding some padding to the index to  
cater for a possible future use of flags or alignment info (but I  
haven't done that at this stage).


Whilst it is darwin-specific, it does need to touch the simple-object  
stuff in libiberty - so I need approval there.


tested with bootstrap-lto across the darwin9, 10 and 11 range on 4.6  
and trunk (11 on trunk only).
The 4.6 tests included Ada, (but Ada won't 'bootstrap-lto' on trunk at  
present for unrelated reasons).


OK for trunk and 4.6 ?
Iain

libiberty:

PR target/48108
* simple-object-mach-o.c (GNU_SECTION_NAMES): Remove.
(GNU_WRAPPER_SECTS, GNU_WRAPPER_INDEX,
GNU_WRAPPER_NAMES): New macros.
(simple_object_mach_o_match): Amend comment and error message.
Do not require that a segment name is specified.
	(simple_object_mach_o_segment): Handle wrapper scheme, if a segment  
name is
	specified.  (simple_object_mach_o_write_section_header): Allow the  
segment name
	to be supplied. (simple_object_mach_o_write_segment): Handle wrapper  
scheme, if
	a segment name is specified.  Ensure that the top-level segment name  
in the load

command is empty.   (simple_object_mach_o_write_to_file): Determine the
	number of sections during segment output, use that in writing the  
header.


gcc:

PR target/48108
* config/darwin.c (top level): Amend comments concerning LTO output.
(lto_section_num): New variable.  (darwin_lto_section_e): New GTY.
(LTO_SECTS_SECTION, LTO_INDEX_SECTION): New.
(LTO_NAMES_SECTION): Rename.
(darwin_asm_named_section): Record LTO section counts and switches
in a vec of darwin_lto_section_e.
(darwin_file_start): Remove unused code.
(darwin_file_end): Put an LTO section termination label.  Handle output
of the wrapped LTO sections, index and names table.

include:

PR target/48108
*simple-object.h: Amend comments for simple read/write to note the
wrapper scheme for Mach-o.

Index: libiberty/simple-object-mach-o.c
===
--- libiberty/simple-object-mach-o.c(revision 179332)
+++ libiberty/simple-object-mach-o.c(working copy)
@@ -170,9 +170,14 @@ struct mach_o_section_64
 
 #define MACH_O_NAME_LEN (16)
 
-/* A GNU specific extension for long section names.  */
+/* A GNU-specific extension to wrap multiple sections using three 
+   mach-o sections within a given segment.  The section '__wrapper_sects'
+   is subdivided according to the index '__wrapper_index' and each sub
+   sect is named according to the names supplied in '__wrapper_names'.  */
 
-#define GNU_SECTION_NAMES "__section_names"
+#define GNU_WRAPPER_SECTS "__wrapper_sects"
+#define GNU_WRAPPER_INDEX "__wrapper_index"
+#define GNU_WRAPPER_NAMES "__wrapper_names"
 
 /* Private data for an simple_object_read.  */
 
@@ -214,8 +219,19 @@ struct simple_object_mach_o_attributes
   unsigned int reserved;
 };
 
-/* See if we have a Mach-O file.  */
+/* See if we have a Mach-O MH_OBJECT file:
 
+   A standard MH_OBJECT (from as) will have three load commands:
+   0 - LC_SEGMENT/LC_SEGMENT64
+   1 - LC_SYMTAB
+   2 - LC_DYSYMTAB
+   
+   The LC_SEGMENT/LC_SEGMENT64 will introduce a single anonymous segment
+   containing all the sections.
+   
+   Files written by simple-object will have only the segment command
+   (no symbol tables).  */
+
 static void *
 simple_object_mach_o_match (
 unsigned char header[SIMPLE_OBJECT_MATCH_HEADER_LEN],
@@ -258,18 +274,10 @@ simple_object_mach_o_match (
 }
 #endif
 
-  /* We require the user to provide a segment name.  This is
- unfortunate but I don't see any good choices here.  */
-
-  if (segment_name == NULL)
+  /* Although a standard MH_OBJECT

Re: [PING] Re: [PATCH] Fix PR50183

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 12:10 AM, William J. Schmidt
 wrote:
> Hi there,
>
> Ping.  I'm seeking approval for this fix on trunk and 4_6-branch.
> Thanks!

Ok.

Thanks,
Richard.

> Bill
>
> On Tue, 2011-09-13 at 17:55 -0500, William J. Schmidt wrote:
>> Greetings,
>>
>> The code to build scops (static control parts) for graphite first
>> rewrites loops into canonical loop-closed SSA form.  PR50183 identifies
>> a scenario where the results do not fulfill all required invariants of
>> this form.  In particular, a value defined inside a loop and used
>> outside that loop must reach exactly one definition, which must be a
>> single-argument PHI node called a close-phi.  When nested loops exist,
>> it is possible that, following the rewrite, a definition may reach two
>> close-phis.  This patch corrects that problem.
>>
>> The problem arises because loops are processed from outside in.  While
>> processing a loop, duplicate close-phis are eliminated.  However,
>> eliminating duplicate close-phis for an inner loop can sometimes create
>> duplicate close-phis for an already-processed outer loop.  This patch
>> detects when this may have occurred and repeats the removal of duplicate
>> close-phis as necessary.
>>
>> The problem was noted on ibm/4_6-branch and 4_6-branch; it is apparently
>> latent on trunk.  The same patch can be applied to all three branches.
>>
>> Bootstrapped and regression-tested on powerpc64-linux.  OK to commit to
>> these three branches?
>>
>> Thanks,
>> Bill
>>
>>
>> 2011-09-13  Bill Schmidt  
>>
>>       * graphite-scop-detection.c (make_close_phi_nodes_unique):  New
>>       forward declaration.
>>       (remove_duplicate_close_phi): Detect and repair creation of
>>       duplicate close-phis for a containing loop.
>>
>>
>> Index: gcc/graphite-scop-detection.c
>> ===
>> --- gcc/graphite-scop-detection.c     (revision 178829)
>> +++ gcc/graphite-scop-detection.c     (working copy)
>> @@ -30,6 +30,9 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "tree-pass.h"
>>  #include "sese.h"
>>
>> +/* Forward declarations.  */
>> +static void make_close_phi_nodes_unique (basic_block);
>> +
>>  #ifdef HAVE_cloog
>>  #include "ppl_c.h"
>>  #include "graphite-ppl.h"
>> @@ -1231,6 +1234,13 @@ remove_duplicate_close_phi (gimple phi, gimple_stm
>>       SET_USE (use_p, res);
>>
>>        update_stmt (use_stmt);
>> +
>> +      /* It is possible that we just created a duplicate close-phi
>> +      for an already-processed containing loop.  Check for this
>> +      case and clean it up.  */
>> +      if (gimple_code (use_stmt) == GIMPLE_PHI
>> +       && gimple_phi_num_args (use_stmt) == 1)
>> +     make_close_phi_nodes_unique (gimple_bb (use_stmt));
>>      }
>>
>>    remove_phi_node (gsi, true);
>>
>>
>
>
>


Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 5:13 AM, Jiangning Liu  wrote:
>
>
>> -Original Message-
>> From: Richard Guenther [mailto:richard.guent...@gmail.com]
>> Sent: Wednesday, September 28, 2011 5:56 PM
>> To: Jiangning Liu
>> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
>>
>> On Wed, Sep 28, 2011 at 11:40 AM, Jiangning Liu 
>> wrote:
>> >
>> >
>> >> -Original Message-
>> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
>> >> Sent: Wednesday, September 28, 2011 5:20 PM
>> >> To: Jiangning Liu
>> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
>> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
>> >>
>> >> On Wed, Sep 28, 2011 at 11:10 AM, Jiangning Liu
>> 
>> >> wrote:
>> >> >
>> >> >
>> >> >> -Original Message-
>> >> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
>> >> >> Sent: Wednesday, September 28, 2011 4:39 PM
>> >> >> To: Jiangning Liu
>> >> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
>> >> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
>> >> >>
>> >> >> On Wed, Sep 28, 2011 at 3:49 AM, Jiangning Liu
>> >> 
>> >> >> wrote:
>> >> >> >
>> >> >> >
>> >> >> >> -Original Message-
>> >> >> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
>> >> >> >> Sent: Tuesday, September 27, 2011 3:41 PM
>> >> >> >> To: Jiangning Liu
>> >> >> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
>> >> >> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
>> >> >> >>
>> >> >> >> On Tue, Sep 27, 2011 at 5:32 AM, Jiangning Liu
>> >> >> 
>> >> >> >> wrote:
>> >> >> >> >> Think of it this way.  What the IR says is there is no
>> barrier
>> >> >> >> between
>> >> >> >> >> those moves.  You either have an implicit barrier (which is
>> >> what
>> >> >> you
>> >> >> >> >> are proposing) or you have it explicitly.  I think we all
>> >> rather
>> >> >> >> have
>> >> >> >> >> more things explicit rather than implicit in the IR.  And
>> that
>> >> >> has
>> >> >> >> >> been the overall feeling for a few years now.
>> >> >> >> >>
>> >> >> >> >
>> >> >> >> > Sorry, I'm afraid I can't agree with you. Instead, I think
>> >> using
>> >> >> >> barrier to describe this kind of dependence is a kind of
>> implicit
>> >> >> >> method. Please note that this is not an usual data dependence
>> >> issue.
>> >> >> >> The stack pointer change doesn't have any dependence with
>> memory
>> >> >> access
>> >> >> >> at all.
>> >> >> >>
>> >> >> >> It is similar to atomic instructions that require being an
>> >> >> >> optimization/memory barrier.  Sure it is not a usual data
>> >> dependence
>> >> >> >> (otherwise we would handle
>> >> >> >> it already), so the targets have to explicitly express the
>> >> >> dependence
>> >> >> >> somehow, for which we only have barriers right now.
>> >> >> >>
>> >> >> >
>> >> >> > Richard,
>> >> >> >
>> >> >> > Thanks for your explanation. It's explicit to back-end, while
>> it's
>> >> >> implicit
>> >> >> > to scheduler in middle end, because barrier can decide
>> dependence
>> >> in
>> >> >> > scheduler but barrier can be generated from several different
>> >> >> scenarios.
>> >> >> > It's unsafe and prone to introduce bug if any one of the
>> scenarios
>> >> >> requiring
>> >> >> > generating barriers is being missed in back-end.
>> >> >> >
>> >> >> > Between middle-end and back-end, we should have interfaces that
>> is
>> >> >> easy to
>> >> >> > be implemented by back-end. After all, middle-end itself can't
>> >> >> consist of a
>> >> >> > compiler, and vice versa. Back-end needs middle-end's help to
>> make
>> >> >> sure
>> >> >> > back-end is easy to be implemented and reduce the possibility
>> of
>> >> >> introducing
>> >> >> > bugs.
>> >> >> >
>> >> >> > Without an explicit hook as I'm proposing, back-end
>> implementers
>> >> have
>> >> >> to
>> >> >> > clearly know all scenarios of generating barrier very clearly,
>> >> >> because there
>> >> >> > isn't any code tips and comments in middle end telling back-end
>> >> the
>> >> >> list of
>> >> >> > all scenarios on generating barriers.
>> >> >> >
>> >> >> > Yes, barrier is a perfect interface for scheduler in theory.
>> But
>> >> from
>> >> >> > engineering point of view, I think it's better to explicitly
>> >> define
>> >> >> an
>> >> >> > interface to describe stack red zone and inform back-end, or
>> vice
>> >> >> versa. Not
>> >> >> > like computer, people is easy to make mistake if you don't tell
>> >> them.
>> >> >> On
>> >> >> > this bug, the fact is over the years different back-ends made
>> >> similar
>> >> >> bugs.
>> >> >> >
>> >> >> > GCC is really a perfect platform on building new ports, and I
>> saw
>> >> a
>> >> >> lot of
>> >> >> > new back-ends. The current middle end is unsafe, if port
>> doesn't
>> >> >> support
>> >> >> > stack red zone and back-ends doesn't generate barrier for it.
>> Why
>> >> >> can't we
>> >> >> > explicitly clarify this in compiler code between middle end and
>> >> back
>> >> >> end?
>> 

Re: [PING] Re: [PATCH] Fix PR50183

2011-09-29 Thread Tobias Grosser

On 09/29/2011 09:58 AM, Richard Guenther wrote:

On Thu, Sep 29, 2011 at 12:10 AM, William J. Schmidt
  wrote:

Hi there,

Ping.  I'm seeking approval for this fix on trunk and 4_6-branch.
Thanks!


Ok.

Yes, also looks good from me. Though, you may want to move the forward
declaration after the "#ifdef HAVE_CLOOG". This makes it clearer that 
the whole file is not compiled, if cloog is not available.


Cheers
Tobi


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Richard Guenther
On Wed, Sep 28, 2011 at 10:08 PM, Jakub Jelinek  wrote:
> On Wed, Sep 28, 2011 at 09:56:27PM +0200, Richard Guenther wrote:
>> There is nothing like "very likely aligned" ;)  Note that what is new is
>
> On non-strict aligned targets there is no reason not to have something like
> "very likely aligned".  You would expand stores/loads as if it was aligned
> in that case, and if it isn't, all you'd need to ensure from that is that
> you don't derive properties about the pointer value from the "likely
> aligned" info, only from alignment.

As if aligned, say, using movaps? ;)  Then we can as well derive
properties about the pointer value.

Note that I don't see a real difference between expanding a load/store
assuming the pointer is aligned from deriving properties about the pointer
value.  To support existing questionable code I'd do both only if
there is a dereference post-dominating the pointer assignment of course.

> "Very likely aligned" is interesting to the vectorizer too, if it is very
> likely something is sufficiently aligned, the vectorizer could decide to
> assume the alignment in the vectorized loop and add the check for the
> alignment to the loop guards.  In the likely case the vectorized loop would
> be used (if other guards were true too), in the unlikely case it is
> unaligned it would just use a slower loop.
>
>> that we now no longer assume alignment by default (we did in the past)
>> and that we derive properties about the pointer _value_ from alignment.
>>
>> I think we can derive pointer values when we see dereferences, the
>> code wouldn't be portable to strict-alignment targets otherwise.  We
>
> But any references?  If you have
> int foo (int *p)
> {
>  memcpy (p, "a", 1);
>  return ((uintptr_t) p & 3) == 0;
> }
> then even if p isn't aligned, this could work even on strict aligned
> targets.

Well, the above isn't a "dereference".  Or rather, if it is, it's a dereference
through a type with assumed alignment of 1 byte.

> Anyway, the arbitrary value in a pointer thing is much more important then
> the rest, so having the dominating dereference test is very important.

Yes.

Richard.

>        Jakub
>


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Jakub Jelinek
On Thu, Sep 29, 2011 at 11:11:12AM +0200, Richard Guenther wrote:
> On Wed, Sep 28, 2011 at 10:08 PM, Jakub Jelinek  wrote:
> > On Wed, Sep 28, 2011 at 09:56:27PM +0200, Richard Guenther wrote:
> >> There is nothing like "very likely aligned" ;)  Note that what is new is
> >
> > On non-strict aligned targets there is no reason not to have something like
> > "very likely aligned".  You would expand stores/loads as if it was aligned
> > in that case, and if it isn't, all you'd need to ensure from that is that
> > you don't derive properties about the pointer value from the "likely
> > aligned" info, only from alignment.
> 
> As if aligned, say, using movaps? ;)  Then we can as well derive
> properties about the pointer value.
> 
> Note that I don't see a real difference between expanding a load/store
> assuming the pointer is aligned from deriving properties about the pointer
> value.  To support existing questionable code I'd do both only if
> there is a dereference post-dominating the pointer assignment of course.

I was talking about e.g. PR49442, where using the unaligned stores has
severe performance hit.  If compiler was hinted that the store pointers are
likely aligned, it could version on the alignment.

> > But any references?  If you have
> > int foo (int *p)
> > {
> >  memcpy (p, "a", 1);
> >  return ((uintptr_t) p & 3) == 0;
> > }
> > then even if p isn't aligned, this could work even on strict aligned
> > targets.
> 
> Well, the above isn't a "dereference".  Or rather, if it is, it's a 
> dereference
> through a type with assumed alignment of 1 byte.

If the dereference is only through the declared type of the parameter, I
feel much safer about it.  But I thought this thread was exactly about
memcpy/memset with int * pointer not dereferenced in any other way.

Jakub


Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-29 Thread Richard Sandiford
Andrew Pinski  writes:
> On Mon, Sep 26, 2011 at 7:28 PM, Jiangning Liu  wrote:
>>
>> Sorry, for this bug, I don’t see your valuable comments previously in either 
>> bug zilla and the [RFC] I sent out long time ago in gcc mail list. What I 
>> see is a bunch of people agreed this problem should be fixed in middle end.
>
> The only person I see that agrees this problem should be fixed in the
> middle-end is Richard S.  Everyone else seems like is saying it should
> be fixed in the back-end.

Did you mean Richard E?

Richard


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 10:27 AM, Tom de Vries  wrote:
> On 09/29/2011 12:21 AM, Tom de Vries wrote:
>> On 09/24/2011 11:31 AM, Richard Guenther wrote:
>>> On Tue, Sep 20, 2011 at 1:13 PM, Tom de Vries  
>>> wrote:
 Hi Richard,

 I have a patch for PR43814. It introduces an option that assumes that 
 function
 arguments of pointer type are aligned, and uses that information in
 tree-ssa-ccp. This enables the memcpy in pr43814-2.c to be inlined.

 I tested the patch successfully on-by-default on x86_64 and i686 (both gcc 
 only
 builds).

 I also tested the patch on-by-default for ARM (gcc/glibc build). The patch
 generated wrong code for uselocale.c:
 ...
 glibc/locale/locale.h:
 ...
 /* This value can be passed to `uselocale' and may be returned by
   it. Passing this value to any other function has undefined behavior.  */
 # define LC_GLOBAL_LOCALE       ((__locale_t) -1L)
 ...
 glibc/locale/uselocale.c:
 ...
 locale_t
 __uselocale (locale_t newloc)
 {
  locale_t oldloc = _NL_CURRENT_LOCALE;

  if (newloc != NULL)
    {
      const locale_t locobj
        = newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc;

 ...
 The assumption that function arguments of pointer type are aligned, 
 allowed the
 test 'newloc == LC_GLOBAL_LOCALE' to evaluate to false.
 But the usage of ((__locale_t) -1L) as function argument in uselocale 
 violates
 that assumption.

 Fixing the definition of LC_GLOBAL_LOCALE allowed the gcc tests to run 
 without
 regressions for ARM.

 Furthermore, the patch fixes ipa-sra-2.c and ipa-sra-6.c regressions on 
 ARM,
 discussed here:
 - http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00930.html
 - http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00459.html

 But, since glibc uses this construct currently, the option is 
 off-by-default for
 now.

 OK for trunk?
>>>
>>> No, I don't like to have an option to control this.  And given the issue
>>> you spotted it doesn't look like the best idea either.  This area in GCC
>>> is simply too fragile right now :/
>>>
>>> It would be nice if we could extend IPA-CP to propagate alignment
>>> information though.
>>>
>>> And at some point devise a reliable way for frontends to communicate
>>> alignment constraints the middle-end can rely on (well, yes, you could
>>> argue that's what TYPE_ALIGN is about, and I thought that maybe we
>>> can unconditionally rely on TYPE_ALIGN for pointer PARM_DECLs
>>> at least - your example shows we can't).
>>>
>>> In the end I'd probably say the patch is ok without the option (thus
>>> turned on by default), but if LC_GLOBAL_LOCALE is part of the
>>> glibc ABI then we clearly can't do this.
>>>
>>
>> I removed the flag, and enabled the optimization now with the aligned 
>> attribute.
>>
>> bootstrapped and tested on x86_64 (gcc only build), and build and reg-tested 
>> on
>> arm (gcc + glibc build), no issues found.
>>
>
> Sorry for the EWRONGPATCH. Correct patch this time.
>
> OK for trunk?

+  else if (val.lattice_val == VARYING

With default-def PARM_DECLs this should be always true, so no need
to check it.

+  && SSA_NAME_IS_DEFAULT_DEF (expr)
+  && TREE_CODE (var) == PARM_DECL
+  && POINTER_TYPE_P (TREE_TYPE (var))
+  && TYPE_USER_ALIGN (TREE_TYPE (TREE_TYPE (var
+   val = get_align_value (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (var))),
+  TREE_TYPE (var), 0);

I realize that the scope where this applies is pretty narrow (and thus
bad fallout is quite unlikely).  But I suppose we should document
somewhere that GCC treats alignment attributes on parameters
more strict than say, on assignments.

Btw, for this particular case Jakub invented __builtin_assume_aligned,
which I think is strictly superior.  So I'm not sure we should promote
the "old way" which isn't very well supported.  Thus, I believe frontends
should insert such builtin calls when they think it is safe to do, I
don't like the middle-end extracting too much alignment information
from types - an area that's very gray in GCC.

I'm leaving this for comments from others for now.

Thanks,
Richard.

> Thanks,
> - Tom
>
>>
>> I intend to send a follow-up patch that introduces a target hook
>> function_pointers_aligned, that is false by default, and on by default for
>> -mandroid. I asked Google to test their Android codebase with the 
>> optimization
>> on by default. Would such a target hook be acceptable?
>>
>>> Richard.
>>>
>>
>> Thanks,
>> - Tom
>>
>> 2011-09-29  Tom de Vries 
>>
>>       PR target/43814
>>       * tree-ssa-ccp.c (get_align_value): New function, factored out of
>>       get_value_from_alignment.
>>       (get_value_from_alignment): Use get_align_value.
>>       (get_value_for_expr): Use get_align_value to handle alignment of
>>       function 

Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 11:20 AM, Jakub Jelinek  wrote:
> On Thu, Sep 29, 2011 at 11:11:12AM +0200, Richard Guenther wrote:
>> On Wed, Sep 28, 2011 at 10:08 PM, Jakub Jelinek  wrote:
>> > On Wed, Sep 28, 2011 at 09:56:27PM +0200, Richard Guenther wrote:
>> >> There is nothing like "very likely aligned" ;)  Note that what is new is
>> >
>> > On non-strict aligned targets there is no reason not to have something like
>> > "very likely aligned".  You would expand stores/loads as if it was aligned
>> > in that case, and if it isn't, all you'd need to ensure from that is that
>> > you don't derive properties about the pointer value from the "likely
>> > aligned" info, only from alignment.
>>
>> As if aligned, say, using movaps? ;)  Then we can as well derive
>> properties about the pointer value.
>>
>> Note that I don't see a real difference between expanding a load/store
>> assuming the pointer is aligned from deriving properties about the pointer
>> value.  To support existing questionable code I'd do both only if
>> there is a dereference post-dominating the pointer assignment of course.
>
> I was talking about e.g. PR49442, where using the unaligned stores has
> severe performance hit.  If compiler was hinted that the store pointers are
> likely aligned, it could version on the alignment.

Well, it could do so anyway?  It just doesn't as the vectorizer cost model
and hw description says using misaligned moves is just fine.

>> > But any references?  If you have
>> > int foo (int *p)
>> > {
>> >  memcpy (p, "a", 1);
>> >  return ((uintptr_t) p & 3) == 0;
>> > }
>> > then even if p isn't aligned, this could work even on strict aligned
>> > targets.
>>
>> Well, the above isn't a "dereference".  Or rather, if it is, it's a 
>> dereference
>> through a type with assumed alignment of 1 byte.
>
> If the dereference is only through the declared type of the parameter, I
> feel much safer about it.  But I thought this thread was exactly about
> memcpy/memset with int * pointer not dereferenced in any other way.

Maybe that was the PR the patch is about, but surely we can't treat
a memcpy (p, ..) like *p.  Which means we'd use the declared type
of p only.  And we can do so only for parameters (intermediate conversions
are dropped), which would make the case apply to artificial testcases only
(and thus not worth).

Richard.

>        Jakub
>


Re: Vector Comparison patch

2011-09-29 Thread Richard Guenther
On Wed, Sep 28, 2011 at 4:23 PM, Richard Guenther
 wrote:
> On Mon, Sep 26, 2011 at 5:43 PM, Richard Guenther
>  wrote:
>> On Mon, Sep 26, 2011 at 4:25 PM, Richard Guenther
>>  wrote:
>>> On Wed, Sep 7, 2011 at 5:06 PM, Joseph S. Myers  
>>> wrote:
 This looks like it has the same issue with maybe needing to use
 TYPE_MAIN_VARIANT in type comparisons as the shuffle patch.
>>>
>>> I don't think so, we move qualifiers to the vector type from the element 
>>> type
>>> in make_vector_type and the tests only look at the component type.
>>>
>>> I am re-testing the patch currently and will commit it if that succeeds.
>>
>> Unfortunately gcc.c-torture/execute/vector-compare-1.c fails with -m32
>> for
>>
>>    vector (2, double) d0;
>>    vector (2, double) d1;
>>    vector (2, long) idres;
>>
>>    d0 = (vector (2, double)){(double)argc,  10.};
>>    d1 = (vector (2, double)){0., (double)-23};
>>    idres = (d0 > d1);
>>
>> as appearantly the type we chose to assign to (d0 > d1) is different
>> from that of idres:
>>
>> /space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
>> error: incompatible types when assigning to type '__vector(2) long
>> int' from type '__vector(2) long long int'^M
>>
>> Adjusting it to vector (2, long long) otoh yields, for -m64:
>>
>> /space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
>> error: incompatible types when assigning to type '__vector(2) long
>> long int' from type '__vector(2) long int'
>>
>> But those two types are at least compatible from their modes.  Joseph,
>> should we accept mode-compatible types in assignments or maybe
>> transparently convert them?
>
> Looks like we have a more suitable solution for these automatically
> generated vector types - mark them with TYPE_VECTOR_OPAQUE.
>
> I'm testing the following incremental patch.
>
> Richard.
>
> Index: gcc/c-typeck.c
> ===
> --- gcc/c-typeck.c.orig 2011-09-28 16:22:10.0 +0200
> +++ gcc/c-typeck.c      2011-09-28 16:18:39.0 +0200
> @@ -9928,8 +9928,10 @@ build_binary_op (location_t location, en
>             }
>
>           /* Always construct signed integer vector type.  */
> -          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
> (type0)), 0);
> -          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS 
> (type0));
> +          intt = c_common_type_for_size (GET_MODE_BITSIZE
> +                                          (TYPE_MODE (TREE_TYPE (type0))), 
> 0);
> +          result_type = build_opaque_vector_type (intt,
> +                                                 TYPE_VECTOR_SUBPARTS 
> (type0));
>           converted = 1;
>           break;
>         }
> @@ -10063,8 +10065,10 @@ build_binary_op (location_t location, en
>             }
>
>           /* Always construct signed integer vector type.  */
> -          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
> (type0)), 0);
> -          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS 
> (type0));
> +          intt = c_common_type_for_size (GET_MODE_BITSIZE
> +                                          (TYPE_MODE (TREE_TYPE (type0))), 
> 0);
> +          result_type = build_opaque_vector_type (intt,
> +                                                 TYPE_VECTOR_SUBPARTS 
> (type0));
>           converted = 1;
>           break;
>         }

That doesn't seem to work either.  Because we treat the opaque and
non-opaque variants of vector as different (the opaque type isn't
a variant type of the non-opaque one - something suspicious anyway).

I'm going to try to apply some surgery on how we build opaque variants
and then re-visit the above again.

Richard.


RE: [PATCH] Fix stack red zone bug (PR38644)

2011-09-29 Thread Jiangning Liu


> -Original Message-
> From: Richard Guenther [mailto:richard.guent...@gmail.com]
> Sent: Thursday, September 29, 2011 5:03 PM
> To: Jiangning Liu
> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> 
> On Thu, Sep 29, 2011 at 5:13 AM, Jiangning Liu 
> wrote:
> >
> >
> >> -Original Message-
> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
> >> Sent: Wednesday, September 28, 2011 5:56 PM
> >> To: Jiangning Liu
> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> >>
> >> On Wed, Sep 28, 2011 at 11:40 AM, Jiangning Liu
> 
> >> wrote:
> >> >
> >> >
> >> >> -Original Message-
> >> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
> >> >> Sent: Wednesday, September 28, 2011 5:20 PM
> >> >> To: Jiangning Liu
> >> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
> >> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> >> >>
> >> >> On Wed, Sep 28, 2011 at 11:10 AM, Jiangning Liu
> >> 
> >> >> wrote:
> >> >> >
> >> >> >
> >> >> >> -Original Message-
> >> >> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
> >> >> >> Sent: Wednesday, September 28, 2011 4:39 PM
> >> >> >> To: Jiangning Liu
> >> >> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
> >> >> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> >> >> >>
> >> >> >> On Wed, Sep 28, 2011 at 3:49 AM, Jiangning Liu
> >> >> 
> >> >> >> wrote:
> >> >> >> >
> >> >> >> >
> >> >> >> >> -Original Message-
> >> >> >> >> From: Richard Guenther [mailto:richard.guent...@gmail.com]
> >> >> >> >> Sent: Tuesday, September 27, 2011 3:41 PM
> >> >> >> >> To: Jiangning Liu
> >> >> >> >> Cc: Andrew Pinski; gcc-patches@gcc.gnu.org
> >> >> >> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> >> >> >> >>
> >> >> >> >> On Tue, Sep 27, 2011 at 5:32 AM, Jiangning Liu
> >> >> >> 
> >> >> >> >> wrote:
> >> >> >> >> >> Think of it this way.  What the IR says is there is no
> >> barrier
> >> >> >> >> between
> >> >> >> >> >> those moves.  You either have an implicit barrier (which
> is
> >> >> what
> >> >> >> you
> >> >> >> >> >> are proposing) or you have it explicitly.  I think we
> all
> >> >> rather
> >> >> >> >> have
> >> >> >> >> >> more things explicit rather than implicit in the
> IR.  And
> >> that
> >> >> >> has
> >> >> >> >> >> been the overall feeling for a few years now.
> >> >> >> >> >>
> >> >> >> >> >
> >> >> >> >> > Sorry, I'm afraid I can't agree with you. Instead, I
> think
> >> >> using
> >> >> >> >> barrier to describe this kind of dependence is a kind of
> >> implicit
> >> >> >> >> method. Please note that this is not an usual data
> dependence
> >> >> issue.
> >> >> >> >> The stack pointer change doesn't have any dependence with
> >> memory
> >> >> >> access
> >> >> >> >> at all.
> >> >> >> >>
> >> >> >> >> It is similar to atomic instructions that require being an
> >> >> >> >> optimization/memory barrier.  Sure it is not a usual data
> >> >> dependence
> >> >> >> >> (otherwise we would handle
> >> >> >> >> it already), so the targets have to explicitly express the
> >> >> >> dependence
> >> >> >> >> somehow, for which we only have barriers right now.
> >> >> >> >>
> >> >> >> >
> >> >> >> > Richard,
> >> >> >> >
> >> >> >> > Thanks for your explanation. It's explicit to back-end,
> while
> >> it's
> >> >> >> implicit
> >> >> >> > to scheduler in middle end, because barrier can decide
> >> dependence
> >> >> in
> >> >> >> > scheduler but barrier can be generated from several
> different
> >> >> >> scenarios.
> >> >> >> > It's unsafe and prone to introduce bug if any one of the
> >> scenarios
> >> >> >> requiring
> >> >> >> > generating barriers is being missed in back-end.
> >> >> >> >
> >> >> >> > Between middle-end and back-end, we should have interfaces
> that
> >> is
> >> >> >> easy to
> >> >> >> > be implemented by back-end. After all, middle-end itself
> can't
> >> >> >> consist of a
> >> >> >> > compiler, and vice versa. Back-end needs middle-end's help
> to
> >> make
> >> >> >> sure
> >> >> >> > back-end is easy to be implemented and reduce the
> possibility
> >> of
> >> >> >> introducing
> >> >> >> > bugs.
> >> >> >> >
> >> >> >> > Without an explicit hook as I'm proposing, back-end
> >> implementers
> >> >> have
> >> >> >> to
> >> >> >> > clearly know all scenarios of generating barrier very
> clearly,
> >> >> >> because there
> >> >> >> > isn't any code tips and comments in middle end telling back-
> end
> >> >> the
> >> >> >> list of
> >> >> >> > all scenarios on generating barriers.
> >> >> >> >
> >> >> >> > Yes, barrier is a perfect interface for scheduler in theory.
> >> But
> >> >> from
> >> >> >> > engineering point of view, I think it's better to explicitly
> >> >> define
> >> >> >> an
> >> >> >> > interface to describe stack red zone and inform back-end, or
> >> vice
> >> >> >> versa. Not
> >> >> >> > like computer, people is easy to make mistake 

Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-29 Thread Jakub Jelinek
On Thu, Sep 29, 2011 at 06:08:50PM +0800, Jiangning Liu wrote:
> As far as I know different back-ends are implementing different
> prologue/epilogue in GCC. If one day this part can be refined and abstracted
> as well, I would say solving this stack-red-zone problem in shared
> prologue/epilogue code would be a perfect solution, and barrier can be
> inserted there.
> 
> I'm not saying you are wrong on keeping scheduler using a pure barrier
> interface. From engineering point of view, I only feel my proposal is so far
> so good, because this patch at least solve the problem for all targets in a
> quite simple way. Maybe it can be improved in future based on this. 

But you don't want to listen about any other alternative, other backends are
happy with being able to put the best kind of barrier at the best spot
in the epilogue and don't need a "generic" solution which won't model very
well the target diversity anyway.

Jakub


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Bernd Schmidt
On 09/29/11 11:26, Richard Guenther wrote:
> Maybe that was the PR the patch is about, but surely we can't treat
> a memcpy (p, ..) like *p.  Which means we'd use the declared type
> of p only.  And we can do so only for parameters (intermediate conversions
> are dropped), which would make the case apply to artificial testcases only
> (and thus not worth).

Didn't we used to do exactly that, though?

Googling shows

http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=325670

but I seem to remember PRs in gcc bugzilla marked not a bug. Or maybe my
memory is playing tricks on me.


Bernd


Re: [Patch, Darwin/libiberty] fix target/48108 by adding a section wrapper scheme to the darwin port.

2011-09-29 Thread Mike Stump
On Sep 29, 2011, at 1:27 AM, Iain Sandoe wrote:
> ... so here is a patch that implements a wrapper scheme on top of mach-o 
> (used presently only for LTO - but in the most general sense available 
> elsewhere to mach-o should the problem re-emerge).

> OK for trunk and 4.6 ?

Ok for all my bits.  I don't know if others want me to own the darwin bits in 
libiberty, or if they want to own them...  I'll let them chime in.


Re: [PATCH, PR43814] Assume function arguments of pointer type are aligned.

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 12:51 PM, Bernd Schmidt  wrote:
> On 09/29/11 11:26, Richard Guenther wrote:
>> Maybe that was the PR the patch is about, but surely we can't treat
>> a memcpy (p, ..) like *p.  Which means we'd use the declared type
>> of p only.  And we can do so only for parameters (intermediate conversions
>> are dropped), which would make the case apply to artificial testcases only
>> (and thus not worth).
>
> Didn't we used to do exactly that, though?

Yes we did.  The point is this is all language semantics and we are not
good at expressing it given the various GCC extensions we have.

struct { int i; } __attribute__((aligned(1))) x;

and &x->i will have type int *, not int __attribute__((aligned(1))) *.
Basically because we share FIELD_DECLs for type variants and
alignment consitutes a variant, the FIELD_DECL has natural
alignment and whenever the context (that we access a component of x)
vanishes the alignment information vanishes.

Thus, the middle-end cannot reasonably derive anything from type
alignment.  Because the frontends get it wrong.  Which means we
used to miscompile valid code (using GCC extensions, of course).

Richard.

> Googling shows
>
> http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=325670
>
> but I seem to remember PRs in gcc bugzilla marked not a bug. Or maybe my
> memory is playing tricks on me.
>
>
> Bernd
>


Re: [Patch,AVR]: Better log output with -mdeb/-mdebug=

2011-09-29 Thread Georg-Johann Lay
Georg-Johann Lay wrote:
> This is a tentative patch for better support of logging information for avr BE
> developers.
> 
> There are situations where it is more convenient to let the compiler produce
> information than to debug into the compiler. One example are -da dumps.
> 
> This patch proposes a better support to print information by means of a
> printf-like function via %-codes.
> 
> The current debug output with avr-gcc's option -mdeb produces bulk of
> information that is very hard to read because
>  - there is much output
>  - there is no information on current_function_decl
>  - there is no information on current pass name/number
>  - there is no print-like function so the trees, rtxes so that it is
>tedious to get formatted output.
> 
> For example, the following call to avr_edump
> 
> static int
> avr_OS_main_function_p (tree func)
> {
>   avr_edump ("%?: %t\n", func);
>   return avr_lookup_function_attribute1 (func, "OS_main");
> }
> 
> prints additional information in a convenient way (foo is function to be 
> compiled):
> 
> avr_OS_main_function_p[foo:pro_and_epilogue(202)]:  type  type  ...
> 
> Wondering that similar functionality is not provided by GCC itself, I wrote
> this patch. GCC's diagnostic seems to be overwhelmingly complicated and not
> intended for the purpose mentioned above.
> 
> The code is dead code at the moment. No function in the BE uses the new
> functions. This patch just adds them.
> 
> Moreover; I don't know if avr port maintainers or global reviewers like such
> stuff in the compiler... Therefore it's just tentative draft.
> 
> Supported %-codes are:
> 
>   r: rtx
>   t: tree
>   T: tree (brief)
>   C: enum rtx_code
>   m: enum machine_mode
>   R: enum reg_class
>   L: insn list
>   H: location_t
> 
>   -- no args --
>   A: call abort()
>   f: current_function_name()
>   F: caller (via __FUNCTION__)
>   P: Pass name and number
>   ?: Print caller, current function and pass info
> 
>   -- same as printf --
>   %: %
>   c: char
>   s: string
>   d: int (decimal)
>   x: int (hex)
> 
> These codes cover great deal of BE developers needs and if something is 
> missing
> it can be added easily.

Here is now the patch for review.

As Denis proposed in http://gcc.gnu.org/ml/gcc/2011-09/msg00357.html all
additional functions are put in the new file avr-log.c.

-mdebug= is a new command line option that takes a string argument to filter
specific log messages.  The purpose of -mdebug=all is to be similar to -mdeb
and, e.g, -mdebug=rtx_costs,legitimate_address_p is intended to print
information from respective named hooks.

avr.c:avr_option_override calls avr-log.c:avr__set_avr_debug to parse -mdebug
and set bitfield avr_debug accordingly, but using the log functions and
avr_debug is still to come.  This patch just supplies the basis.

I opened PR50566 to collect changes for better logging.

Ok for trunk?

Johann

PR target/50566
* config.gcc (extra_objs): Add avr-log.o for $target in:
avr-*-rtems*, avr-*-*.
* config/avr/t-avr (avr-log.o): New rule to compile...
* config/avr/avr-log.c: ...this new file.
* config/avr/avr.opt (mlog=): New option.
* config/avr/avr-protos.h (avr_edump, avr_fdump): New macros.
(avr_log_set_caller_e, avr_log_set_caller_f): New prototypes.
(avr_log_set_avr_log): New prototype.
(avr_log_t): New typedef.
(avr_log): New declaration.
* config/avr/avr.c (avr_option_override): Call avr_log_set_avr_log.


Index: config.gcc
===
--- config.gcc	(revision 179334)
+++ config.gcc	(working copy)
@@ -939,14 +939,14 @@ avr-*-rtems*)
 	libgcc_tm_file="$libgcc_tm_file avr/avr-lib.h"
 	tmake_file="avr/t-avr t-rtems avr/t-rtems"
 	extra_gcc_objs="driver-avr.o avr-devices.o"
-	extra_objs="avr-devices.o"
+	extra_objs="avr-devices.o avr-log.o"
 	;;
 avr-*-*)
 	tm_file="elfos.h avr/elf.h avr/avr.h dbxelf.h newlib-stdint.h"
 	libgcc_tm_file="$libgcc_tm_file avr/avr-lib.h"
 	use_gcc_stdint=wrap
 	extra_gcc_objs="driver-avr.o avr-devices.o"
-	extra_objs="avr-devices.o"
+	extra_objs="avr-devices.o avr-log.o"
 	;;
 bfin*-elf*)
 	tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h bfin/elf.h"
Index: config/avr/avr-log.c
===
--- config/avr/avr-log.c	(revision 0)
+++ config/avr/avr-log.c	(revision 0)
@@ -0,0 +1,314 @@
+/* Subroutines for log output for Atmel AVR back end.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Georg-Johann Lay (a...@gjlay.de)
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+   
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the 

Re: Use of vector instructions in memmov/memset expanding

2011-09-29 Thread Jakub Jelinek
Hi!

On Thu, Sep 29, 2011 at 03:14:40PM +0400, Michael Zolotukhin wrote:
+/* { dg-options "-O2 -march=atom -mtune=atom -m64 -dp" } */
   

The testcases are wrong, -m64 or -m32 should never appear in dg-options,
instead if the testcase is specific to -m64, it should be guarded with
/* { dg-do compile { target lp64 } } */
resp. ia32 (or ilp32, depending on what exactly should be done for -mx32),
if you have the same testcase for -m32 and -m64, but just want different
scan-assembler for the two cases, then just guard the scan-assembler
with lp64 resp. ia32/ilp32 target and add second one for the other target.

Jakub


[PATCH] Rework how we build opaque vector types

2011-09-29 Thread Richard Guenther

This makes sure that opaque vector types are treated as compatible
with their non-opaque variant (something that they certainly should be).
I made them variant types instead of completely separate types,
also making sure we properly share them.

Bootstrapped and tested on x86_64-unknown-linux-gnu ontop of a patch
that will excercise them on this target.

Committed.

Richard.

2011-09-27  Richard Guenther  

* tree.c (build_opaque_vector_type): Make opaque vectors
variant types of the corresponding non-opaque type.  Make
sure to share opaque vector types properly.

Index: gcc/tree.c
===
*** gcc/tree.c  (revision 179308)
--- gcc/tree.c  (working copy)
*** build_vector_type (tree innertype, int n
*** 9752,9768 
return make_vector_type (innertype, nunits, VOIDmode);
  }
  
! /* Similarly, but takes the inner type and number of units, which must be
!a power of two.  */
  
  tree
  build_opaque_vector_type (tree innertype, int nunits)
  {
!   tree t;
!   innertype = build_distinct_type_copy (innertype);
!   t = make_vector_type (innertype, nunits, VOIDmode);
!   TYPE_VECTOR_OPAQUE (t) = true;
!   return t;
  }
  
  
--- 9752,9780 
return make_vector_type (innertype, nunits, VOIDmode);
  }
  
! /* Similarly, but builds a variant type with TYPE_VECTOR_OPAQUE set.  */
  
  tree
  build_opaque_vector_type (tree innertype, int nunits)
  {
!   tree t = make_vector_type (innertype, nunits, VOIDmode);
!   tree cand;
!   /* We always build the non-opaque variant before the opaque one,
!  so if it already exists, it is TYPE_NEXT_VARIANT of this one.  */
!   cand = TYPE_NEXT_VARIANT (t);
!   if (cand
!   && TYPE_VECTOR_OPAQUE (cand)
!   && check_qualified_type (cand, t, TYPE_QUALS (t)))
! return cand;
!   /* Othewise build a variant type and make sure to queue it after
!  the non-opaque type.  */
!   cand = build_distinct_type_copy (t);
!   TYPE_VECTOR_OPAQUE (cand) = true;
!   TYPE_CANONICAL (cand) = TYPE_CANONICAL (t);
!   TYPE_NEXT_VARIANT (cand) = TYPE_NEXT_VARIANT (t);
!   TYPE_NEXT_VARIANT (t) = cand;
!   TYPE_MAIN_VARIANT (cand) = TYPE_MAIN_VARIANT (t);
!   return cand;
  }
  
  


Re: [Patch,AVR]: Better log output with -mdeb/-mdebug=

2011-09-29 Thread Denis Chertykov
2011/9/29 Georg-Johann Lay :
> Georg-Johann Lay wrote:
>> This is a tentative patch for better support of logging information for avr 
>> BE
>> developers.
>>
>> There are situations where it is more convenient to let the compiler produce
>> information than to debug into the compiler. One example are -da dumps.
>>
>> This patch proposes a better support to print information by means of a
>> printf-like function via %-codes.
>>
>> The current debug output with avr-gcc's option -mdeb produces bulk of
>> information that is very hard to read because
>>  - there is much output
>>  - there is no information on current_function_decl
>>  - there is no information on current pass name/number
>>  - there is no print-like function so the trees, rtxes so that it is
>>    tedious to get formatted output.
>>
>> For example, the following call to avr_edump
>>
>> static int
>> avr_OS_main_function_p (tree func)
>> {
>>   avr_edump ("%?: %t\n", func);
>>   return avr_lookup_function_attribute1 (func, "OS_main");
>> }
>>
>> prints additional information in a convenient way (foo is function to be 
>> compiled):
>>
>> avr_OS_main_function_p[foo:pro_and_epilogue(202)]: >     type >         type > ...
>>
>> Wondering that similar functionality is not provided by GCC itself, I wrote
>> this patch. GCC's diagnostic seems to be overwhelmingly complicated and not
>> intended for the purpose mentioned above.
>>
>> The code is dead code at the moment. No function in the BE uses the new
>> functions. This patch just adds them.
>>
>> Moreover; I don't know if avr port maintainers or global reviewers like such
>> stuff in the compiler... Therefore it's just tentative draft.
>>
>> Supported %-codes are:
>>
>>   r: rtx
>>   t: tree
>>   T: tree (brief)
>>   C: enum rtx_code
>>   m: enum machine_mode
>>   R: enum reg_class
>>   L: insn list
>>   H: location_t
>>
>>   -- no args --
>>   A: call abort()
>>   f: current_function_name()
>>   F: caller (via __FUNCTION__)
>>   P: Pass name and number
>>   ?: Print caller, current function and pass info
>>
>>   -- same as printf --
>>   %: %
>>   c: char
>>   s: string
>>   d: int (decimal)
>>   x: int (hex)
>>
>> These codes cover great deal of BE developers needs and if something is 
>> missing
>> it can be added easily.
>
> Here is now the patch for review.
>
> As Denis proposed in http://gcc.gnu.org/ml/gcc/2011-09/msg00357.html all
> additional functions are put in the new file avr-log.c.
>
> -mdebug= is a new command line option that takes a string argument to filter
> specific log messages.  The purpose of -mdebug=all is to be similar to -mdeb
> and, e.g, -mdebug=rtx_costs,legitimate_address_p is intended to print
> information from respective named hooks.
>
> avr.c:avr_option_override calls avr-log.c:avr__set_avr_debug to parse -mdebug
> and set bitfield avr_debug accordingly, but using the log functions and
> avr_debug is still to come.  This patch just supplies the basis.
>
> I opened PR50566 to collect changes for better logging.
>
> Ok for trunk?

Ok.
Please apply.

Denis


Re: Vector Comparison patch

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 12:00 PM, Richard Guenther
 wrote:
> On Wed, Sep 28, 2011 at 4:23 PM, Richard Guenther
>  wrote:
>> On Mon, Sep 26, 2011 at 5:43 PM, Richard Guenther
>>  wrote:
>>> On Mon, Sep 26, 2011 at 4:25 PM, Richard Guenther
>>>  wrote:
 On Wed, Sep 7, 2011 at 5:06 PM, Joseph S. Myers  
 wrote:
> This looks like it has the same issue with maybe needing to use
> TYPE_MAIN_VARIANT in type comparisons as the shuffle patch.

 I don't think so, we move qualifiers to the vector type from the element 
 type
 in make_vector_type and the tests only look at the component type.

 I am re-testing the patch currently and will commit it if that succeeds.
>>>
>>> Unfortunately gcc.c-torture/execute/vector-compare-1.c fails with -m32
>>> for
>>>
>>>    vector (2, double) d0;
>>>    vector (2, double) d1;
>>>    vector (2, long) idres;
>>>
>>>    d0 = (vector (2, double)){(double)argc,  10.};
>>>    d1 = (vector (2, double)){0., (double)-23};
>>>    idres = (d0 > d1);
>>>
>>> as appearantly the type we chose to assign to (d0 > d1) is different
>>> from that of idres:
>>>
>>> /space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
>>> error: incompatible types when assigning to type '__vector(2) long
>>> int' from type '__vector(2) long long int'^M
>>>
>>> Adjusting it to vector (2, long long) otoh yields, for -m64:
>>>
>>> /space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
>>> error: incompatible types when assigning to type '__vector(2) long
>>> long int' from type '__vector(2) long int'
>>>
>>> But those two types are at least compatible from their modes.  Joseph,
>>> should we accept mode-compatible types in assignments or maybe
>>> transparently convert them?
>>
>> Looks like we have a more suitable solution for these automatically
>> generated vector types - mark them with TYPE_VECTOR_OPAQUE.
>>
>> I'm testing the following incremental patch.
>>
>> Richard.
>>
>> Index: gcc/c-typeck.c
>> ===
>> --- gcc/c-typeck.c.orig 2011-09-28 16:22:10.0 +0200
>> +++ gcc/c-typeck.c      2011-09-28 16:18:39.0 +0200
>> @@ -9928,8 +9928,10 @@ build_binary_op (location_t location, en
>>             }
>>
>>           /* Always construct signed integer vector type.  */
>> -          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
>> (type0)), 0);
>> -          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS 
>> (type0));
>> +          intt = c_common_type_for_size (GET_MODE_BITSIZE
>> +                                          (TYPE_MODE (TREE_TYPE (type0))), 
>> 0);
>> +          result_type = build_opaque_vector_type (intt,
>> +                                                 TYPE_VECTOR_SUBPARTS 
>> (type0));
>>           converted = 1;
>>           break;
>>         }
>> @@ -10063,8 +10065,10 @@ build_binary_op (location_t location, en
>>             }
>>
>>           /* Always construct signed integer vector type.  */
>> -          intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
>> (type0)), 0);
>> -          result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS 
>> (type0));
>> +          intt = c_common_type_for_size (GET_MODE_BITSIZE
>> +                                          (TYPE_MODE (TREE_TYPE (type0))), 
>> 0);
>> +          result_type = build_opaque_vector_type (intt,
>> +                                                 TYPE_VECTOR_SUBPARTS 
>> (type0));
>>           converted = 1;
>>           break;
>>         }
>
> That doesn't seem to work either.  Because we treat the opaque and
> non-opaque variants of vector as different (the opaque type isn't
> a variant type of the non-opaque one - something suspicious anyway).
>
> I'm going to try to apply some surgery on how we build opaque variants
> and then re-visit the above again.

Bootstrapped and tested on x86_64-unknown-linux-gnu and installed.

Richard.

> Richard.
>


[Patch, Fortran, committed] PR 50547 & 50553

2011-09-29 Thread Janus Weil
Hi all,

I just committed as obvious another pair of accepts-invalid fixes:

http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=179345

Also I would like to thank Vittorio for the large number of bug
reports he has been delivering lately. (I could make a comment about
his great talent for writing invalid Fortran code, but that would
probably sound more negative than intended ;)

Cheers,
Janus


Re: [PING] Re: [PATCH] Fix PR50183

2011-09-29 Thread William J. Schmidt
On Thu, 2011-09-29 at 10:03 +0100, Tobias Grosser wrote:
> On 09/29/2011 09:58 AM, Richard Guenther wrote:
> > On Thu, Sep 29, 2011 at 12:10 AM, William J. Schmidt
> >   wrote:
> >> Hi there,
> >>
> >> Ping.  I'm seeking approval for this fix on trunk and 4_6-branch.
> >> Thanks!
> >
> > Ok.
> Yes, also looks good from me. Though, you may want to move the forward
> declaration after the "#ifdef HAVE_CLOOG". This makes it clearer that 
> the whole file is not compiled, if cloog is not available.

Good point.  I'll make that change.

Thanks!
Bill

> 
> Cheers
> Tobi



[wwwdocs] Re: [2/2] tree-ssa-strlen optimization pass

2011-09-29 Thread Jakub Jelinek
Hi!

On Mon, Sep 26, 2011 at 04:33:05PM +0200, Richard Guenther wrote:
> Btw, can you add a changes.html entry for this?

Like this?

--- htdocs/gcc-4.7/changes.html 27 Sep 2011 18:42:56 -  1.38
+++ htdocs/gcc-4.7/changes.html 29 Sep 2011 12:56:42 -
@@ -124,6 +124,48 @@ void bar (void)
   possibly only by inlining all calls.  Cloning causes a lot less code size
   growth.
 
+
+String length optimization pass has been added.  This pass attempts
+  to track string lengths and optimize various standard C string functions
+  like strlen, strchr, strcpy,
+  strcat, stpcpy and their
+  _FORTIFY_SOURCE counterparts into faster alternatives.
+  This pass is enabled by default at -O2 or above, unless
+  optimizing for size, and can be disabled by
+  -fno-optimize-strlen option.  The pass can e.g. optimize
+  
+char *bar (const char *a)
+{
+  size_t l = strlen (a) + 2;
+  char *p = malloc (l); if (p == NULL) return p;
+  strcpy (p, a); strcat (p, "/"); return p;
+}
+  
+  can be optimized into:
+  
+char *bar (const char *a)
+{
+  size_t tmp = strlen (a);
+  char *p = malloc (tmp + 2); if (p == NULL) return p;
+  memcpy (p, a, tmp); memcpy (p + tmp, "/", 2); return p;
+}
+  
+  or for hosted compilations where stpcpy is available in the
+  runtime and headers provide its prototype, e.g.
+  
+void foo (char *a, const char *b, const char *c, const char *d)
+{
+  strcpy (a, b); strcat (a, c); strcat (a, d);
+}
+  
+  into:
+  
+void foo (char *a, const char *b, const char *c, const char *d)
+{
+  strcpy (stpcpy (stpcpy (a, b), c), d);
+}
+  
+
   
 
 New Languages and Language specific improvements


Jakub


Re: [wwwdocs] Re: [2/2] tree-ssa-strlen optimization pass

2011-09-29 Thread Richard Guenther
On Thu, Sep 29, 2011 at 3:02 PM, Jakub Jelinek  wrote:
> Hi!
>
> On Mon, Sep 26, 2011 at 04:33:05PM +0200, Richard Guenther wrote:
>> Btw, can you add a changes.html entry for this?
>
> Like this?

Yes.

Thanks,
Richard.

> --- htdocs/gcc-4.7/changes.html 27 Sep 2011 18:42:56 -      1.38
> +++ htdocs/gcc-4.7/changes.html 29 Sep 2011 12:56:42 -
> @@ -124,6 +124,48 @@ void bar (void)
>       possibly only by inlining all calls.  Cloning causes a lot less code 
> size
>       growth.
>     
> +
> +    String length optimization pass has been added.  This pass attempts
> +      to track string lengths and optimize various standard C string 
> functions
> +      like strlen, strchr, strcpy,
> +      strcat, stpcpy and their
> +      _FORTIFY_SOURCE counterparts into faster alternatives.
> +      This pass is enabled by default at -O2 or above, unless
> +      optimizing for size, and can be disabled by
> +      -fno-optimize-strlen option.  The pass can e.g. optimize
> +      
> +char *bar (const char *a)
> +{
> +  size_t l = strlen (a) + 2;
> +  char *p = malloc (l); if (p == NULL) return p;
> +  strcpy (p, a); strcat (p, "/"); return p;
> +}
> +      
> +      can be optimized into:
> +      
> +char *bar (const char *a)
> +{
> +  size_t tmp = strlen (a);
> +  char *p = malloc (tmp + 2); if (p == NULL) return p;
> +  memcpy (p, a, tmp); memcpy (p + tmp, "/", 2); return p;
> +}
> +      
> +      or for hosted compilations where stpcpy is available in 
> the
> +      runtime and headers provide its prototype, e.g.
> +      
> +void foo (char *a, const char *b, const char *c, const char *d)
> +{
> +  strcpy (a, b); strcat (a, c); strcat (a, d);
> +}
> +      
> +      into:
> +      
> +void foo (char *a, const char *b, const char *c, const char *d)
> +{
> +  strcpy (stpcpy (stpcpy (a, b), c), d);
> +}
> +      
> +    
>   
>
>  New Languages and Language specific improvements
>
>
>        Jakub
>


Re: [PATCH, PR50527] Don't assume alignment of vla-related allocas.

2011-09-29 Thread Tom de Vries
On 09/28/2011 11:53 AM, Richard Guenther wrote:
> On Wed, Sep 28, 2011 at 11:34 AM, Tom de Vries  wrote:
>> Richard,
>>
>> I got a patch for PR50527.
>>
>> The patch prevents the alignment of vla-related allocas to be set to
>> BIGGEST_ALIGNMENT in ccp. The alignment may turn out smaller after folding
>> the alloca.
>>
>> Bootstrapped and regtested on x86_64.
>>
>> OK for trunk?
> 
> Hmm.  As gfortran with -fstack-arrays uses VLAs it's probably bad that
> the vectorizer then will no longer see that the arrays are properly aligned.
> 
> I'm not sure what the best thing to do is here, other than trying to record
> the alignment requirement of the VLA somewhere.
> 
> Forcing the alignment of the alloca replacement decl to BIGGEST_ALIGNMENT
> has the issue that it will force stack-realignment which isn't free (and the
> point was to make the decl cheaper than the alloca).  But that might
> possibly be the better choice.
> 
> Any other thoughts?

How about the approach in this (untested) patch? Using the DECL_ALIGN of the vla
for the new array prevents stack realignment for folded vla-allocas, also for
large vlas.

This will not help in vectorizing large folded vla-allocas, but I think it's not
reasonable to expect BIGGEST_ALIGNMENT when writing a vla (although that has
been the case up until we started to fold). If you want to trigger vectorization
for a vla, you can still use the aligned attribute on the declaration.

Still, the unfolded vla-allocas will have BIGGEST_ALIGNMENT, also without using
an attribute on the decl. This patch exploits this by setting it at the end of
the 3rd pass_ccp, renamed to pass_ccp_last. This is not very effective in
propagation though, because although the ptr_info of the lhs is propagated via
copy_prop afterwards, it's not propagated anymore via ccp.

Another way to do this would be to set BIGGEST_ALIGNMENT at the end of ccp2 and
not fold during ccp3.

Thanks,
- Tom

Index: gcc/tree-pass.h
===
--- gcc/tree-pass.h (revision 179043)
+++ gcc/tree-pass.h (working copy)
@@ -389,6 +389,7 @@ extern struct gimple_opt_pass pass_iv_op
 extern struct gimple_opt_pass pass_tree_loop_done;
 extern struct gimple_opt_pass pass_ch;
 extern struct gimple_opt_pass pass_ccp;
+extern struct gimple_opt_pass pass_ccp_last;
 extern struct gimple_opt_pass pass_phi_only_cprop;
 extern struct gimple_opt_pass pass_build_ssa;
 extern struct gimple_opt_pass pass_build_alias;
Index: gcc/tree-ssa-ccp.c
===
--- gcc/tree-ssa-ccp.c (revision 179210)
+++ gcc/tree-ssa-ccp.c (working copy)
@@ -167,8 +167,12 @@ typedef struct prop_value_d prop_value_t
doing the store).  */
 static prop_value_t *const_val;
 
+/* Indicates whether we're in pass_ccp_last.  */
+static bool ccp_last;
+
 static void canonicalize_float_value (prop_value_t *);
 static bool ccp_fold_stmt (gimple_stmt_iterator *);
+static bool fold_builtin_alloca_for_var_p (gimple);
 
 /* Dump constant propagation value VAL to file OUTF prefixed by PREFIX.  */
 
@@ -813,6 +817,15 @@ ccp_finalize (void)
 	  || !POINTER_TYPE_P (TREE_TYPE (name)))
 	continue;
 
+  if (ccp_last && !SSA_NAME_IS_DEFAULT_DEF (name)
+	  && gimple_call_alloca_for_var_p (SSA_NAME_DEF_STMT (name))
+	  && !fold_builtin_alloca_for_var_p (SSA_NAME_DEF_STMT (name)))
+	{
+	  pi = get_ptr_info (name);
+	  pi->align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
+	  continue;
+	}
+
   val = get_value (name);
   if (val->lattice_val != CONSTANT
 	  || TREE_CODE (val->value) != INTEGER_CST)
@@ -1492,6 +1505,7 @@ evaluate_stmt (gimple stmt)
   tree simplified = NULL_TREE;
   ccp_lattice_t likelyvalue = likely_value (stmt);
   bool is_constant = false;
+  unsigned int align;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
 {
@@ -1632,10 +1646,13 @@ evaluate_stmt (gimple stmt)
 	  break;
 
 	case BUILT_IN_ALLOCA:
+	  align = (gimple_call_alloca_for_var_p (stmt)
+		   ? TYPE_ALIGN (TREE_TYPE (gimple_get_lhs (stmt)))
+		   : BIGGEST_ALIGNMENT);
 	  val.lattice_val = CONSTANT;
 	  val.value = build_int_cst (TREE_TYPE (gimple_get_lhs (stmt)), 0);
 	  val.mask = shwi_to_double_int
-		  	   (~(((HOST_WIDE_INT) BIGGEST_ALIGNMENT)
+		  	   (~(((HOST_WIDE_INT) align)
 			  / BITS_PER_UNIT - 1));
 	  break;
 
@@ -1685,27 +1702,25 @@ evaluate_stmt (gimple stmt)
   return val;
 }
 
-/* Detects a vla-related alloca with a constant argument.  Declares fixed-size
-   array and return the address, if found, otherwise returns NULL_TREE.  */
-
-static tree
-fold_builtin_alloca_for_var (gimple stmt)
+static bool
+fold_builtin_alloca_for_var_p (gimple stmt)
 {
-  unsigned HOST_WIDE_INT size, threshold, n_elem;
-  tree lhs, arg, block, var, elem_type, array_type;
-  unsigned int align;
+  unsigned HOST_WIDE_INT size, threshold;
+  tree lhs, arg, block;
+
+  gcc_checking_assert (gimple_call_alloca_for_var_p (stmt));
 
   /* Get lhs.  */
   lhs

Re: Ping: [ARM] Loosen MODES_TIEABLE_P

2011-09-29 Thread Ramana Radhakrishnan
On 20 September 2011 11:12, Richard Sandiford
 wrote:
> Ping for:
>
>    http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00626.html
>
> which allows taking the vector subreg of a structure mode to be treated
> as "cheap" (because direct register references are allowed).  At the
> moment we treat them as expensive.

This is OK.

Ramana
>
> Richard
>


Re: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog= [2/n]

2011-09-29 Thread Georg-Johann Lay
This is the second patch in this series.

Functions that formerly used fprintf/debug_rtx now use avr_edump and the use
sites of TARGET_ALL_DEBUG are mapped to respective flags of avr_log.

avr_log_vadump uses %b to print bool.

The patch adds outputs for the results of avr_rtx_costs and wraps the worker
function.

Ok to apply?

Johann

PR target/50566
* config/avr/avr-log.c (avr_log_vadump): Use %b to print bool.
* config/avr/avr.c (avr_rtx_costs_1): New static function, renamed
from avr_rtx_costs.
(avr_legitimate_address_p): Use avr_edump to print log information
filtered by avr_log.
(extra_constraint_Q): Ditto.
(avr_legitimize_address): Ditto.
(avr_rtx_costs): Ditto.  Rewrite as wrapper for avr_rtx_costs_1.
(final_prescan_insn): Use avr_log.rtx_costs as filter.

Index: config/avr/avr-log.c
===
--- config/avr/avr-log.c	(revision 179344)
+++ config/avr/avr-log.c	(working copy)
@@ -42,6 +42,7 @@
 
   == known %-codes ==
   
+  b: bool  
   r: rtx
   t: tree
   T: tree (brief)
@@ -190,6 +191,10 @@ avr_log_vadump (FILE *file, const char *
   fprintf (file, "%x", va_arg (ap, int));
   break;
 
+case 'b':
+  fprintf (file, "%s", va_arg (ap, int) ? "true" : "false");
+  break;
+
 case 'c':
   fputc (va_arg (ap, int), file);
   break;
Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 179344)
+++ config/avr/avr.c	(working copy)
@@ -1193,27 +1193,7 @@ avr_cannot_modify_jumps_p (void)
 bool
 avr_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
 {
-  enum reg_class r = NO_REGS;
-  
-  if (TARGET_ALL_DEBUG)
-{
-  fprintf (stderr, "mode: (%s) %s %s %s %s:",
-	   GET_MODE_NAME(mode),
-	   strict ? "(strict)": "",
-	   reload_completed ? "(reload_completed)": "",
-	   reload_in_progress ? "(reload_in_progress)": "",
-	   reg_renumber ? "(reg_renumber)" : "");
-  if (GET_CODE (x) == PLUS
-	  && REG_P (XEXP (x, 0))
-	  && GET_CODE (XEXP (x, 1)) == CONST_INT
-	  && INTVAL (XEXP (x, 1)) >= 0
-	  && INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode)
-	  && reg_renumber
-	  )
-	fprintf (stderr, "(r%d ---> r%d)", REGNO (XEXP (x, 0)),
-		 true_regnum (XEXP (x, 0)));
-  debug_rtx (x);
-}
+  reg_class_t r = NO_REGS;
   
   if (REG_P (x) && (strict ? REG_OK_FOR_BASE_STRICT_P (x)
 : REG_OK_FOR_BASE_NOSTRICT_P (x)))
@@ -1247,10 +1227,27 @@ avr_legitimate_address_p (enum machine_m
 {
   r = POINTER_REGS;
 }
-  if (TARGET_ALL_DEBUG)
+
+  if (avr_log.legitimate_address_p)
 {
-  fprintf (stderr, "   ret = %c\n", r + '0');
+  avr_edump ("\n%?: ret=%d=%R, mode=%m strict=%d "
+ "reload_completed=%d reload_in_progress=%d %s:",
+ !!r, r, mode, strict, reload_completed, reload_in_progress,
+ reg_renumber ? "(reg_renumber)" : "");
+  
+  if (GET_CODE (x) == PLUS
+  && REG_P (XEXP (x, 0))
+  && CONST_INT_P (XEXP (x, 1))
+  && IN_RANGE (INTVAL (XEXP (x, 1)), 0, MAX_LD_OFFSET (mode))
+  && reg_renumber)
+{
+  avr_edump ("(r%d ---> r%d)", REGNO (XEXP (x, 0)),
+ true_regnum (XEXP (x, 0)));
+}
+  
+  avr_edump ("\n%r\n", x);
 }
+  
   return r == NO_REGS ? 0 : (int)r;
 }
 
@@ -1260,30 +1257,35 @@ avr_legitimate_address_p (enum machine_m
 rtx
 avr_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
 {
+  bool big_offset_p = false;
+  
   x = oldx;
-  if (TARGET_ALL_DEBUG)
+  
+  if (GET_CODE (oldx) == PLUS
+  && REG_P (XEXP (oldx, 0)))
 {
-  fprintf (stderr, "legitimize_address mode: %s", GET_MODE_NAME(mode));
-  debug_rtx (oldx);
+  if (REG_P (XEXP (oldx, 1)))
+x = force_reg (GET_MODE (oldx), oldx);
+  else if (CONST_INT_P (XEXP (oldx, 1)))
+{
+	  int offs = INTVAL (XEXP (oldx, 1));
+  if (frame_pointer_rtx != XEXP (oldx, 0)
+  && offs > MAX_LD_OFFSET (mode))
+{
+  big_offset_p = true;
+  x = force_reg (GET_MODE (oldx), oldx);
+}
+}
 }
   
-  if (GET_CODE (oldx) == PLUS
-  && REG_P (XEXP (oldx,0)))
+  if (avr_log.legitimize_address)
 {
-  if (REG_P (XEXP (oldx,1)))
-	x = force_reg (GET_MODE (oldx), oldx);
-  else if (GET_CODE (XEXP (oldx, 1)) == CONST_INT)
-	{
-	  int offs = INTVAL (XEXP (oldx,1));
-	  if (frame_pointer_rtx != XEXP (oldx,0))
-	if (offs > MAX_LD_OFFSET (mode))
-	  {
-		if (TARGET_ALL_DEBUG)
-		  fprintf (stderr, "force_reg (big offset)\n");
-		x = force_reg (GET_MODE (oldx), oldx);
-	  }
-	}
+  avr_edump ("\n%?: mode=%m\n %r\n", mode, oldx);
+
+  if (x != oldx)
+avr_edump (" %

Re: [patch] Support vectorization of widening shifts

2011-09-29 Thread Ramana Radhakrishnan
On 19 September 2011 08:54, Ira Rosen  wrote:

>
> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
> and arm-linux-gnueabi
> OK for mainline?

Sorry I missed this patch. Is there any reason why we need unspecs in
this case ? Can't this be represented by subregs and zero/ sign
extensions in RTL without the UNSPECs ?

cheers
Ramana

>
> Thanks,
> Ira
>
> ChangeLog:
>
>        * doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
> vec_widen_sshiftl_hi,
>        vec_widen_sshiftl_lo): Document.
>        * tree-pretty-print.c (dump_generic_node): Handle 
> WIDEN_SHIFT_LEFT_EXPR,
>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>        (op_code_prio): Likewise.
>        (op_symbol_code): Handle WIDEN_SHIFT_LEFT_EXPR.
>        * optabs.c (optab_for_tree_code): Handle
>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>        (init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
>        * optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
>        * genopinit.c (optabs): Initialize the new optabs.
>        * expr.c (expand_expr_real_2): Handle
>        VEC_WIDEN_SHIFT_LEFT_HI_EXPR and VEC_WIDEN_SHIFT_LEFT_LO_EXPR.
>        * gimple-pretty-print.c (dump_binary_rhs): Likewise.
>        * tree-vectorizer.h (NUM_PATTERNS): Increase to 6.
>        * tree.def (WIDEN_SHIFT_LEFT_EXPR, VEC_WIDEN_SHIFT_LEFT_HI_EXPR,
>        VEC_WIDEN_SHIFT_LEFT_LO_EXPR): New.
>        * cfgexpand.c (expand_debug_expr):  Handle new tree codes.
>        * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
>        vect_recog_widen_shift_pattern.
>        (vect_handle_widen_mult_by_const): Rename...
>        (vect_handle_widen_op_by_const): ...to this.  Handle shifts.
>        Add a new argument, update documentation.
>        (vect_recog_widen_mult_pattern): Assume that only second
>        operand can be constant.  Update call to
>        vect_handle_widen_op_by_const.
>        (vect_operation_fits_smaller_type): Add the already existing
>        def stmt to the list of pattern statements.
>        (vect_recog_widen_shift_pattern): New.
>        * tree-vect-stmts.c (vectorizable_type_promotion): Handle
>        widening shifts.
>        (supportable_widening_operation): Likewise.
>        * tree-inline.c (estimate_operator_cost): Handle new tree codes.
>        * tree-vect-generic.c (expand_vector_operations_1): Likewise.
>        * tree-cfg.c (verify_gimple_assign_binary): Likewise.
>        * config/arm/neon.md (neon_vec_shiftl_lo_): New.
>        (vec_widen_shiftl_lo_, neon_vec_shiftl_hi_,
>        vec_widen_shiftl_hi_, neon_vec_shift_left_):
>        Likewise.
>        * tree-vect-slp.c (vect_build_slp_tree): Require same shift operand
>        for widening shift.
>
> testsuite/ChangeLog:
>
>       * gcc.dg/vect/vect-widen-shift-s16.c: New.
>       * gcc.dg/vect/vect-widen-shift-s8.c: New.
>       * gcc.dg/vect/vect-widen-shift-u16.c: New.
>       * gcc.dg/vect/vect-widen-shift-u8.c: New.
>


[Patch Ada/Darwin] factor LIBGNAT_TARGET_PAIRS for darwin sub-targets.

2011-09-29 Thread Iain Sandoe
No functional change, just factor out the common LIBGNAT_TARGET_PAIRS  
across the port.

OK for trunk?
Iain

ada:

* gcc-interface/Makefile.in (Darwin): Factor LIBGNAT_TARGET_PAIRS
across the port.

Index: gcc/ada/gcc-interface/Makefile.in
===
--- gcc/ada/gcc-interface/Makefile.in   (revision 179346)
+++ gcc/ada/gcc-interface/Makefile.in   (working copy)
@@ -2136,21 +2136,23 @@ endif
 
 ifeq ($(strip $(filter-out darwin%,$(osys))),)
   SO_OPTS = -shared-libgcc
-  ifeq ($(strip $(filter-out %86,$(arch))),)
-LIBGNAT_TARGET_PAIRS = \
+  LIBGNAT_TARGET_PAIRS = \
 a-intnam.ads





Re: [Patch Ada/Darwin] factor LIBGNAT_TARGET_PAIRS for darwin sub-targets.

2011-09-29 Thread Arnaud Charlet
> No functional change, just factor out the common LIBGNAT_TARGET_PAIRS
> across the port.
> OK for trunk?

OK

> ada:
> 
>   * gcc-interface/Makefile.in (Darwin): Factor LIBGNAT_TARGET_PAIRS
>   across the port.



Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Ollie Wild
I think what you're looking for is:

if (DECL_CLONED_FUNCTION_P (clone) && DECL_CLONED_FUNCTION (clone) == decl)

That's a much cleaner implementation.

Ollie

On Tue, Sep 27, 2011 at 6:18 PM, Delesley Hutchins  wrote:
>
> This patch fixes a bug in the parser which cause an internal compiler
> error when copying attributes from cloned methods.  The bug occurs
> when a class has both an annotated constructor and a template method.
>
> Bootstrapped and passed gcc regression testsuite on
> x86_64-unknown-linux-gnu.  Okay for google/gcc-4_6?
>
>  -DeLesley
>
> cp/Changelog.google-4_6:
> 2011-9-27  DeLesley Hutchins  
>
>      * cp/parser.c  (cp_parser_late_parsing_attribute_arg_lists)
>         fixed case where the potential clone is a template.
>
> testsuite/Changelog.google-4_6:
> 2011-9-27  DeLesley Hutchins  
>
>      * testsuite/g++.dg/thread-ann/thread_annot_lock-81.C
>        (new regression test)
>
> --
> DeLesley Hutchins | Software Engineer | deles...@google.com | 505-206-0315


[Patch] Support DEC-C extensions

2011-09-29 Thread Tristan Gingold
Hi,

DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least one 
extension that is difficult to work-around as it is used in the system headers: 
varargs without named argument.  It makes sense on VMS because of its ABI which 
pass the number of arguments used.

This patch allows such declaration when the new flag -fdecc-extensions is used 
(C and ObjC only as C++ already allows that).

I use the plural for consistency with other -fxxx-extensions and in case where 
others extensions are added.

Bootstrapped on x86_64-darwin, no regressions.

Ok for mainline ?

Tristan.

gcc/
2011-09-29  Tristan Gingold  

* doc/invoke.texi: Document -fdecc-extensions.
* c-parser.c (c_parser_parms_list_declarator): Handle -fdecc-extensions.

gcc/c-family/
2011-09-29  Tristan Gingold  

* c.opt (fdecc-extensions): New.

gcc/testsuite/
2011-09-29  Tristan Gingold  

* gcc.dg/va-arg-4.c: New test.
* gcc.dg/va-arg-5.c: Ditto.


diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index e6ac5dc..4879cff 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -739,6 +739,10 @@ fconstexpr-depth=
 C++ ObjC++ Joined RejectNegative UInteger Var(max_constexpr_depth) Init(512)
 -fconstexpr-depth= Specify maximum constexpr recursion depth
 
+fdecc-extensions
+C ObjC Var(flag_decc_extensions)
+Enable DEC-C language extensions
+
 fdeduce-init-list
 C++ ObjC++ Var(flag_deduce_init_list) Init(1)
 -fno-deduce-init-list  disable deduction of std::initializer_list for a 
template type parameter from a brace-enclosed initializer-list
diff --git a/gcc/c-parser.c b/gcc/c-parser.c
index ff376df..788d13c 100644
--- a/gcc/c-parser.c
+++ b/gcc/c-parser.c
@@ -3159,10 +3159,19 @@ c_parser_parms_list_declarator (c_parser *parser, tree 
attrs, tree expr)
   if (c_parser_next_token_is (parser, CPP_ELLIPSIS))
 {
   struct c_arg_info *ret = build_arg_info ();
-  /* Suppress -Wold-style-definition for this case.  */
-  ret->types = error_mark_node;
-  error_at (c_parser_peek_token (parser)->location,
-   "ISO C requires a named argument before %<...%>");
+
+  if (flag_decc_extensions)
+{
+  /* F (...) is allowed by DEC-C.  */
+  ret->types = NULL_TREE;
+}
+  else
+{
+  /* Suppress -Wold-style-definition for this case.  */
+  ret->types = error_mark_node;
+  error_at (c_parser_peek_token (parser)->location,
+"ISO C requires a named argument before %<...%>");
+}
   c_parser_consume_token (parser);
   if (c_parser_next_token_is (parser, CPP_CLOSE_PAREN))
{
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e166964..1fceace 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -170,7 +170,7 @@ in the following sections.
 @item C Language Options
 @xref{C Dialect Options,,Options Controlling C Dialect}.
 @gccoptlist{-ansi  -std=@var{standard}  -fgnu89-inline @gol
--aux-info @var{filename} @gol
+-aux-info @var{filename} -fdecc-extensions @gol
 -fno-asm  -fno-builtin  -fno-builtin-@var{function} @gol
 -fhosted  -ffreestanding -fopenmp -fms-extensions -fplan9-extensions @gol
 -trigraphs  -no-integrated-cpp  -traditional  -traditional-cpp @gol
@@ -1733,6 +1733,16 @@ fields declared using a typedef.  @xref{Unnamed 
Fields,,Unnamed
 struct/union fields within structs/unions}, for details.  This is only
 supported for C, not C++.
 
+@item -fdecc-extensions
+Accept some non-standard constructs used by DEC-C in VMS header files.
+
+This allows varargs functions without named argument.  VMS is able to
+support this feature because of its call convention which pass the
+number of arguments in a register.  Although it is possible to define
+such a function, this is not very usefull as it is not possible to read
+the arguments.  This is only supported for C as this construct is
+allowed by C++.
+
 @item -trigraphs
 @opindex trigraphs
 Support ISO C trigraphs.  The @option{-ansi} option (and @option{-std}
diff --git a/gcc/testsuite/gcc.dg/va-arg-4.c b/gcc/testsuite/gcc.dg/va-arg-4.c
new file mode 100644
index 000..6d737c4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/va-arg-4.c
@@ -0,0 +1,3 @@
+/* { dg-do compile } */
+#include 
+extern void baz(...);  /* { dg-error "requires a named argument" } */
diff --git a/gcc/testsuite/gcc.dg/va-arg-5.c b/gcc/testsuite/gcc.dg/va-arg-5.c
new file mode 100644
index 000..015f592
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/va-arg-5.c
@@ -0,0 +1,4 @@
+/* { dg-do compile } */
+/* { dg-options "-fdecc-extensions" } */
+#include 
+extern void baz(...);



Re: Vector shuffling

2011-09-29 Thread Richard Henderson
On 09/29/2011 03:44 AM, Artem Shinkarov wrote:
> Here is a new version of the patch which hopefully fixes all the
> formatting issues and uses expand_simple_binop instead of force_reg in
> binary operations.
> 
> Ok?

Well, it's certainly not perfect by any means.  But I guess I can fix
things up myself once this patch is applied.


r~


Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Ollie Wild
On Thu, Sep 29, 2011 at 10:13 AM, Delesley Hutchins  wrote:
>
> Unfortunately, DECL_CLONED_FUNCTION_P is not actually a predicate that allows 
> you to call DECL_CLONED_FUNCTION safely.  Look at the definition of the 
> macros; despite what the comments say, DECL_CLONED_FUNCTION_P may return true 
> in cases where DECL_CLONED_FUNCTION will still crash.  The correct fix is to 
> fix the macros, but I have no understanding of what they are actually doing.  
> :-(
>   -DeLesley

Diego, can you comment?

Ollie


Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Diego Novillo
On Thu, Sep 29, 2011 at 11:26, Ollie Wild  wrote:
> On Thu, Sep 29, 2011 at 10:13 AM, Delesley Hutchins  
> wrote:
>>
>> Unfortunately, DECL_CLONED_FUNCTION_P is not actually a predicate that 
>> allows you
>> to call DECL_CLONED_FUNCTION safely.  Look at the definition of the macros; 
>> despite
>> what the comments say, DECL_CLONED_FUNCTION_P may return true in cases where
>> DECL_CLONED_FUNCTION will still crash.  The correct fix is to fix the 
>> macros, but I
>> have no understanding of what they are actually doing.  :-(
>>   -DeLesley
>
> Diego, can you comment?

Really?  That's surprising.  I would certainly expect
DECL_CLONED_FUNCTION_P to be exactly the right predicate to guard
DECL_CLONED_FUNCTION with.  That's how it's used elsewhere.

Delesley, can you give more details on when DECL_CLONED_FUNCTION_P
fails?  Changing that predicate with:

  if (DECL_CLONED_FUNCTION_P (clone)
  && DECL_CLONED_FUNCTION (clone) == decl)

should be all you need.  If that's not working, then send me the test
case, cause I'll be confused :)


Diego.


Re: [Patch] Support DEC-C extensions

2011-09-29 Thread Basile Starynkevitch
On Thu, 29 Sep 2011 17:10:26 +0200
Tristan Gingold  wrote:

> Hi,
> 
> DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least one 
> extension that is difficult to work-around as it is used in the system 
> headers: varargs without named argument.  It makes sense on VMS because of 
> its ABI which pass the number of arguments used.
> 

I believe that such an extension is useful on other systems, even when their 
ABI don't
pass the number of arguments.

The use case I would have in mind is when the signature of the called function 
(that is
the number & types of arguments) is determined by something else, perhaps a 
global
variable or data. Think e.g. of a printf-like function, except that the format 
string is
conventionally assigned to some fixed global before calling it.

Perhaps also the comments and explanation might reflect such kind of usage?

And perhaps then such an extension might be trigerred by a flag which don't 
even mention
DEC. Eg -fveryvariadic-extensions ?

Regards.

-- 
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basilestarynkevitchnet mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mine, sont seulement les miennes} ***


argument of alloca in terms of units or not

2011-09-29 Thread Tom de Vries
Richard,

in gimplify_vla_decl, the alloca argument seems to be the size of the vla in 
units:
...
  t = build_call_expr (t, 1, DECL_SIZE_UNIT (decl));
...

I wonder why we are going through this 8 vs. BITS_PER_UNIT conversion here:
...
  elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1);
  n_elem = size * 8 / BITS_PER_UNIT;
  align = MIN (size * 8, BIGGEST_ALIGNMENT);
...
The element size is BITS_PER_UNIT, so 1 unit, and size is in units.

Shouldn't this be:
...
Index: tree-ssa-ccp.c
===
--- tree-ssa-ccp.c (revision 179210)
+++ tree-ssa-ccp.c (working copy)
@@ -1722,8 +1722,8 @@ fold_builtin_alloca_for_var (gimple stmt

   /* Declare array.  */
   elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1);
-  n_elem = size * 8 / BITS_PER_UNIT;
-  align = MIN (size * 8, BIGGEST_ALIGNMENT);
+  n_elem = size;
+  align = MIN (size * BITS_PER_UNIT, BIGGEST_ALIGNMENT);
   if (align < BITS_PER_UNIT)
 align = BITS_PER_UNIT;
   array_type = build_array_type_nelts (elem_type, n_elem);
...

Thanks,
- Tom


Re: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog= [2/n]

2011-09-29 Thread Denis Chertykov
2011/9/29 Georg-Johann Lay :
> This is the second patch in this series.
>
> Functions that formerly used fprintf/debug_rtx now use avr_edump and the use
> sites of TARGET_ALL_DEBUG are mapped to respective flags of avr_log.
>
> avr_log_vadump uses %b to print bool.
>
> The patch adds outputs for the results of avr_rtx_costs and wraps the worker
> function.
>
> Ok to apply?
>
> Johann
>
>        PR target/50566
>        * config/avr/avr-log.c (avr_log_vadump): Use %b to print bool.
>        * config/avr/avr.c (avr_rtx_costs_1): New static function, renamed
>        from avr_rtx_costs.
>        (avr_legitimate_address_p): Use avr_edump to print log information
>        filtered by avr_log.
>        (extra_constraint_Q): Ditto.
>        (avr_legitimize_address): Ditto.
>        (avr_rtx_costs): Ditto.  Rewrite as wrapper for avr_rtx_costs_1.
>        (final_prescan_insn): Use avr_log.rtx_costs as filter.


Approved.

Denis.


[PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Delesley Hutchins
I don't have a test case, but look at the definitions of the two
macros in cp/cp-tree.h. DECL_CLONED_FUNCTION_P calls
decl_cloned_function_p, passing true as the second argument,  while
DECL_CLONED_FUNCTION makes the same call, but passes false.  Now look
at the definition of  decl_cloned_function_p in cp/class.c.  If the
second argument is true, it will step into templates, and if it is
false, it won't.  Incidentally, the ICE occurs when
DECL_CLONED_FUNCTION is applied to a template function, so this is not
a hypothetical case.  :-)

IMHO, it doesn't make sense to me to have a predicate that can
potentially succeed on templates, when the macro itself will fail.
However, I don't understand how cloned functions work, so I may be
missing something here -- perhaps a template function is never a
clone, so the predicate still returns NULL in that case?  But if so,
then why step into templates at all?

  -DeLesley

On Thu, Sep 29, 2011 at 8:42 AM, Diego Novillo  wrote:
>
> On Thu, Sep 29, 2011 at 11:26, Ollie Wild  wrote:
> > On Thu, Sep 29, 2011 at 10:13 AM, Delesley Hutchins  
> > wrote:
> >>
> >> Unfortunately, DECL_CLONED_FUNCTION_P is not actually a predicate that 
> >> allows you
> >> to call DECL_CLONED_FUNCTION safely.  Look at the definition of the 
> >> macros; despite
> >> what the comments say, DECL_CLONED_FUNCTION_P may return true in cases 
> >> where
> >> DECL_CLONED_FUNCTION will still crash.  The correct fix is to fix the 
> >> macros, but I
> >> have no understanding of what they are actually doing.  :-(
> >>   -DeLesley
> >
> > Diego, can you comment?
>
> Really?  That's surprising.  I would certainly expect
> DECL_CLONED_FUNCTION_P to be exactly the right predicate to guard
> DECL_CLONED_FUNCTION with.  That's how it's used elsewhere.
>
> Delesley, can you give more details on when DECL_CLONED_FUNCTION_P
> fails?  Changing that predicate with:
>
>              if (DECL_CLONED_FUNCTION_P (clone)
>                  && DECL_CLONED_FUNCTION (clone) == decl)
>
> should be all you need.  If that's not working, then send me the test
> case, cause I'll be confused :)
>
>
> Diego.


Re: Intrinsics for N2965: Type traits and base classes

2011-09-29 Thread Benjamin Kosnik

> OK. Here are some simple benchmarks. I simulated heavy use of
> reflection with 1000 classes that each had about a thousand base
> classes. I also created a super-simple typelist class
> 
> template struct typelist {}; // Variadic templates rock
> 
> If bases returns a typelist, the program takes about 4 sec.
> If bases returns a tuple, the program takes about 4 min.
> 
> If I make the program any bigger, the tuple case fails to compile
> with spurious error messages, while the typelist version stays quick.
> 
> Given that metaprograms typically create large class hierarchies
> (look at Alexandrescu's CreateScatterHierarchy that he uses to
> implement factory in the Modern C++ design book) and that compile
> times are an enormous obstacle to metaprogramming, I don't think
> these tests are at all ridiculous.
> 
> I think this shows we need to return a typelist instead of a tuple.

Yes, compelling.
 
> As I mentioned earlier, I could just return the typelist, or hide it
> by returning an unspecified type (which would actually be a typelist)
> that you would apply a first<> and a rest<> template to walk through.

The interface is still simple, I like it.

> This would give us more flexibility for the future (e.g., if a
> standard typelist type is adopted. Likewise, we would be covered if
> wanted to change bases implementation in the future to return an
> associative container. For example, if using size bases::type>>::value to count the number of occurrences of A as a
> base class of E turns out to be useful).

This plan sounds excellent to me.

-benjamin


Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Diego Novillo

On 11-09-29 13:21 , Delesley Hutchins wrote:

I don't have a test case, but look at the definitions of the two
macros in cp/cp-tree.h. DECL_CLONED_FUNCTION_P calls
decl_cloned_function_p, passing true as the second argument,  while
DECL_CLONED_FUNCTION makes the same call, but passes false.  Now look
at the definition of  decl_cloned_function_p in cp/class.c.  If the
second argument is true, it will step into templates, and if it is
false, it won't.  Incidentally, the ICE occurs when
DECL_CLONED_FUNCTION is applied to a template function, so this is not
a hypothetical case.  :-)


But notice that STRIP_TEMPLATE is a NOP when DECL is not a 
TEMPLATE_DECL.  So, I'm not sure where you saw it ICE.  We are already 
using this idiom all over the parser, so it would be great if you could 
produce a test case for the failure you have in mind.


Incidentally, I applied the variant of the patch Ollie and I suggested 
and the testcase works fine with it (while it fails without the patch, 
of course).



Diego.


Commit: FRV: Fix problems building toolchain.

2011-09-29 Thread Nick Clifton
Hi Guys,

  I am applying the attached patch to fix the FRV backend so that the
  frv-elf toolchain will build.

  The patch to frv_function_prologue was actually created by Bernd, and
  I have been using it in my local tree for some time now.  It works
  around a problem generating dwarf2 debug information for function
  prologues when sometimes CALL_ARG_LOCATION notes can be separated from
  their CALL insns.

Cheers
  Nick

gcc/ChangeLog
2011-09-29  Nick Clifton  
Bernd Schmidt  

* config/frv/frvbegin.c: Fix location of unwind-dw2-fde.h header
file.
* config/frv/frvend.c: Likewise.
* config/frv/frv.c (frv_function_prologue): Move misplaced
CALL_ARG_LOCATION notes back to their proper locations.

Index: gcc/config/frv/frvbegin.c
===
--- gcc/config/frv/frvbegin.c   (revision 179334)
+++ gcc/config/frv/frvbegin.c   (working copy)
@@ -28,7 +28,7 @@
 
 #include "defaults.h"
 #include 
-#include "unwind-dw2-fde.h"
+#include "../libgcc/unwind-dw2-fde.h"
 #include "gbl-ctors.h"
 
 /*  Declare a pointer to void function type.  */
Index: gcc/config/frv/frvend.c
===
--- gcc/config/frv/frvend.c (revision 179334)
+++ gcc/config/frv/frvend.c (working copy)
@@ -25,7 +25,7 @@
 
 #include "defaults.h"
 #include 
-#include "unwind-dw2-fde.h"
+#include "../libgcc/unwind-dw2-fde.h"
 
 #ifdef __FRV_UNDERSCORE__
 #define UNDERSCORE "_"
Index: gcc/config/frv/frv.c
===
--- gcc/config/frv/frv.c(revision 179334)
+++ gcc/config/frv/frv.c(working copy)
@@ -1424,6 +1424,8 @@
 static void
 frv_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
 {
+  rtx insn, next, last_call;
+
   /* If no frame was created, check whether the function uses a call
  instruction to implement a far jump.  If so, save the link in gr3 and
  replace all returns to LR with returns to GR3.  GR3 is used because it
@@ -1464,6 +1466,32 @@
 
   /* Allow the garbage collector to free the nops created by frv_reorg.  */
   memset (frv_nops, 0, sizeof (frv_nops));
+
+  /* Locate CALL_ARG_LOCATION notes that have been misplaced
+ and move them back to where they should be located.  */
+  last_call = NULL_RTX;
+  for (insn = get_insns (); insn; insn = next)
+{
+  next = NEXT_INSN (insn);
+  if (CALL_P (insn)
+ || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
+ && CALL_P (XVECEXP (PATTERN (insn), 0, 0
+   last_call = insn;
+
+  if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
+   continue;
+
+  if (NEXT_INSN (last_call) == insn)
+   continue;
+
+  NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
+  PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
+  PREV_INSN (insn) = last_call;
+  NEXT_INSN (insn) = NEXT_INSN (last_call);
+  PREV_INSN (NEXT_INSN (insn)) = insn;
+  NEXT_INSN (PREV_INSN (insn)) = insn;
+  last_call = insn;
+}
 }
 
 


Re: [Patch, Fortran, committed] PR 50547 & 50553

2011-09-29 Thread Toon Moene

On 09/29/2011 02:09 PM, Janus Weil wrote:


Hi all,

I just committed as obvious another pair of accepts-invalid fixes:

http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=179345

Also I would like to thank Vittorio for the large number of bug
reports he has been delivering lately. (I could make a comment about
his great talent for writing invalid Fortran code, but that would
probably sound more negative than intended ;)


In fact, I *do* consider this a virtue - in most cases it will catch 
errors in the parser or resolver, but there are also those who will lead 
to (hidden) bugs in later stages of the compiler.


In the days of g77 we had one person who made a program to insert random 
one-character changes into valid programs until he hit an ICE.


Of course, that was all automated ... it lead to a *lot* of bug fixes.

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/
Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news


Re: [PATCH 1/3] Change random seeds to 64bit and drop re-crcing

2011-09-29 Thread H.J. Lu
On Wed, Sep 28, 2011 at 5:49 AM, Andi Kleen  wrote:
> From: Andi Kleen 
>
> I had some trouble with random build failures in a large LTO project
> and it turned out to be random seed collisions in a highly parallel build
> (thanks to Honza for suggesting that)
>
> There were multiple problems:
> - The way to generate the random seed is not very random (milliseconds time 
> plus pid)
> and prone to collisions on highly parallel builds
> - It's only 32bit
> - Several users take the existing ascii seed and re-CRC32 it again, which
> doesn't exactly improve it.
>
> This patch changes that to:
> - Always use 64bit seeds as numbers (no re-crcing)
> - Change all users to use HOST_WIDE_INT
> - When the user specifies a random seed it's still crc32ed, but only in
> this case.
>
> Passes bootstrap + testsuite on x86_64-linux.
>
> gcc/cp:
>
> 2011-09-26   Andi Kleen 
>
>        * repo.c (finish_repo): Use HOST_WIDE_INT_PRINT_HEX_PURE.
>
> gcc/:
>
> 2011-09-26   Andi Kleen 
>
>        * hwint.h (HOST_WIDE_INT_PRINT_HEX_PURE): Add.
>        * lto-streamer.c (lto_get_section_name): Remove crc32_string.
>        Handle numerical random seed.
>        * lto-streamer.h (lto_file_decl_data): Change id to unsigned 
> HOST_WIDE_INT.
>        * toplev.c (random_seed): Add.
>        (init_random_seed): Change for numerical random seed.
>        (get_random_seed): Return as HOST_WIDE_INT.
>        (set_random_seed): Crc32 existing string.
>        * toplev.h (get_random_seed): Change to numercal return.
>        * tree.c (get_file_function_name): Remove CRC. Handle numerical random 
> seed.
>
> gcc/lto/:
>
> 2011-09-26   Andi Kleen 
>
>        * lto.c (lto_resolution_read): Remove id dumping.
>        (lto_section_with_id): Turn id HOST_WIDE_ID.
>        (create_subid_section_table): Dito.

This patch caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50568

-- 
H.J.


Re: [PATCH] Remove unnecessary sparc code attr.

2011-09-29 Thread David Miller
From: Jakub Jelinek 
Date: Thu, 29 Sep 2011 10:12:58 +0200

> On the other side, I'm surprised you don't need to prefix gcm_name with GCM:
> - when you have more than one mode iterator in a pattern, IMHO you should
> make it clear which mode you are talking about.  Maybe it works, but which
> one it is?  The first mode_iterator declared in the *.md files (and used in
> the pattern), the last one, the first mode_iterator encountered in the
> pattern, the last one?
> So IMHO it should be  in both cases.
> 
> And there is just one code iterator, can't you use just  instead of
> ?

You're right on all counts, thus I committed the following.  Thanks Jakub!


More sparc pixel-compare insn pattern cleanups.

* config/sparc/sparc.md (VIS pixel-compare insn): There is only one
code iterator used, so just use .  There are two mode iterators
so explicitly use .

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@179366 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog |6 +-
 gcc/config/sparc/sparc.md |4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c831d39..ea5c6d0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -33,7 +33,7 @@
forward declaration.
(remove_duplicate_close_phi): Detect and repair creation of
duplicate close-phis for a containing loop.
-   
+
 2011-09-27   Andi Kleen 
 
* gcc.c (get_local_tick). Rename to get_random_number.
@@ -112,6 +112,10 @@
* config/sparc/sparc.md (gcond_name): Delete unnecessary code attr.
(VIS pixel-compare insn): Just use .
 
+   * config/sparc/sparc.md (VIS pixel-compare insn): There is only one
+   code iterator used, so just use .  There are two mode iterators
+   so explicitly use .
+
 2011-09-29  Iain Sandoe  
 
* config/darwin9.h (STACK_CHECK_STATIC_BUILTIN): Enable for
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index d9bcd31..2def8d1 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -8151,13 +8151,13 @@
 (define_mode_iterator GCM [V4HI V2SI])
 (define_mode_attr gcm_name [(V4HI "16") (V2SI "32")])
 
-(define_insn "fcmp_vis"
+(define_insn "fcmp_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(gcond:GCM (match_operand:GCM 1 "register_operand" "e")
  (match_operand:GCM 2 "register_operand" "e"))]
 UNSPEC_FCMP))]
   "TARGET_VIS"
-  "fcmp\t%1, %2, %0"
+  "fcmp\t%1, %2, %0"
   [(set_attr "type" "fpmul")
(set_attr "fptype" "double")])
 
-- 
1.7.6.401.g6a319



Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Delesley Hutchins
The ICE occurs when decl is a TEMPLATE_DECL; that's the corner case
that causes a problem.  The patch that you and Ollie suggest does stop
the ICE for our particular example; I assume because the template in
question is not a clone, and so the predicate fails further on.
However, I'm not convinced that it will work in all cases.  I wish I
could come up with a test case, but like I said, I don't understand
enough about clones to understand what's happening here.  If you are
confident that DECL_CLONED_FUNCTION_P is correct, then we can use
that; I personally had no such confidence.

  -DeLesley

On Thu, Sep 29, 2011 at 10:41 AM, Diego Novillo  wrote:
> On 11-09-29 13:21 , Delesley Hutchins wrote:
>>
>> I don't have a test case, but look at the definitions of the two
>> macros in cp/cp-tree.h. DECL_CLONED_FUNCTION_P calls
>> decl_cloned_function_p, passing true as the second argument,  while
>> DECL_CLONED_FUNCTION makes the same call, but passes false.  Now look
>> at the definition of  decl_cloned_function_p in cp/class.c.  If the
>> second argument is true, it will step into templates, and if it is
>> false, it won't.  Incidentally, the ICE occurs when
>> DECL_CLONED_FUNCTION is applied to a template function, so this is not
>> a hypothetical case.  :-)
>
> But notice that STRIP_TEMPLATE is a NOP when DECL is not a TEMPLATE_DECL.
>  So, I'm not sure where you saw it ICE.  We are already using this idiom all
> over the parser, so it would be great if you could produce a test case for
> the failure you have in mind.
>
> Incidentally, I applied the variant of the patch Ollie and I suggested and
> the testcase works fine with it (while it fails without the patch, of
> course).
>
>
> Diego.
>



-- 
DeLesley Hutchins | Software Engineer | deles...@google.com | 505-206-0315


Re: [PATCH 3/7] Emit macro expansion related diagnostics

2011-09-29 Thread Jason Merrill

On 09/29/2011 01:19 PM, Dodji Seketeli wrote:

+ For a token that is /not/ an argument for a parameter of a
+ function-like macro, each yI is the spelling location of the Ith
+ token of the replacement-list in the definition of the macro.
+ "Spelling location" means the location of the place in the source
+ where the token has been spelled.
+
+ For a token that is an argument for a parameter P of a
+ function-like macro, yI is the spelling location of P in the
+ replacement-list of the macro.


I think think this can be simpler:

yI is the location of the token in the macro definition, either of the 
token itself or of a macro parameter that it replaces.



+ In #2, there is a macro map for the expansion of PLUS.  PLUS is


The macro map doesn't seem to me to be in #2, but the expansion is. 
Let's reword that to "There is a macro map for the expansion of PLUS in #2".



+ x0 is the virtual location for the argument token "1",
+ and x2 is the virtual location for the argument token "2".  */


Are they actually virtual in this case?  I thought they would be the 
source locations in #2 since the expanding context isn't another macro.


Jason


[lra] pseudo assignment improvements

2011-09-29 Thread Vladimir Makarov
For this month I tried many different heuristics of spilling and 
assignment in LRA (about 10 of them, including spilling for inheritance 
reloads).  Here is the best what I found.  The major idea is to assign 
and spill for pseudos which are ordered by threads (set of reload 
pseudos connected by inheritance pseudos).


The patch mostly implements the new idea for assigning and spilling for 
reload pseudos.  The patch also fixes a typo introduced in the previous 
patch which actually prohibited undo-inheritance subpass and resulted in 
worse code.  The patch also fixes an insignificant valgrind error 
(reading uninitialized data).


The patch was successfully bootstrapped on x86/x86-64 and ppc64.

2011-09-29  Vladimir Makarov 

* lra-int.h (lra_get_copy): New prototype.

* lra.c (lra_get_copy): New function.

* lra-saves.c (lra_save_restore): Ignore scratches.

* lra-constraints.c (remove_inheritance_pseudos): Fix a typo in
checking remove_pseudos emptiness.

* lra-assign.c (reload_insn_num): Remove.
(struct regno_assign_info): New.
(regno_assign_info): New array.
(process_copy_to_form_allocno, init_regno_assign_info): New.
(finish_regno_assign_info): New.
(reload_pseudo_compare_func): Rewrite using threads.
(find_hard_regno_for): Check value for conflicting reload pseudos.
(spill_for): Use reload_pseudo_compare_func instead of
pseudo_compare_func.
(assign_by_spills): Print info about processed reload and
inheritance pseudos.
(lra_assign): Call init_regno_assign_info and
finish_regno_assign_info.


Index: lra-assigns.c
===
--- lra-assigns.c   (revision 179206)
+++ lra-assigns.c   (working copy)
@@ -44,13 +44,86 @@ along with GCC; see the file COPYING3.  
up the code.  */
 static enum reg_class *regno_allocno_class_array;
 
-/* Approximate order number of insn for each given reload pseudo (its
-   regno is the index) was generated, 0 otherwise.  This values are
-   used to improve chance of the inheritance.  */
-static int *reload_insn_num;
+/* Info about pseudo used during the assignment sub-pass.  Thread is a
+   set of connected reload and inheritance pseudos with the same set
+   of available hard reg set.  Thread is a pseudo itself for other
+   cases.  */
+struct regno_assign_info
+{
+  /* First/next pseudo of the same thread.  */
+  int first, next;
+  /* Frequency of the thread (frequency of the reload pseudos only in
+ the thread when the thread contains a reload pseudo).  Defined
+ only for the first thread pseudo.  */
+  int freq;
+};
+
+/* Map regno to the corresponding assignment info.  */
+static struct regno_assign_info *regno_assign_info;
 
-/* The function is used to sort *reload* pseudos to try to assign them
-   hard registers.  */
+/* Process a pseudo copy with frequency COPY_FREQ connecting REGNO1
+   and REGNO2 to form threads.  */
+static void
+process_copy_to_form_allocno (int regno1, int regno2, int copy_freq)
+{
+  int last, regno1_first, regno2_first;
+
+  gcc_assert (regno1 >= lra_constraint_new_regno_start
+ && regno2 >= lra_constraint_new_regno_start);
+  regno1_first = regno_assign_info[regno1].first;
+  regno2_first = regno_assign_info[regno2].first;
+  if (regno1_first != regno2_first)
+{
+  for (last = regno2_first;
+  regno_assign_info[last].next >= 0;
+  last = regno_assign_info[last].next)
+   regno_assign_info[last].first = regno1_first;
+  regno_assign_info[last].next = regno_assign_info[regno1_first].next;
+  regno_assign_info[regno1_first].first = regno2_first;
+  regno_assign_info[regno1_first].freq
+   += regno_assign_info[regno2_first].freq;
+}
+  regno_assign_info[regno1_first].freq -= 2 * copy_freq;
+  gcc_assert (regno_assign_info[regno1_first].freq >= 0);
+}
+
+/* Initialize REGNO_ASSIGN_INFO and form threads.  */
+static void
+init_regno_assign_info (void)
+{
+  int i, regno1, regno2;
+  lra_copy_t cp;
+
+  regno_assign_info
+= (struct regno_assign_info *) xmalloc (sizeof (struct regno_assign_info)
+  * max_reg_num ());
+  for (i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
+{
+  regno_assign_info[i].first = i;
+  regno_assign_info[i].next = -1;
+  regno_assign_info[i].freq = lra_reg_info[i].freq;
+}
+  /* Form the threads.  */
+  for (i = 0; (cp = lra_get_copy (i)) != NULL; i++)
+if ((regno1 = cp->regno1) >= lra_constraint_new_regno_start
+   && (regno2 = cp->regno2) >= lra_constraint_new_regno_start
+   && reg_renumber[regno1] < 0 && lra_reg_info[regno1].nrefs != 0
+   && reg_renumber[regno2] < 0 && lra_reg_info[regno2].nrefs != 0
+   && (ira_available_class_regs[regno_allocno_class_array[regno1]]
+   == ira_available_class_regs[regno_allocno_class_array[regno2]]))
+  process_co

Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Diego Novillo

On 11-09-29 15:19 , Delesley Hutchins wrote:


However, I'm not convinced that it will work in all cases.  I wish I
could come up with a test case, but like I said, I don't understand
enough about clones to understand what's happening here.  If you are
confident that DECL_CLONED_FUNCTION_P is correct, then we can use
that; I personally had no such confidence.


Yes, there is no case in which a TEMPLATE_DECL will match 
DECL_CLONED_FUNCTION_P.  I agree with you that the logic in 
decl_cloned_function_p seems like it may allow that, but clones of 
functions are created as FUNCTION_DECLs.


In looking at the code again, I think we could even simplify it a bit 
more using FOR_EACH_CLONE.  This does the same work as the if()/for() 
combination in a more compact way.



Diego.

Index: cp/parser.c
===
--- cp/parser.c (revision 179065)
+++ cp/parser.c (working copy)
@@ -19279,6 +19279,7 @@ cp_parser_late_parsing_attribute_arg_lis
   cp_token_cache *tokens = (cp_token_cache *) TREE_VALUE 
(artificial_node);

   tree ctype;
   VEC(tree,gc) *vec;
+  tree clone;

   gcc_assert (tokens);
   gcc_assert (decl && decl != error_mark_node);
@@ -19322,16 +19323,9 @@ cp_parser_late_parsing_attribute_arg_lis

   /* If decl has clones (when it is a ctor or a dtor), we need to
  modify the clones' attributes as well.  */
-  if (TREE_CODE (decl) == FUNCTION_DECL
-  && (DECL_CONSTRUCTOR_P (decl) || DECL_DESTRUCTOR_P (decl)))
-{
-  tree clone;
-  for (clone = TREE_CHAIN (decl); clone; clone = TREE_CHAIN 
(clone))

-{
-  if (DECL_CLONED_FUNCTION (clone) == decl)
-DECL_ATTRIBUTES (clone) = DECL_ATTRIBUTES (decl);
-}
-}
+  FOR_EACH_CLONE (clone, decl)
+if (DECL_CLONED_FUNCTION (clone) == decl)
+  DECL_ATTRIBUTES (clone) = DECL_ATTRIBUTES (decl);

   pop_nested_class ();



[PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-29 Thread Jakub Jelinek
Hi!

This patch implements a fold_range_test like optimization on GIMPLE, inside
tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be
shared in between the two.
The advantage of the reassoc optimization is that it doesn't attempt to
merge just 2 ranges at a time, instead it sorts the ranges for the same
SSA_NAME and thus can optimize even cases where source code doesn't have
the numbers in a range test in increasing or decreasing order (and also
can optimize things that were in multiple statements in the source).
Additionally, it optimizes cases like
x == 1 || x == 3
into (x & ~2) == 1.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-09-27  Jakub Jelinek  

PR tree-optimization/46309
* fold-const.c (make_range, merge_ranges): Remove prototypes.
(range_binop): Likewise, no longer static.
(make_range_step): New function.
(make_range): Use it.
* tree.h (make_range_step, range_binop): New prototypes.
* Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
* tree-ssa-reassoc.c: Include diagnostic-core.h.
(struct range_entry): New type.
(init_range_entry, range_entry_cmp, update_range_test,
optimize_range_tests): New functions.
(reassociate_bb): Call optimize_range_tests.

* gcc.dg/pr46309.c: New test.

--- gcc/fold-const.c.jj 2011-09-05 12:28:53.0 +0200
+++ gcc/fold-const.c2011-09-27 16:39:09.0 +0200
@@ -112,12 +112,8 @@ static tree decode_field_reference (loca
 static int all_ones_mask_p (const_tree, int);
 static tree sign_bit_p (tree, const_tree);
 static int simple_operand_p (const_tree);
-static tree range_binop (enum tree_code, tree, tree, int, tree, int);
 static tree range_predecessor (tree);
 static tree range_successor (tree);
-extern tree make_range (tree, int *, tree *, tree *, bool *);
-extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
- tree, tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, 
tree);
 static tree unextend (tree, int, int, tree);
@@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp)
must be specified for a comparison.  ARG1 will be converted to ARG0's
type if both are specified.  */
 
-static tree
+tree
 range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
 tree arg1, int upper1_p)
 {
@@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t
   return constant_boolean_node (result, type);
 }
 
+/* Helper routine for make_range.  Perform one step for it, return
+   new expression if the loop should continue or NULL_TREE if it should
+   stop.  */
+
+tree
+make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
+tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
+bool *strict_overflow_p)
+{
+  tree arg0_type = TREE_TYPE (arg0);
+  tree n_low, n_high, low = *p_low, high = *p_high;
+  int in_p = *p_in_p, n_in_p;
+
+  switch (code)
+{
+case TRUTH_NOT_EXPR:
+  *p_in_p = ! in_p;
+  return arg0;
+
+case EQ_EXPR: case NE_EXPR:
+case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
+  /* We can only do something if the range is testing for zero
+and if the second operand is an integer constant.  Note that
+saying something is "in" the range we make is done by
+complementing IN_P since it will set in the initial case of
+being not equal to zero; "out" is leaving it alone.  */
+  if (low == NULL_TREE || high == NULL_TREE
+ || ! integer_zerop (low) || ! integer_zerop (high)
+ || TREE_CODE (arg1) != INTEGER_CST)
+   return NULL_TREE;
+
+  switch (code)
+   {
+   case NE_EXPR:  /* - [c, c]  */
+ low = high = arg1;
+ break;
+   case EQ_EXPR:  /* + [c, c]  */
+ in_p = ! in_p, low = high = arg1;
+ break;
+   case GT_EXPR:  /* - [-, c] */
+ low = 0, high = arg1;
+ break;
+   case GE_EXPR:  /* + [c, -] */
+ in_p = ! in_p, low = arg1, high = 0;
+ break;
+   case LT_EXPR:  /* - [c, -] */
+ low = arg1, high = 0;
+ break;
+   case LE_EXPR:  /* + [-, c] */
+ in_p = ! in_p, low = 0, high = arg1;
+ break;
+   default:
+ gcc_unreachable ();
+   }
+
+  /* If this is an unsigned comparison, we also know that EXP is
+greater than or equal to zero.  We base the range tests we make
+on that fact, so we record it here so we can parse existing
+range tests.  We test arg0_type since often the return type
+of, e.g. EQ_EXPR, is boolean.  */
+  if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
+   {
+ if (! merge_ranges (&n_in_p, &n_low, &n_high,
+ in_p, low, high, 1,
+   

[PATCH] Teach PTA and aliasing about strdup/strndup

2011-09-29 Thread Jakub Jelinek
Hi!

This patch teaches PTA/aliasing about strdup/strndup (that the passed in
string is just read and doesn't escape in any way, and that otherwise it
acts as malloc or other allocation calls.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-09-29  Jakub Jelinek  

* tree-ssa-structalias.c (find_func_aliases_for_builtin_call): Handle
BUILT_IN_STRDUP and BUILT_IN_STRNDUP.
* tree-ssa-alias.c (call_may_clobber_ref_p_1): Likewise.

* gcc.dg/strlenopt-21.c: New test.

--- gcc/tree-ssa-structalias.c.jj   2011-09-29 15:27:17.0 +0200
+++ gcc/tree-ssa-structalias.c  2011-09-29 15:31:02.0 +0200
@@ -4130,6 +4130,16 @@ find_func_aliases_for_builtin_call (gimp
   case BUILT_IN_REMQUOL:
   case BUILT_IN_FREE:
return true;
+  case BUILT_IN_STRDUP:
+  case BUILT_IN_STRNDUP:
+   {
+ varinfo_t uses = get_call_use_vi (t);
+ make_constraint_to (uses->id, gimple_call_arg (t, 0));
+ if (gimple_call_lhs (t))
+   handle_lhs_call (t, gimple_call_lhs (t), gimple_call_flags (t),
+NULL, fndecl);
+ return true;
+   }
   /* Trampolines are special - they set up passing the static
 frame.  */
   case BUILT_IN_INIT_TRAMPOLINE:
--- gcc/tree-ssa-alias.c.jj 2011-09-29 15:27:17.0 +0200
+++ gcc/tree-ssa-alias.c2011-09-29 15:31:02.0 +0200
@@ -1506,6 +1506,8 @@ call_may_clobber_ref_p_1 (gimple call, a
   being the definition point for the pointer.  */
case BUILT_IN_MALLOC:
case BUILT_IN_CALLOC:
+   case BUILT_IN_STRDUP:
+   case BUILT_IN_STRNDUP:
  /* Unix98 specifies that errno is set on allocation failure.  */
  if (flag_errno_math
  && targetm.ref_may_alias_errno (ref))
--- gcc/testsuite/gcc.dg/strlenopt-21.c.jj  2011-09-29 15:42:19.0 
+0200
+++ gcc/testsuite/gcc.dg/strlenopt-21.c 2011-09-29 15:42:00.0 +0200
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-tree-strlen -fdump-tree-optimized" } */
+
+#include "strlenopt.h"
+
+struct S { char *p; size_t l; };
+
+__attribute__((noinline, noclone)) struct S
+foo (char *x, int n)
+{
+  int i;
+  char a[64];
+  char *p = strchr (x, '\0');
+  struct S s;
+  /* strcpy here is optimized into memcpy, length computed as p - x + 1.  */
+  strcpy (a, x);
+  /* strcat here is optimized into memcpy.  */
+  strcat (p, "abcd");
+  for (i = 0; i < n; i++)
+if ((i % 123) == 53)
+  /* strcat here is optimized into strlen and memcpy.  */
+  strcat (a, "efg");
+  s.p = strdup (a);
+  /* The strlen should be optimized here into 4.  */
+  s.l = strlen (p);
+  return s;
+}
+
+int
+main ()
+{
+  char buf[32];
+  struct S s;
+  buf[0] = 'z';
+  buf[1] = '\0';
+  s = foo (buf, 0);
+  if (s.l != 4 || memcmp (buf, "zabcd", 6) != 0)
+abort ();
+  if (s.p == NULL)
+return 0;
+  if (memcmp (s.p, "z", 2) != 0)
+abort ();
+  s = foo (buf, 60);
+  if (s.l != 4 || memcmp (buf, "zabcdabcd", 10) != 0)
+abort ();
+  if (s.p == NULL)
+return 0;
+  if (memcmp (s.p, "zabcdefg", 9) != 0)
+abort ();
+  s = foo (buf, 240);
+  if (s.l != 4 || memcmp (buf, "zabcdabcdabcd", 14) != 0)
+abort ();
+  if (s.p == NULL)
+return 0;
+  if (memcmp (s.p, "zabcdabcdefgefg", 16) != 0)
+abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "memcpy \\(" 3 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */
+/* { dg-final { cleanup-tree-dump "strlen" } } */

Jakub


[PATCH] Restrict fixes

2011-09-29 Thread Jakub Jelinek
Hi!

On Mon, Sep 26, 2011 at 06:41:10PM +0200, Jakub Jelinek wrote:
> which would be invalid to call with foo (a, 32); given the above, but
> it isn't obvious to the compiler what value y has.  With -DWORKAROUND
> the PT decls in (restr) look correct, without that not (supposedly because
> of the folding of the initializer), still, the vectorizer together
> with the alias oracle don't figure out they can omit the non-overlap
> tests before both loops.

This patch fixes the folder that
int *__restrict p2 = x + 32;
for non-restrict x isn't gimplified as
int *__restrict x.0 = (int *__restrict) x;
int *__restrict p2 = x.0 + 32;
and forwprop to avoid propagating what has a restrict pointer been
initialized from, unless it was restrict too, because otherwise the
restrict info is lost and aliasing can't disambiguate accesses based on
that pointer.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-09-29  Jakub Jelinek  

* fold-const.c (fold_unary_loc): Don't optimize
POINTER_PLUS_EXPR casted to TYPE_RESTRICT pointer by
casting the inner pointer if it isn't TYPE_RESTRICT.
* tree-ssa-forwprop.c (forward_propagate_addr_expr_1): Don't through
casts from non-TYPE_RESTRICT pointer to TYPE_RESTRICT pointer.

* gcc.dg/tree-ssa/restrict-4.c: New test.

--- gcc/fold-const.c.jj 2011-09-29 14:25:46.0 +0200
+++ gcc/fold-const.c2011-09-29 18:20:04.0 +0200
@@ -7929,6 +7929,7 @@ fold_unary_loc (location_t loc, enum tre
 that this happens when X or Y is NOP_EXPR or Y is INTEGER_CST. */
   if (POINTER_TYPE_P (type)
  && TREE_CODE (arg0) == POINTER_PLUS_EXPR
+ && (!TYPE_RESTRICT (type) || TYPE_RESTRICT (TREE_TYPE (arg0)))
  && (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
  || TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR
  || TREE_CODE (TREE_OPERAND (arg0, 1)) == NOP_EXPR))
--- gcc/tree-ssa-forwprop.c.jj  2011-09-15 12:18:54.0 +0200
+++ gcc/tree-ssa-forwprop.c 2011-09-29 19:08:03.0 +0200
@@ -804,6 +804,10 @@ forward_propagate_addr_expr_1 (tree name
   && ((rhs_code == SSA_NAME && rhs == name)
  || CONVERT_EXPR_CODE_P (rhs_code)))
 {
+  /* Don't propagate restrict pointer's RHS.  */
+  if (TYPE_RESTRICT (TREE_TYPE (lhs))
+ && !TYPE_RESTRICT (TREE_TYPE (name)))
+   return false;
   /* Only recurse if we don't deal with a single use or we cannot
 do the propagation to the current statement.  In particular
 we can end up with a conversion needed for a non-invariant
--- gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c.jj   2011-09-29 
20:21:00.0 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c  2011-09-29 20:21:57.0 
+0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int *x, int y)
+{
+  int *__restrict p1 = x;
+  int *__restrict p2 = x + 32;
+  p1[y] = 1;
+  p2[4] = 2;
+  return p1[y];
+}
+
+int
+bar (int *x, int y)
+{
+  int *__restrict p1 = x;
+  int *p3 = x + 32;
+  int *__restrict p2 = p3;
+  p1[y] = 1;
+  p2[4] = 2;
+  return p1[y];
+}
+
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */


Jakub


Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Delesley Hutchins
That seems much cleaner!  LGTM.  Are you submitting the patch?  I can
submit the test case.

  -DeLesley

On Thu, Sep 29, 2011 at 1:50 PM, Diego Novillo  wrote:
> On 11-09-29 15:19 , Delesley Hutchins wrote:
>
>> However, I'm not convinced that it will work in all cases.  I wish I
>> could come up with a test case, but like I said, I don't understand
>> enough about clones to understand what's happening here.  If you are
>> confident that DECL_CLONED_FUNCTION_P is correct, then we can use
>> that; I personally had no such confidence.
>
> Yes, there is no case in which a TEMPLATE_DECL will match
> DECL_CLONED_FUNCTION_P.  I agree with you that the logic in
> decl_cloned_function_p seems like it may allow that, but clones of functions
> are created as FUNCTION_DECLs.
>
> In looking at the code again, I think we could even simplify it a bit more
> using FOR_EACH_CLONE.  This does the same work as the if()/for() combination
> in a more compact way.
>
>
> Diego.
>
> Index: cp/parser.c
> ===
> --- cp/parser.c (revision 179065)
> +++ cp/parser.c (working copy)
> @@ -19279,6 +19279,7 @@ cp_parser_late_parsing_attribute_arg_lis
>       cp_token_cache *tokens = (cp_token_cache *) TREE_VALUE
> (artificial_node);
>       tree ctype;
>       VEC(tree,gc) *vec;
> +      tree clone;
>
>       gcc_assert (tokens);
>       gcc_assert (decl && decl != error_mark_node);
> @@ -19322,16 +19323,9 @@ cp_parser_late_parsing_attribute_arg_lis
>
>       /* If decl has clones (when it is a ctor or a dtor), we need to
>          modify the clones' attributes as well.  */
> -      if (TREE_CODE (decl) == FUNCTION_DECL
> -          && (DECL_CONSTRUCTOR_P (decl) || DECL_DESTRUCTOR_P (decl)))
> -        {
> -          tree clone;
> -          for (clone = TREE_CHAIN (decl); clone; clone = TREE_CHAIN
> (clone))
> -            {
> -              if (DECL_CLONED_FUNCTION (clone) == decl)
> -                DECL_ATTRIBUTES (clone) = DECL_ATTRIBUTES (decl);
> -            }
> -        }
> +      FOR_EACH_CLONE (clone, decl)
> +        if (DECL_CLONED_FUNCTION (clone) == decl)
> +          DECL_ATTRIBUTES (clone) = DECL_ATTRIBUTES (decl);
>
>       pop_nested_class ();
>
>



-- 
DeLesley Hutchins | Software Engineer | deles...@google.com | 505-206-0315


Re: [0/4] Modulo scheduling with haifa-sched for C6X

2011-09-29 Thread Vladimir Makarov

On 09/27/2011 08:36 AM, Bernd Schmidt wrote:

Ping:
http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00811.html


Bernd, sorry for the delay.

  I thought for long time about this approach because we already have 
selective scheduler which with some modifications could be used for 
this.  Selective scheduler was implemented for Itanium, designed to work 
after RA (although it can work before too) and it implements a general 
software pipelining (not modulo scheduling) which could be used for 
loops with conditionals and with varying II (if we speak in terms of 
modulo scheduling).  Selective scheduling is more complicated than haifa 
scheduler (although haifa scheduler with so many changes for its 
existence is approaching to the complexity of selective scheduler), it 
might be hard to modify for your purposes,  it has also a tendency to 
generate a big code which is probably not good for C6X which is oriented 
to embedded applications.


  On the other hand, your changes to haifa-scheduler are small, so I 
concluded it might be ok (I hope the coming changes with register 
renaming which selective scheduler already does will be not big too).  
Now we have more complex selective scheduler with general software 
pipelining and simpler haifa scheduler with modulo scheduling.  So I 
think we could look at selective scheduler for servers with VLIW and 
in-order pipelined processors where code expansion is not so important 
and haifa-scheduler with modulo scheduler for embedded VLIW processors.


  Still I think that selective scheduler has more potential to generate 
a faster code than what you are trying to implement.


  As for the patch itself (only scheduler parts in 1/4 and 2/4), it is 
ok to me to commit this.  I did not find anything which should be changed.




Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Diego Novillo

On 11-09-29 17:24 , Delesley Hutchins wrote:

That seems much cleaner!  LGTM.  Are you submitting the patch?  I can
submit the test case.


I tested it on an unclean tree, sorry.  I'd rather leave the testing and 
final submit to you.



Thanks.  Diego.


Re: [PATCH] [Annotalysis] Fix internal compiler error on template methods

2011-09-29 Thread Delesley Hutchins
No problem.  Will do.

  -DeLesley

On Thu, Sep 29, 2011 at 2:44 PM, Diego Novillo  wrote:
> On 11-09-29 17:24 , Delesley Hutchins wrote:
>>
>> That seems much cleaner!  LGTM.  Are you submitting the patch?  I can
>> submit the test case.
>
> I tested it on an unclean tree, sorry.  I'd rather leave the testing and
> final submit to you.
>
>
> Thanks.  Diego.
>



-- 
DeLesley Hutchins | Software Engineer | deles...@google.com | 505-206-0315


Re: [0/4] Modulo scheduling with haifa-sched for C6X

2011-09-29 Thread Bernd Schmidt
On 09/29/11 23:32, Vladimir Makarov wrote:
> Bernd, sorry for the delay.

No problem.

>   I thought for long time about this approach because we already have
> selective scheduler which with some modifications could be used for
> this.  Selective scheduler was implemented for Itanium, designed to work
> after RA (although it can work before too) and it implements a general
> software pipelining (not modulo scheduling) which could be used for
> loops with conditionals and with varying II (if we speak in terms of
> modulo scheduling).

Yes, but that's somewhat orthogonal to what we need for C6X - the
hardware support is really designed for modulo scheduling. For more
complicated or bigger loops, using more general software pipelining
could be a win, but benchmark results seem to suggest that a large part
of the possible gains on C6X can be achieved without it.

The other problem with sel-sched is that, as you said, it's quite
complicated - the code is still rather opaque to me. Also, to use it on
C6X, the support for delay slots that now exists in haifa-sched would
have to be added to it as well. In general I'm not too keen on
supporting two different schedulers side-by-side and duplicating
features in each.

>   On the other hand, your changes to haifa-scheduler are small, so I
> concluded it might be ok (I hope the coming changes with register
> renaming which selective scheduler already does will be not big too). 

The register renaming changes will be localized to c6x.c (and a few
small changes in regrename.c) - their purpose is to ensure that the
instructions present in a loop are balanced across the two halves of the
machine. I don't think there's another target with similar enough
requirements to try to add some form of general support for this kind of
thing yet.

What I do have coming up are some more haifa-sched changes to make it
predicate insns when that allows them to be moved across jumps. Just
bootstrapping that on ia64 now...

> Now we have more complex selective scheduler with general software
> pipelining and simpler haifa scheduler with modulo scheduling.  So I
> think we could look at selective scheduler for servers with VLIW and
> in-order pipelined processors where code expansion is not so important
> and haifa-scheduler with modulo scheduler for embedded VLIW processors.

Note that the C6X modulo-scheduling really relies on the exposed
pipeline to produce sensible code; you'd probably have to be more clever
with the unrolling to make this work reasonably on a different target.
Still, I think the haifa-sched code could in principle be extended to
also be interesting for other CPUs, maybe even ia64 - doesn't that have
rotating registers for modulo scheduling?

>   As for the patch itself (only scheduler parts in 1/4 and 2/4), it is
> ok to me to commit this.  I did not find anything which should be changed.

Thanks!


Bernd


Re: [Patch, Darwin/libiberty] fix target/48108 by adding a section wrapper scheme to the darwin port.

2011-09-29 Thread Ian Lance Taylor
Iain Sandoe  writes:


This patch looks basically OK but it seems to have a number of
extraneous changes which make it harder to review.


> libiberty:
>
>   PR target/48108
>   * simple-object-mach-o.c (GNU_SECTION_NAMES): Remove.
>   (GNU_WRAPPER_SECTS, GNU_WRAPPER_INDEX,
>   GNU_WRAPPER_NAMES): New macros.
>   (simple_object_mach_o_match): Amend comment and error message.
>   Do not require that a segment name is specified.
>   (simple_object_mach_o_segment): Handle wrapper scheme, if a
> segment name is
>   specified.  (simple_object_mach_o_write_section_header): Allow
> the segment name
>   to be supplied. (simple_object_mach_o_write_segment): Handle
> wrapper scheme, if
>   a segment name is specified.  Ensure that the top-level
> segment name in the load
>   command is empty.   (simple_object_mach_o_write_to_file): Determine the
>   number of sections during segment output, use that in writing
> the header.
>
> gcc:
>
>   PR target/48108
>   * config/darwin.c (top level): Amend comments concerning LTO output.
>   (lto_section_num): New variable.  (darwin_lto_section_e): New GTY.
>   (LTO_SECTS_SECTION, LTO_INDEX_SECTION): New.
>   (LTO_NAMES_SECTION): Rename.
>   (darwin_asm_named_section): Record LTO section counts and switches
>   in a vec of darwin_lto_section_e.
>   (darwin_file_start): Remove unused code.
>   (darwin_file_end): Put an LTO section termination label.  Handle output
>   of the wrapped LTO sections, index and names table.
>   
> include:
>
>   PR target/48108
>   *simple-object.h: Amend comments for simple read/write to note the
>   wrapper scheme for Mach-o.

Space after '*'.


> -#define GNU_SECTION_NAMES "__section_names"

Are we sure it is OK to drop support for __section_names?  Won't that
mean that any old LTO files will not work with new gcc?  Is that OK?


> @@ -258,18 +274,10 @@ simple_object_mach_o_match (
>  }
>  #endif
>  
> -  /* We require the user to provide a segment name.  This is
> - unfortunate but I don't see any good choices here.  */
> -
> -  if (segment_name == NULL)
> +  /* Although a standard MH_OBJECT has a single anonymous segment, we allow
> + a segment name to be specified - but don't act on it at this stage.  */
> +  if (segment_name && strlen (segment_name) > MACH_O_NAME_LEN)
>  {
> -  *errmsg = "Mach-O file found but no segment name specified";
> -  *err = 0;
> -  return NULL;
> -}
> -
> -  if (strlen (segment_name) > MACH_O_NAME_LEN)
> -{
>*errmsg = "Mach-O segment name too long";
>*err = 0;
>return NULL;

Please write "segment_name != NULL &&", not just "segment_name &&".



> @@ -294,13 +302,14 @@ simple_object_mach_o_match (
>filetype = (*fetch_32) (b + offsetof (struct mach_o_header_32, filetype));
>if (filetype != MACH_O_MH_OBJECT)
>  {
> -  *errmsg = "Mach-O file is not object file";
> +  *errmsg = "Mach-O file is not an MH_OBJECT file";
>*err = 0;
>return NULL;
>  }

I'm not sure this error message change is an improvement.  This error
message may be seen by end users, not just developers.  I think more end
users will understand "object file."  Why do you want to make this
change?


>omr = XNEW (struct simple_object_mach_o_read);
> -  omr->segment_name = xstrdup (segment_name);
> +  if (segment_name)
> +omr->segment_name = xstrdup (segment_name);
>omr->magic = magic;
>omr->is_big_endian = is_big_endian;
>omr->cputype = (*fetch_32) (b + offsetof (struct mach_o_header_32, 
> cputype));

Please write "if (segment_name != NULL)".


> @@ -356,9 +365,25 @@ simple_object_mach_o_section_info (int is_big_endi
>  }
>  }
>  
> -/* Handle a segment in a Mach-O file.  Return 1 if we should continue,
> -   0 if the caller should return.  */
> +/* Handle a segment in a Mach-O Object file.
>  
> +   This will callback to the function pfn for each "section found" the 
> meaning
> +   of which depends on a gnu extension to mach-o:
> +   
> +   When omr->segment_name is specified, we implement a wrapper scheme (which
> +   whilst it will, in principle, work with any segment name, is likely to be
> +   meaningless current for anything except __GNU_LTO).
> +   
> +   If we find mach-o sections (with the segment name as specified) which also
> +   contain: a 'sects' wrapper, an index, and a  name table, we expand this 
> into
> +   as many sections as are specified in the index.  In this case, there will
> +   be a callback for each of these.
> +   
> +   When the omr->segment_name is not specified, then we would call-back for
> +   every mach-o section specified in the file.
> +
> +   Return 1 if we should continue, 0 if the caller should return.  */

Is there a reason that you are changing the code to support not
specifying a segment?  Should that be a separate change?


> @@ -375,12 +400,19 @@ simple_object_mach_o_segment (simple_object_

Re: [Patch, Darwin/libiberty] fix target/48108 by adding a section wrapper scheme to the darwin port.

2011-09-29 Thread Iain Sandoe

Hello Ian,

I'll re-jig with the typographical changes (sorry that were so  
many ... )

... and re-post - but I'd like to clear up a couple of points first.

On 30 Sep 2011, at 00:00, Ian Lance Taylor wrote:

-#define GNU_SECTION_NAMES "__section_names"


Are we sure it is OK to drop support for __section_names?  Won't that
mean that any old LTO files will not work with new gcc?  Is that OK?


I was under the impression that an lto built with an earlier revision  
of gcc would not interwork anyway.


If that is mistaken, then we could re-enable the option to allow both  
(I had that in the previous version, in fact).


+   If we find mach-o sections (with the segment name as specified)  
which also
+   contain: a 'sects' wrapper, an index, and a  name table, we  
expand this into
+   as many sections as are specified in the index.  In this case,  
there will

+   be a callback for each of these.
+
+   When the omr->segment_name is not specified, then we would call- 
back for

+   every mach-o section specified in the file.
+
+   Return 1 if we should continue, 0 if the caller should return.   
*/


Is there a reason that you are changing the code to support not
specifying a segment?  Should that be a separate change?


I'll separate it out.
I think that it could help us build some small tools to assist in  
making GCC output more closely match what the system tool-chain is  
expecting (e.g. stripping off LTO sections).  But that would require  
being able to read all sections as they come and then apply a filter.


Perhaps it's just better to invest the time in BFD, but that does mean  
the user having to build a second set of stuff or a combined tree.


thanks for reviewing and sorry about the number of trivial errors.

Iain






Re: [PATCH] Distribute inliner's size_time data across entries with similar predicates

2011-09-29 Thread Maxim Kuvyrkov
On 25/09/2011, at 11:21 PM, Jan Hubicka wrote:

>> 
>> I wonder why we bother to record so many predicates though.
> 
> Yep, I wonder if it comes from some real testcase?  I didn't see functions 
> that
> reach the limit and still be inlinable very often in practice.

This patch wasn't inspired by a real testcase.  I was studying your new 
context-sensitive inliner analysis and thought that we can deal with predicate 
overflow a bit better.

>> 
>>>   if (e->time > MAX_TIME * INLINE_TIME_SCALE)
>>> 
>>> The rationale was that since we are accounting size and time under the 
>>> entry we also need to make entry's predicate a superset of the predicate we 
>>> want to account the data under.
>>> 
>>> Then I thought that mushing all predicates into the single predicate of 
>>> entry[0] will cause it to become true_predicate() very quickly, so I added 
>>> logic to distribute incoming size_time information across all 32 entries by 
>>> searching for entries with similar predicates.
> 
> Entry[0] is always true predicate BTW.  This is why you don't need or 
> predicate there.

Yeap, I learned this several hours after I posted the patch.

> 
> I will take at the patch later today.

Thanks,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



PATCH: PR lto/50568: [4.7 Regression] Massive LTO failures

2011-09-29 Thread H.J. Lu
Hi,

On 32bit hosts, like Linux/ia32, HOST_WIDE_INT is 64bit.  But lto and
lto-plugin still uses 32bit int in symbol ID handling.  As the result,
LTO is totally broken on Linux/ia32.  This patch switches symbol ID
to HOST_WIDE_INT in lto.c and long long in lto-plugin.c.  Since
symbol ID is generated by lto.c, long long >= HOST_WIDE_INT and
lto-plugin.c only scans/prints symbol ID as numbers, it isn't a problem.
Tested on Linux/ia32 and Linux/x86-64.  OK for trunk?

Thanks.


H.J.
---
gcc/lto

2011-09-29  H.J. Lu  
Andi Kleen  

PR lto/50568
* lto.c (lto_splay_tree_delete_id): New.
(lto_splay_tree_compare_ids): Likewise.
(lto_splay_tree_lookup): Likewise.
(lto_splay_tree_id_equal_p): Likewise.
(lto_splay_tree_insert): Likewise.
(lto_splay_tree_new): Likewise.
(lto_resolution_read): Change id to unsigned HOST_WIDE_INT.
Use lto_splay_tree_id_equal_p and lto_splay_tree_lookup.
(create_subid_section_table): Use lto_splay_tree_lookup and
lto_splay_tree_insert.
(lto_file_read): Use lto_splay_tree_new.

lto-plugin/

2011-09-29  H.J. Lu  
Andi Kleen  

PR lto/50568
* lto-plugin.c (sym_aux): Change id to unsigned long long.
(plugin_symtab): Likewise.
(dump_symtab): Likewise.
(resolve_conflicts): Likewise.
(process_symtab): Likewise.

diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 77eb1a1..9c6770a 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -93,6 +93,93 @@ lto_obj_create_section_hash_table (void)
   return htab_create (37, hash_name, eq_name, free_with_string);
 }
 
+/* Delete a allocated integer key in the splay tree.  */
+
+static void
+lto_splay_tree_delete_id (splay_tree_key key)
+{
+  free ((void *) key);
+}
+
+/* Compare two splay tree node ids.  */
+
+static int
+lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b)
+{
+  unsigned HOST_WIDE_INT ai;
+  unsigned HOST_WIDE_INT bi;
+
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+{
+  ai = (unsigned HOST_WIDE_INT) a;
+  bi = (unsigned HOST_WIDE_INT) b;
+}
+  else
+{
+  ai = *(unsigned HOST_WIDE_INT *) a;
+  bi = *(unsigned HOST_WIDE_INT *) b;
+}
+
+  if (ai < bi)
+return -1;
+  else if (ai > bi)
+return 1;
+  return 0;
+}
+
+/* Look up splay tree node by ID.  */
+
+static splay_tree_node
+lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id)
+{
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+return splay_tree_lookup (t, (splay_tree_key) id);
+  else
+return splay_tree_lookup (t, (splay_tree_key) &id);
+}
+
+/* Check if KEY has ID.  */
+
+static bool
+lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id)
+{
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+return (unsigned HOST_WIDE_INT) key == id;
+  else
+return *(unsigned HOST_WIDE_INT *) key == id;
+}
+
+/* Insert a splay tree node with ID as key and FILE_DATA as value.  */
+
+static void
+lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id,
+  struct lto_file_decl_data * file_data)
+{
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+splay_tree_insert (t, (splay_tree_key) id,
+  (splay_tree_value) file_data);
+  else
+{
+  unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT);
+  *idp = id;
+  splay_tree_insert (t, (splay_tree_key) idp,
+(splay_tree_value) file_data);
+}
+}
+
+/* Create a splay tree.  */
+
+static splay_tree
+lto_splay_tree_new (void)
+{
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+return splay_tree_new (lto_splay_tree_compare_ids, NULL, NULL);
+  else
+return splay_tree_new (lto_splay_tree_compare_ids,
+  lto_splay_tree_delete_id,
+  NULL);
+}
+
 /* Read the constructors and inits.  */
 
 static void
@@ -944,14 +1031,16 @@ lto_resolution_read (splay_tree file_ids, FILE 
*resolution, lto_file *file)
   for (i = 0; i < num_symbols; i++)
 {
   int t;
-  unsigned index, id;
+  unsigned index;
+  unsigned HOST_WIDE_INT id;
   char r_str[27];
   enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 
0;
   unsigned int j;
   unsigned int lto_resolution_str_len =
sizeof (lto_resolution_str) / sizeof (char *);
 
-  t = fscanf (resolution, "%u %x %26s %*[^\n]\n", &index, &id, r_str);
+  t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s 
%*[^\n]\n", 
+ &index, &id, r_str);
   if (t != 3)
 internal_error ("invalid line in the resolution file");
   if (index > max_index)
@@ -968,11 +1057,12 @@ lto_resolution_read (splay_tree file_ids, FILE 
*resolution, lto_file *file)
   if (j == lto_resolution_str_len)
internal_error ("invalid resolution in the resolut

RE: [PATCH] Fix stack red zone bug (PR38644)

2011-09-29 Thread Jiangning Liu


> -Original Message-
> From: Jakub Jelinek [mailto:ja...@redhat.com]
> Sent: Thursday, September 29, 2011 6:14 PM
> To: Jiangning Liu
> Cc: 'Richard Guenther'; Andrew Pinski; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> 
> On Thu, Sep 29, 2011 at 06:08:50PM +0800, Jiangning Liu wrote:
> > As far as I know different back-ends are implementing different
> > prologue/epilogue in GCC. If one day this part can be refined and
> abstracted
> > as well, I would say solving this stack-red-zone problem in shared
> > prologue/epilogue code would be a perfect solution, and barrier can
> be
> > inserted there.
> >
> > I'm not saying you are wrong on keeping scheduler using a pure
> barrier
> > interface. From engineering point of view, I only feel my proposal is
> so far
> > so good, because this patch at least solve the problem for all
> targets in a
> > quite simple way. Maybe it can be improved in future based on this.
> 
> But you don't want to listen about any other alternative, other
> backends are
> happy with being able to put the best kind of barrier at the best spot
> in the epilogue and don't need a "generic" solution which won't model
> very
> well the target diversity anyway.

Jakub,

Appreciate for your attention on this issue,

1) Can you clarify who are the "others back-ends"? Does it cover most of the
back-ends being supported by GCC right now?
2) You need give data to prove "other back-ends" are happy with current
solution. The fact is over the years there are a bunch of bugs filed against
this problem. WHY? At this point you are implying "other back-ends" are
happy with bugs or potential bugs? This is wired to me. Also, this is not a
issue whether a back-end is able to implement barrier or not. If you are
really asking "able or not", I would say every back-end is able, but it
doesn't mean "able" is correct and it doesn't mean "able" is the most
reasonable.

Comparing with the one I am proposing, I don't see the current solution has
other significant advantages in addition to the "proper modeling" for
scheduler you are arguing. Instead, the solution I'm proposing is a safe
solution, and a solution easy to avoid bugs. If GCC compiler infrastructure
can't even give a safe compilation, why should we insist on the "proper
modeling" for scheduler only?

Hopefully you can consider again about this.

-Jiangning

> 
>   Jakub






Re: PATCH: PR lto/50568: [4.7 Regression] Massive LTO failures

2011-09-29 Thread Andi Kleen
"H.J. Lu"  writes:

> Hi,
>
> On 32bit hosts, like Linux/ia32, HOST_WIDE_INT is 64bit.  But lto and
> lto-plugin still uses 32bit int in symbol ID handling.  As the result,
> LTO is totally broken on Linux/ia32.  This patch switches symbol ID
> to HOST_WIDE_INT in lto.c and long long in lto-plugin.c.  Since
> symbol ID is generated by lto.c, long long >= HOST_WIDE_INT and
> lto-plugin.c only scans/prints symbol ID as numbers, it isn't a problem.
> Tested on Linux/ia32 and Linux/x86-64.  OK for trunk?

Thanks for fixing. Looks good to me.
-Andi

-- 
a...@linux.intel.com -- Speaking for myself only


Re: PATCH: PR lto/50568: [4.7 Regression] Massive LTO failures

2011-09-29 Thread Diego Novillo

On 11-09-29 20:57 , H.J. Lu wrote:



diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 77eb1a1..9c6770a 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -93,6 +93,93 @@ lto_obj_create_section_hash_table (void)
return htab_create (37, hash_name, eq_name, free_with_string);
  }

+/* Delete a allocated integer key in the splay tree.  */
+


s/a allocated/an allocated/

Please document KEY.


+static void
+lto_splay_tree_delete_id (splay_tree_key key)
+{
+  free ((void *) key);
+}
+
+/* Compare two splay tree node ids.  */
+


Likewise A and B.  Other functions have similar issues.



+/* Insert a splay tree node with ID as key and FILE_DATA as value.  */
+
+static void
+lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id,
+  struct lto_file_decl_data * file_data)


s/* file_data/*file_data/



+/* Create a splay tree.  */
+
+static splay_tree
+lto_splay_tree_new (void)
+{
+  if (sizeof (splay_tree_key) == sizeof (unsigned HOST_WIDE_INT))
+return splay_tree_new (lto_splay_tree_compare_ids, NULL, NULL);
+  else
+return splay_tree_new (lto_splay_tree_compare_ids,
+  lto_splay_tree_delete_id,
+  NULL);
+}


Why not always do the option where we allocate the IDs?  I don't think 
it would be a huge performance hit and it would make the code easier to 
understand.


Could you document here and in lto-plugin why we need to play with this 
checking of splay_tree_key?  And why both need to kept in sync.



Thanks.  Diego.


[PATCH] [Annotalysis] Fix bogus warnings caused by ipa-sra optimization

2011-09-29 Thread Delesley Hutchins
This patch suppresses bogus warnings that are caused when annotalysis
is run with the IPA-SRA optimization (i.e. -O2 or higher).  IPA-SRA
creates clones of functions in which some arguments have been eliminated,
even if those arguments are used in the thread safety annotations;
this renders the attributes on such functions invalid.  Annotalysis will now
detect when such clones have been created, and suppress certain
warnings to avoid false positives.

Bootstrapped and passed gcc regression testsuite on
x86_64-unknown-linux-gnu.  Okay for google/gcc-4_6?

 -DeLesley

cp/Changelog.google-4_6:
2011-9-27  DeLesley Hutchins  

 * cp/tree-threadsafe-analyze.c (process_function_attrs)
 detect clones and suppress warnings

testsuite/Changelog.google-4_6:
2011-9-27  DeLesley Hutchins  

 * testsuite/g++.dg/thread-ann/thread_annot_lock-82.C
   (new regression test)


Index: gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C
===
--- gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C  (revision 0)
+++ gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C  (revision 0)
@@ -0,0 +1,30 @@
+// Test template methods in the presence of cloned constructors.
+// Regression test for bugfix.
+// { dg-do compile }
+// { dg-options "-Wthread-safety -O3" }
+
+#include "thread_annot_common.h"
+
+class Foo;
+void do_something(void* a);
+
+class Foo {
+  Mutex mu_;
+
+  // with optimization turned on, ipa-sra should eliminate the hidden
+  // "this" argument, thus invalidating EXCLUSIVE_LOCKS_REQUIRED.
+  inline void clone_me_ipasra(Foo* f) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+do_something(f);
+  }
+
+  void foo(Foo* f);
+};
+
+void Foo::foo(Foo* f) {
+  mu_.Lock();
+  // in the cloned version, it looks like the required lock is f->mu_
+  // we should detect this and ignore the attribute.
+  clone_me_ipasra(f);
+  mu_.Unlock();
+}
+
Index: gcc/tree-threadsafe-analyze.c
===
--- gcc/tree-threadsafe-analyze.c   (revision 179371)
+++ gcc/tree-threadsafe-analyze.c   (working copy)
@@ -2129,6 +2129,7 @@ process_function_attrs (gimple call, tree fdecl,
   tree base_obj = NULL_TREE;
   bool is_exclusive_lock;
   bool is_trylock;
+  bool bad_fun_args = false;

   gcc_assert (is_gimple_call (call));

@@ -2147,16 +2148,28 @@ process_function_attrs (gimple call, tree fdecl,
   != NULL_TREE)
 current_bb_info->writes_ignored = false;

+  /* If the given function is a clone, and if some of the parameters of the
+ clone have been optimized away, then the function attribute is no longer
+ correct, and we should supress certain warnings.  Clones are often created
+ when -fipa-sra is enabled, which happens by default at -O2 starting from
+ GCC-4.5.  The clones could be created as early as when constructing SSA.
+ ipa-sra is particularly fond of optimizing away the "this" pointer,
+ which is a problem because that makes it impossible to determine the
+ base object, which then causes spurious errors. It's better to just
+ remain silent in this case.  */
+  if ((TREE_CODE (TREE_TYPE (fdecl)) == FUNCTION_TYPE
+   || TREE_CODE (TREE_TYPE (fdecl)) == METHOD_TYPE)  /* sanity check   */
+  && (fdecl != DECL_ORIGIN (fdecl))  /* is it a clone? */
+  && (type_num_arguments (TREE_TYPE (fdecl)) !=  /* compare args   */
+  type_num_arguments (TREE_TYPE (DECL_ORIGIN(fdecl)
+bad_fun_args = true;
+
   /* If the function is a class member, the first argument of the function
  (i.e. "this" pointer) would be the base object. Note that here we call
  DECL_ORIGIN on fdecl first before we check whether it's a METHOD_TYPE
  because if fdecl is a cloned method, the TREE_CODE of its type would be
  FUNCTION_DECL instead of METHOD_DECL, which would lead us to not grab
- its base object. One possible situation where fdecl could be a clone is
- when -fipa-sra is enabled. (-fipa-sra is enabled by default at -O2
- starting from GCC-4.5.). The clones could be created as early as when
- constructing SSA. Also note that the parameters of a cloned method could
- be optimized away.  */
+ its base object. */
   if (TREE_CODE (TREE_TYPE (DECL_ORIGIN (fdecl))) == METHOD_TYPE
   && gimple_call_num_args(call) > 0)
 base_obj = gimple_call_arg (call, 0);
@@ -2285,7 +2298,8 @@ process_function_attrs (gimple call, tree fdecl,
   current_bb_info, locus);
 }

-  if (warn_thread_unguarded_func)
+  /* suppress warnings if the function arguments are no longer accurate. */
+  if (warn_thread_unguarded_func && !bad_fun_args)
 {
   /* Handle the attributes specifying the lock requirements of
  functions.  */

-- 
DeLesley Hutchins | Software Engineer | deles...@google.com | 505-206-0315


Re: [PR 47382] We cannot simply fold OBJ_TYPE_REF at all in 4.6

2011-09-29 Thread Maxim Kuvyrkov
On 24/09/2011, at 2:19 AM, Martin Jambor wrote:

> However, both of these are really 4.8 material and since the patches
> probably need only minor updates, it might be worthwhile to do that so
> that gcc can handle the "embarrassing" simple cases.  So I will do
> that (though it might need to wait for about a week), re-try them on
> Firefox and probably propose them for submission.

Great!  Thank you.

> 
> By the way, do you evaluate (your) devirtualization by compiling any
> real software (other than Firefox)?  Do the patches make any
> difference there?

The devirtualization and inlining improvements with your patches in the mix 
provided very good improvements on certain proprietary source base.  Code size 
shrank by 8-10% and performance improved by 3-10% (depending on hardware).  
There was some amount of parameter tuning involved, so the improvements with 
default settings will not be as big, but they still should be noticeable.  [The 
testing was done with a 4.6-based toolchain.]

The optimizations that we implemented targeted coding patterns that are 
distilled in the 9 inline-devirt-*.C testcases that I posted.  The actual code 
base was not micro-optimized, and this makes me confident that the improvements 
are general enough to be valuable to broad bodies of code.

Regards,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



Re: [PATCH] [Annotalysis] Fix bogus warnings caused by ipa-sra optimization

2011-09-29 Thread Diego Novillo

On 11-09-29 23:01 , Delesley Hutchins wrote:

This patch suppresses bogus warnings that are caused when annotalysis
is run with the IPA-SRA optimization (i.e. -O2 or higher).  IPA-SRA
creates clones of functions in which some arguments have been eliminated,
even if those arguments are used in the thread safety annotations;
this renders the attributes on such functions invalid.  Annotalysis will now
detect when such clones have been created, and suppress certain
warnings to avoid false positives.

Bootstrapped and passed gcc regression testsuite on
x86_64-unknown-linux-gnu.  Okay for google/gcc-4_6?

  -DeLesley

cp/Changelog.google-4_6:
2011-9-27  DeLesley Hutchins


The date should be formatted as -MM-DD



  * cp/tree-threadsafe-analyze.c (process_function_attrs)
  detect clones and suppress warnings


You need ':' after the closing ')' and write the entry as a proper 
statement (start with capital and end in '.').


Also, no need to add 'cp/', since this entry will go in 
cp/ChangeLog.google-4_6.




testsuite/Changelog.google-4_6:
2011-9-27  DeLesley Hutchins

  * testsuite/g++.dg/thread-ann/thread_annot_lock-82.C
(new regression test)


Similar comments here.




Index: gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C
===
--- gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C  (revision 0)
+++ gcc/testsuite/g++.dg/thread-ann/thread_annot_lock-82.C  (revision 0)
@@ -0,0 +1,30 @@
+// Test template methods in the presence of cloned constructors.
+// Regression test for bugfix.
+// { dg-do compile }
+// { dg-options "-Wthread-safety -O3" }
+
+#include "thread_annot_common.h"
+
+class Foo;
+void do_something(void* a);
+
+class Foo {
+  Mutex mu_;
+
+  // with optimization turned on, ipa-sra should eliminate the hidden
+  // "this" argument, thus invalidating EXCLUSIVE_LOCKS_REQUIRED.
+  inline void clone_me_ipasra(Foo* f) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+do_something(f);
+  }
+
+  void foo(Foo* f);
+};
+
+void Foo::foo(Foo* f) {
+  mu_.Lock();
+  // in the cloned version, it looks like the required lock is f->mu_
+  // we should detect this and ignore the attribute.
+  clone_me_ipasra(f);
+  mu_.Unlock();
+}
+
Index: gcc/tree-threadsafe-analyze.c
===
--- gcc/tree-threadsafe-analyze.c   (revision 179371)
+++ gcc/tree-threadsafe-analyze.c   (working copy)
@@ -2129,6 +2129,7 @@ process_function_attrs (gimple call, tree fdecl,
tree base_obj = NULL_TREE;
bool is_exclusive_lock;
bool is_trylock;
+  bool bad_fun_args = false;


s/bad_fun_args/optimized_args/ ?


gcc_assert (is_gimple_call (call));

@@ -2147,16 +2148,28 @@ process_function_attrs (gimple call, tree fdecl,
!= NULL_TREE)
  current_bb_info->writes_ignored = false;

+  /* If the given function is a clone, and if some of the parameters of the
+ clone have been optimized away, then the function attribute is no longer
+ correct, and we should supress certain warnings.  Clones are often created


s/supress/suppress/


+ when -fipa-sra is enabled, which happens by default at -O2 starting from
+ GCC-4.5.  The clones could be created as early as when constructing SSA.
+ ipa-sra is particularly fond of optimizing away the "this" pointer,
+ which is a problem because that makes it impossible to determine the
+ base object, which then causes spurious errors. It's better to just
+ remain silent in this case.  */
+  if ((TREE_CODE (TREE_TYPE (fdecl)) == FUNCTION_TYPE
+   || TREE_CODE (TREE_TYPE (fdecl)) == METHOD_TYPE)  /* sanity check   */
+&&  (fdecl != DECL_ORIGIN (fdecl))  /* is it a clone? */
+&&  (type_num_arguments (TREE_TYPE (fdecl)) !=  /* compare args   */
+  type_num_arguments (TREE_TYPE (DECL_ORIGIN(fdecl)


gah, thunderbird is so bad with spaces.  I suppose these predicates are 
properly aligned.  Need space before '(fdecl)'.


I'm half tempted to suggest that we should be asserting that we're 
dealing with a FUNCTION_TYPE or a METHOD_TYPE, but this is OK too.



+bad_fun_args = true;
+
/* If the function is a class member, the first argument of the function
   (i.e. "this" pointer) would be the base object. Note that here we call
   DECL_ORIGIN on fdecl first before we check whether it's a METHOD_TYPE
   because if fdecl is a cloned method, the TREE_CODE of its type would be
   FUNCTION_DECL instead of METHOD_DECL, which would lead us to not grab
- its base object. One possible situation where fdecl could be a clone is
- when -fipa-sra is enabled. (-fipa-sra is enabled by default at -O2
- starting from GCC-4.5.). The clones could be created as early as when
- constructing SSA. Also note that the parameters of a cloned method could
- be optimized away.  */
+ its base object. */
if (TREE_CODE (TREE_TYPE (DECL_ORIGIN (fdecl)

Re: PATCH: PR lto/50568: [4.7 Regression] Massive LTO failures

2011-09-29 Thread Andi Kleen

I updated the patch according to your comments. Ok now?

-Andi

gcc/lto/:

2011-09-29  H.J. Lu  
Andi Kleen  

PR lto/50568
* lto.c (lto_splay_tree_delete_id): New.
(lto_splay_tree_compare_ids): Likewise.
(lto_splay_tree_lookup): Likewise.
(lto_splay_tree_id_equal_p): Likewise.
(lto_splay_tree_insert): Likewise.
(lto_splay_tree_new): Likewise.
(lto_resolution_read): Change id to unsigned HOST_WIDE_INT.
Use lto_splay_tree_id_equal_p and lto_splay_tree_lookup.
(create_subid_section_table): Use lto_splay_tree_lookup and
lto_splay_tree_insert.
(lto_file_read): Use lto_splay_tree_new.

lto-plugin/:

2011-09-29  H.J. Lu  
Andi Kleen  

PR lto/50568
* lto-plugin.c (sym_aux): Change id to unsigned long long.
(plugin_symtab): Likewise.
(dump_symtab): Likewise.
(resolve_conflicts): Likewise.
(process_symtab): Likewise.
---
 gcc/lto/lto.c   |   84 ++
 lto-plugin/lto-plugin.c |   14 ---
 2 files changed, 84 insertions(+), 14 deletions(-)

diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 77eb1a1..778e33e 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -93,6 +93,71 @@ lto_obj_create_section_hash_table (void)
   return htab_create (37, hash_name, eq_name, free_with_string);
 }
 
+/* Delete an allocated integer KEY in the splay tree.  */
+
+static void
+lto_splay_tree_delete_id (splay_tree_key key)
+{
+  free ((void *) key);
+}
+
+/* Compare splay tree node ids A and B.  */
+
+static int
+lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b)
+{
+  unsigned HOST_WIDE_INT ai;
+  unsigned HOST_WIDE_INT bi;
+
+  ai = *(unsigned HOST_WIDE_INT *) a;
+  bi = *(unsigned HOST_WIDE_INT *) b;
+
+  if (ai < bi)
+return -1;
+  else if (ai > bi)
+return 1;
+  return 0;
+}
+
+/* Look up splay tree node by ID in splay tree T.  */
+
+static splay_tree_node
+lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id)
+{
+  return splay_tree_lookup (t, (splay_tree_key) &id);
+}
+
+/* Check if KEY has ID.  */
+
+static bool
+lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id)
+{
+  return *(unsigned HOST_WIDE_INT *) key == id;
+}
+
+/* Insert a splay tree node into tree T with ID as key and FILE_DATA as value. 
+   The ID is allocated separately because we need HOST_WIDE_INTs which may
+   be wider than a splay_tree_key. */
+
+static void
+lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id,
+  struct lto_file_decl_data *file_data)
+{
+  unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT);
+  *idp = id;
+  splay_tree_insert (t, (splay_tree_key) idp, (splay_tree_value) file_data);
+}
+
+/* Create a splay tree.  */
+
+static splay_tree
+lto_splay_tree_new (void)
+{
+  return splay_tree_new (lto_splay_tree_compare_ids,
+lto_splay_tree_delete_id,
+NULL);
+}
+
 /* Read the constructors and inits.  */
 
 static void
@@ -944,14 +1009,16 @@ lto_resolution_read (splay_tree file_ids, FILE 
*resolution, lto_file *file)
   for (i = 0; i < num_symbols; i++)
 {
   int t;
-  unsigned index, id;
+  unsigned index;
+  unsigned HOST_WIDE_INT id;
   char r_str[27];
   enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 
0;
   unsigned int j;
   unsigned int lto_resolution_str_len =
sizeof (lto_resolution_str) / sizeof (char *);
 
-  t = fscanf (resolution, "%u %x %26s %*[^\n]\n", &index, &id, r_str);
+  t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s 
%*[^\n]\n", 
+ &index, &id, r_str);
   if (t != 3)
 internal_error ("invalid line in the resolution file");
   if (index > max_index)
@@ -968,11 +1035,12 @@ lto_resolution_read (splay_tree file_ids, FILE 
*resolution, lto_file *file)
   if (j == lto_resolution_str_len)
internal_error ("invalid resolution in the resolution file");
 
-  if (!(nd && nd->key == id))
+  if (!(nd && lto_splay_tree_id_equal_p (nd->key, id)))
{
- nd = splay_tree_lookup (file_ids, id);
+ nd = lto_splay_tree_lookup (file_ids, id);
  if (nd == NULL)
-   internal_error ("resolution sub id %x not in object file", id);
+   internal_error ("resolution sub id " HOST_WIDE_INT_PRINT_HEX_PURE
+   " not in object file", id);
}
 
   file_data = (struct lto_file_decl_data *)nd->value;
@@ -1015,7 +1083,7 @@ create_subid_section_table (void **slot, void *data)
 return 1;
   
   /* Find hash table of sub module id */
-  nd = splay_tree_lookup (file_ids, id);
+  nd = lto_splay_tree_lookup (file_ids, id);
   if (nd != NULL)
 {
   file_data = (struct lto_file_decl_data *)nd->value;
@@ -1026,7 +1094,7 @@ create_subid_section_table (void **slot, void *data)
   memset(fi

Re: [PR 47382] We cannot simply fold OBJ_TYPE_REF at all in 4.6

2011-09-29 Thread Maxim Kuvyrkov
On 30/09/2011, at 4:02 PM, Maxim Kuvyrkov wrote:

> On 24/09/2011, at 2:19 AM, Martin Jambor wrote:
> 
>> However, both of these are really 4.8 material and since the patches
>> probably need only minor updates, it might be worthwhile to do that so
>> that gcc can handle the "embarrassing" simple cases.  So I will do
>> that (though it might need to wait for about a week), re-try them on
>> Firefox and probably propose them for submission.
> 
> Great!  Thank you.

Here is one of your patches updated to the latest mainline.  Just add water^W 
changelog.

It bootstraps, and I'm regtesting it on x86_64-linux-gnu {-m64,-m32} now.

Regards,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



fsf-gcc-martin-1.patch
Description: Binary data


[PATCH] Do not fold addressable operands of "m" into non-addressable (PR inline-asm/50571)

2011-09-29 Thread Jakub Jelinek
Hi!

GCC on the following testcase warns
warning: use of memory input without lvalue in asm operand 0 is deprecated 
[enabled by default]
starting with 4.6, but the source actually had an lvalue there (I don't
think we should forbid for input operands const qualified memory).
On "m" (1) in the source we would error out early, but here we
fold that "m" (*(int *) var) during gimple folding into
"m" (1).  The following patch makes sure that we don't fold an lvalue
into non-lvalue for asm operands that allow memory, but not reg.

This has been originally reported on Linux kernel bitops, which IMHO
are invalid too, they say to the compiler that "m" (*(unsigned long *)array)
is being read, but in fact the inline asm relies on the whole array to
be after that too.  IMHO one should tell the compiler about that,
through "m" (*(struct { unsigned long __l[0x1000]; } *)array)
or similar.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.6?

2011-09-30  Jakub Jelinek  

PR inline-asm/50571
* gimple-fold.c (fold_stmt_1) : If
constraints allow mem and not reg, don't optimize lvalues
into non-lvalues.

* gcc.dg/pr50571.c: New test.

--- gcc/gimple-fold.c.jj2011-09-29 14:25:46.0 +0200
+++ gcc/gimple-fold.c   2011-09-29 21:25:00.0 +0200
@@ -1201,28 +1201,55 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, 
 
 case GIMPLE_ASM:
   /* Fold *& in asm operands.  */
-  for (i = 0; i < gimple_asm_noutputs (stmt); ++i)
-   {
- tree link = gimple_asm_output_op (stmt, i);
- tree op = TREE_VALUE (link);
- if (REFERENCE_CLASS_P (op)
- && (op = maybe_fold_reference (op, true)) != NULL_TREE)
-   {
- TREE_VALUE (link) = op;
- changed = true;
-   }
-   }
-  for (i = 0; i < gimple_asm_ninputs (stmt); ++i)
-   {
- tree link = gimple_asm_input_op (stmt, i);
- tree op = TREE_VALUE (link);
- if (REFERENCE_CLASS_P (op)
- && (op = maybe_fold_reference (op, false)) != NULL_TREE)
-   {
- TREE_VALUE (link) = op;
- changed = true;
-   }
-   }
+  {
+   size_t noutputs;
+   const char **oconstraints;
+   const char *constraint;
+   bool allows_mem, allows_reg, is_inout;
+
+   noutputs = gimple_asm_noutputs (stmt);
+   oconstraints = XALLOCAVEC (const char *, noutputs);
+
+   for (i = 0; i < gimple_asm_noutputs (stmt); ++i)
+ {
+   tree link = gimple_asm_output_op (stmt, i);
+   tree op = TREE_VALUE (link);
+   constraint
+ = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+   oconstraints[i] = constraint;
+   parse_output_constraint (&constraint, i, 0, 0, &allows_mem,
+&allows_reg, &is_inout);
+   if (REFERENCE_CLASS_P (op)
+   && (op = maybe_fold_reference (op, true)) != NULL_TREE
+   && (allows_reg
+   || !allows_mem
+   || is_gimple_lvalue (op)
+   || !is_gimple_lvalue (TREE_VALUE (link
+ {
+   TREE_VALUE (link) = op;
+   changed = true;
+ }
+ }
+   for (i = 0; i < gimple_asm_ninputs (stmt); ++i)
+ {
+   tree link = gimple_asm_input_op (stmt, i);
+   tree op = TREE_VALUE (link);
+   constraint
+ = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+   parse_input_constraint (&constraint, 0, 0, noutputs, 0,
+   oconstraints, &allows_mem, &allows_reg);
+   if (REFERENCE_CLASS_P (op)
+   && (op = maybe_fold_reference (op, false)) != NULL_TREE
+   && (allows_reg
+   || !allows_mem
+   || is_gimple_lvalue (op)
+   || !is_gimple_lvalue (TREE_VALUE (link
+ {
+   TREE_VALUE (link) = op;
+   changed = true;
+ }
+ }
+  }
   break;
 
 case GIMPLE_DEBUG:
--- gcc/testsuite/gcc.dg/pr50571.c.jj   2011-09-29 21:28:05.0 +0200
+++ gcc/testsuite/gcc.dg/pr50571.c  2011-09-29 21:30:08.0 +0200
@@ -0,0 +1,11 @@
+/* PR inline-asm/50571 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+static const int var[4] = { 1, 2, 3, 4 };
+
+void
+foo (void)
+{
+  __asm volatile ("" : : "m" (*(int *) var));
+}

Jakub