[pph] Correct executable testing. Factor x1dynarray1. (issue4667050)

2011-06-29 Thread Lawrence Crowl
Fix dg-pph.exp to execute both normal and pph executable tests and to avoid
false new/disappeared reports.

Break x1dynarray1 into three levels of tests.

Remove ICE failure in x1dynarray1.  We now get bogus warnings, but we cannot
seem to mark them with dg-bogus without also getting xfails.


Index: gcc/testsuite/ChangeLog.pph

2011-06-29  Lawrence Crowl  

* lib/dg-pph.exp: Run pph executables when normal executables run.
Change (assembly identical) and (assembly mismatch) to (assembly
comparison) to avoid false "disappeared" test
reports.
* g++.dg/pph/x1dynarray1.h: Simplify test to avoid more complicated
language features: namespaces, exceptions, placement new, and explicit
destructor calls.
* g++.dg/pph/x1dynarray1.cc: Strip out namespaces.
* g++.dg/pph/x1dynarray0.cc: New.  An even simpler use.
* g++.dg/pph/x1dynarray2.h: New.  Add back the full test.
* g++.dg/pph/x1dynarray2.cc: New.  Add back the full test.


Index: gcc/testsuite/lib/dg-pph.exp
===
--- gcc/testsuite/lib/dg-pph.exp(revision 175673)
+++ gcc/testsuite/lib/dg-pph.exp(working copy)
@@ -74,29 +74,35 @@ proc dg-pph-pos { subdir test options ma
 set dg-do-what-default compile
 dg-test -keep-output $test "$options -I." ""
 
-# Quit if it did not compile successfully.
-if { ![file_on_host exists "$bname.s"] } {
-   # All regular compiles should pass.
-   fail "$nshort $options (regular assembly missing)"
-   return
+# Executables do not generate assembly.
+if { ![string compare "dg-do-what" "run"] } {
+   # Not executable, so quit if it did not compile successfully.
+   if { ![file_on_host exists "$bname.s"] } {
+   fail "$nshort $options (regular assembly missing)"
+   return
+   }
+   # Rename the .s file into .s-pph to compare it after the second build.
+   remote_upload host "$bname.s" "$bname.s-pph"
+   remote_download host "$bname.s-pph"
+   file_on_host delete "$bname.s"
 }
 
-# Rename the .s file into .s-pph to compare it after the second build.
-remote_upload host "$bname.s" "$bname.s-pph"
-remote_download host "$bname.s-pph"
-file_on_host delete "$bname.s"
-
 verbose -log ""
 
 # Compile a second time using the pph files.
 dg-test -keep-output $test "$options $mapflag -I." ""
 
+# Executables do not generate assembly,
+if { [string compare "dg-do-what" "run"] } {
+   # and so we are done testing.
+   return
+}
+
 # Quit if it did not compile successfully.
 if { ![file_on_host exists "$bname.s"] } {
# Expect assembly to be missing when the compile is an
-   # expected fail or when this was an executable test.
-   if { ![string compare "dg-do-what" "run"] \
-&& ![llength [grep $test "dg-xfail-if.*-fpph-map"]] } {
+   # expected fail.
+   if { ![llength [grep $test "dg-xfail-if.*-fpph-map"]] } {
fail "$nshort $options (pph assembly missing)"
}
return
@@ -117,17 +123,17 @@ proc dg-pph-pos { subdir test options ma
fail "$nshort $options comparison failure"
 } elseif { $adiff == 1 } {
if { $xdiff } {
-   xpass "$nshort $options (assembly identical)"
+   xpass "$nshort $options (assembly comparison)"
} else {
-   pass "$nshort $options (assembly identical)"
+   pass "$nshort $options (assembly comparison)"
}
file_on_host delete "$bname.s-pph"
file_on_host delete "$bname.s+pph"
 } else {
if { $xdiff } {
-   xfail "$nshort $options (assembly mismatch)"
+   xfail "$nshort $options (assembly comparison)"
} else {
-   fail "$nshort $options (assembly mismatch)"
+   fail "$nshort $options (assembly comparison)"
}
 }
 }
Index: gcc/testsuite/g++.dg/pph/x1dynarray2.h
===
--- gcc/testsuite/g++.dg/pph/x1dynarray2.h  (revision 0)
+++ gcc/testsuite/g++.dg/pph/x1dynarray2.h  (revision 0)
@@ -0,0 +1,111 @@
+// { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
+// { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H is" "" { xfail *-*-* } 0 }
+// { dg-bogus "unistd.h:1144:34: error: declaration of .* has a different 
exception specifier" "" { xfail *-*-* } 0 }
+#ifndef X1DYNARRAY2_H
+#define X1DYNARRAY2_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DefaultConstructible typename
+#define CPP0X( ignore )
+
+namespace std {
+
+template< DefaultConstructible T >
+struct dynarray
+{
+// types:
+typedef   T   value_type;
+typedef   T&  reference;
+typedef const T&  const_reference;
+typedef   T*   

Re: Ping: C-family stack check for threads

2011-06-29 Thread Ye Joey
On Fri, Jun 24, 2011 at 11:51 PM, Thomas Klein  wrote:
>
> Hi
>
> This is a ping of (http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01226.html).
> Repeating my request.
>
> I would like to have a stack check for threads with small amount of stack 
> space per thread.
> (I'm using a ARM Cortex-M3 microcontroller with a stack size of a 1 KByte per 
> Thread.)
> Each thread having its own limit address.
> The thread scheduler can then calculate the limit and store this value inside 
> of a global variable.
> The compiler may generate code to check the stack for overflow at function 
> entry.
> In principle this can be done this way:
>  - push registers as usual
>  - figure out if one or two work registers, that can be used directly without 
> extra push
>  - if not enough registers found push required work registers to stack
>  - load limit address into first working register
>  - load value of limit address (into the same register)
>  - if stack pointer will go to extend the stack (e.g. for local variables)
>    load this size value too (here the second work register can be used)
>  - compare for overflow
>  - if overflow occur "call" stack_failure function
>  - pop work registers that are pushed before
>  - continue function prologue as usual e.g. extend stack pointer
>
> The ARM target has an option "-mapcs-stack-check" but this is more or less 
> not working. (implementation seems to be missing)
> There are also architecture independent options like
> "-fstack-check=generic", "-fstack-limit-symbol=current_stack_limit" or 
> "-fstack-limit-register=r6"
> that can be used.
>
> The generic stack check is doing a probe at end of function prologue phase
> (e.g by writing 12K ahead the current stack pointer position).
> If this stack space is not available the probe may generate a fault.
> This requires that the CPU has an MPU or an MMU.
> For machines with small memory space an additional mechanism should be
> available.
>
> The option "-fstack-check" can be extended by the switches "direct" and 
> "indirect" to emit compare code in function prologue.
> If switch "direct" is given the address of "-fstack-limit-symbol" represents 
> the limit itself.
> If switch "indirect" is given "-fstack-limit-symbol" is a kind of global
> variable that needs to be read before comparison.
Thomas,

I think you are working on a very useful feature. I have ARM MCU
applications that run out of stack space, silently resulting in strange
behavior. I'd like to try your patch and probably give
further comments

- Joey


[pph] Fixing string streaming functions and their comments (issue4654076)

2011-06-29 Thread Gabriel Charette
Some comments were wrong.

In particular, the strings read from the stream are placed in a data table, and 
thus the strings returned DO NOT need to be freed by the caller; they belong to 
the stream, which manages them itself.

Also lto_output_string_with_length does handle NULL strings, removed the 
redundant logic.

Tested with bootstrap build and pph regression testing.

2011-06-29  Gabriel Charette  

* pph-streamer.h (struct pph_stream): Fix comment of data_in field.
(pph_out_string_with_length): lto_output_string_with_length now handles
NULL strings, call it directly.
(pph_in_string): Fix comment.

diff --git a/gcc/cp/pph-streamer.h b/gcc/cp/pph-streamer.h
index b899501..8d0c024 100644
--- a/gcc/cp/pph-streamer.h
+++ b/gcc/cp/pph-streamer.h
@@ -100,7 +100,7 @@ typedef struct pph_stream {
   struct lto_input_block *ib;
 
   /* String tables and other descriptors used by the LTO reading
- routines.  NULL when the file is opened for reading.  */
+ routines.  NULL when the file is opened for writing.  */
   struct data_in *data_in;
 
   /* Array of sections in the PPH file.  */
@@ -250,21 +250,10 @@ static inline void
 pph_out_string_with_length (pph_stream *stream, const char *str,
   unsigned int len)
 {
-  if (str)
-{
-  if (flag_pph_tracer >= 4)
-   pph_trace_string_with_length (stream, str, len);
-  lto_output_string_with_length (stream->ob, stream->ob->main_stream,
-str, len + 1, false);
-}
-  else
-{
-  /* lto_output_string_with_length does not handle NULL strings,
-but lto_output_string does.  */
-  if (flag_pph_tracer >= 4)
-   pph_trace_string (stream, str);
-  pph_out_string (stream, NULL);
-}
+  if (flag_pph_tracer >= 4)
+pph_trace_string_with_length (stream, str, len);
+  lto_output_string_with_length (stream->ob, stream->ob->main_stream,
+ str, len + 1, false);
 }
 
 /* Output VEC V of ASTs to STREAM.
@@ -338,9 +327,7 @@ pph_in_bytes (pph_stream *stream, void *p, size_t n)
 pph_trace_bytes (stream, p, n);
 }
 
-/* Read and return a string of up to MAX characters from STREAM.
-   The caller is responsible for freeing the memory allocated
-   for the string.  */
+/* Read and return a string of up to MAX characters from STREAM.  */
 
 static inline const char *
 pph_in_string (pph_stream *stream)

--
This patch is available for review at http://codereview.appspot.com/4654076


Re: [PATCH 6/6] Fix PR47654: Compute LB and UB of a CLAST expression.

2011-06-29 Thread Tobias Grosser

On 06/29/2011 12:35 PM, Sebastian Pop wrote:

2011-06-29  Sebastian Pop

PR tree-optimization/47654
* graphite-clast-to-gimple.c (gcc_type_for_value): Removed.
(gcc_type_for_clast_term): Removed.
(gcc_type_for_clast_red): Removed.
(gcc_type_for_clast_bin): Removed.
(lb_ub_for_expr_name): New.
(lb_ub_for_term): New.
(lb_ub_for_expr): New.
(lb_ub_for_red): New.
(lb_ub_for_bin): New.
(gcc_type_for_clast_expr): Reimplemented.
* graphite-ppl.h (value_min): New.

* gcc.dg/graphite/run-id-pr47654.c: New.


I think the approach you are taking here is correct (in terms of not 
producing wrong code).


However I am not sure if this will lead to the smallest type possible. 
As far as I understand you assume for both surrounding induction 
variables and parameters that their lb/ub values are the maximal/minimal 
possible values in their types. This is not incorrect, however I believe 
the constraints in Cloog may provide us with more information, 
especially if the context contains constraints on the parameters.


My dream would be to enhance CLooG such that it can provide information 
about the minimal and maximal value of each clast (sub)expression.


What types would you get for this code (i,j,k,m, n)?

for (i = 0 ; i < 2; i++)
  for (j = i ; j < i + 1; j++)
for (k = j ; k < j + 1; k++)
  for (m = k ; m < k + 1; m++)
for (n = m ; n < m + 1; n++)
  A[0] += A[n];

I am a little bit afraid that we will increase the type size by an order 
of magnitude (or at least one bit) for each nesting level.


Cheers
Tobi




Re: [PATCH 5/6] Compute the type of the IV based only on the CLAST bounds.

2011-06-29 Thread Tobias Grosser

On 06/29/2011 12:35 PM, Sebastian Pop wrote:

2011-06-29  Sebastian Pop

* graphite-clast-to-gimple.c (compute_bounds_for_level): Removed.
(compute_type_for_level): Removed.
(clast_get_body_of_loop): Removed.
(gcc_type_for_iv_of_clast_loop): Removed.
(graphite_create_new_loop): Use max_precision_type.  Compute the type
of the IV based only on the CLAST bounds.
(translate_clast_for_loop): Do not pass level to
graphite_create_new_loop.


This one looks also OK.



Re: [PATCH 0/6] Fix PR47654

2011-06-29 Thread Tobias Grosser

On 06/29/2011 12:35 PM, Sebastian Pop wrote:

Hi,
the following patch set fixes PR47654:

   Correct typo.
   Correct computation of max.
   Fix PR47654: Loop blocking should strip-mine at least two loops.

Those three look OK.

Cheers
Tobi


Re: [PATCH 4/6] Fix computation of precision.

2011-06-29 Thread Tobias Grosser

On 06/29/2011 12:35 PM, Sebastian Pop wrote:

2011-06-29  Sebastian Pop

* graphite-clast-to-gimple.c (precision_for_value): Removed.
(precision_for_interval): Removed.
(gcc_type_for_interval): Use mpz_sizeinbase.
-/* Return a type that could represent the integer value VAL.  */
+/* Return a type that could represent the values between LOW and UP.
+   The value of LOW can be bigger than UP.  */

  static tree
  gcc_type_for_interval (mpz_t low, mpz_t up)
  {


Hi Sebastian,

why do we continue to call low 'low' and up 'up', if we actually just 
have two values v1 and v2 where we do not know which one is larger? I 
think this is wrong and probably comes about because we pass the lower loop bound 
to val_one and the upper loop bound to val_two.


What about:

+/* Return a type that could represent all values between VAL_ONE and
+   VAL_TWO including VAL_ONE and VAL_TWO itself.  There is no
+   constraint on which of the two values is larger.  */

  static tree
- gcc_type_for_interval (mpz_t low, mpz_t up)
+ gcc_type_for_interval (mpz_t val_one, mpz_t val_two)
   {


-  bool unsigned_p = true;
-  int precision, prec_up, prec_int;
+  bool unsigned_p;
tree type;
enum machine_mode mode;
-
-  gcc_assert (mpz_cmp (low, up)<= 0);
-
-  prec_up = precision_for_value (up);
-  prec_int = precision_for_interval (low, up);
-  precision = MAX (prec_up, prec_int);
+  int precision = MAX (mpz_sizeinbase (low, 2),
+  mpz_sizeinbase (up, 2));

if (precision>  BITS_PER_WORD)
  {
@@ -452,14 +397,10 @@ gcc_type_for_interval (mpz_t low, mpz_t up)
return integer_type_node;
  }

-  if (mpz_sgn (low)<= 0)
-unsigned_p = false;
-
-  else if (precision<  BITS_PER_WORD)
-{
-  unsigned_p = false;
-  precision++;
-}
+  if (mpz_cmp (low, up)<= 0)
+unsigned_p = (mpz_sgn (low)>= 0);
+  else
+unsigned_p = (mpz_sgn (up)>= 0);


What about?

   unsigned_p = value_min(low, up) >= 0;

(You need to move the implementation of value_min to this patch)



mode = smallest_mode_for_size (precision, MODE_INT);
precision = GET_MODE_PRECISION (mode);


In general the new implementation looks a lot more elegant than the old 
one. What was the problem with the old one? That low could be larger 
than up and that the calculation in precision_for_interval was incorrect 
(or at least not understandable for me)?


The rest of the patch looks good.

Cheers
Tobi



Re: [PATCH 04/12] vax: Emit prologue as rtl.

2011-06-29 Thread Richard Henderson
On 06/29/2011 04:01 PM, Steven Bosscher wrote:
> Can you please also update http://gcc.gnu.org/backends.html? I think
> VAX should have a 'g' after you commit this patch.

Sure.

> How many TARGET_ASM_FUNCTION_{PRO,EPI}LOGUE targets are left anyway?

It's difficult to tell at a glance, because quite a few use
the hook for Other Things.  Such as ARM printing debug info
about the stack frame, or Sparc outputting the scratch register
elf info, or IA-64 and Alpha using it to output some portion
of the unwind info.


r~


Re: [PATCH 04/12] vax: Emit prologue as rtl.

2011-06-29 Thread Steven Bosscher
On Wed, Jun 29, 2011 at 11:49 PM, Richard Henderson  wrote:
> --- a/gcc/config/vax/vax.c
> +++ b/gcc/config/vax/vax.c
> @@ -70,9 +69,6 @@ static int vax_return_pops_args (tree, tree, int);
>  #undef TARGET_ASM_ALIGNED_HI_OP
>  #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
>
> -#undef TARGET_ASM_FUNCTION_PROLOGUE
> -#define TARGET_ASM_FUNCTION_PROLOGUE vax_output_function_prologue
> -
>  #undef TARGET_ASM_FILE_START
>  #define TARGET_ASM_FILE_START vax_file_start
>  #undef TARGET_ASM_FILE_START_APP_OFF

Can you please also update http://gcc.gnu.org/backends.html? I think
VAX should have a 'g' after you commit this patch.

How many TARGET_ASM_FUNCTION_{PRO,EPI}LOGUE targets are left anyway?

Ciao!
Steven


Re: [trans-mem] Beginning of refactoring

2011-06-29 Thread Richard Henderson
On 05/25/2011 02:10 PM, Torvald Riegel wrote:
> Here's the beginning of a refactoring aimed at being able to merge more
> TM algorithms later on.
> 
> Patch 1: Just a straightforward rename to make it clear that we're
> dispatching on the level of ABI calls, not internals.

Ok, I guess.  I don't think it matters that much.

> Patch 2: _ITM_dropReferences is not sufficiently defined in the ABI. It
> seems to target some form of open nesting for txnal wrappers, but the
> prose in the ABI specification is unclear. Thus, disable this for now
> (aka fatal runtime error), and expect the related tests to fail. Pick it
> up again once that the ABI has been improved and the use cases are
> clear.

Sure, but please actually delete the code rather than just comment it out.

> Patch 3: The actual change in how ABI calls are dispatched. Also,
> removed method-readonly (broken, will in a similar form reappear in the
> family of globallock-based algorithms), and disabled method-wbetl (needs
> larger refactoring, will be revived/remerged later).

> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M256, GTM::abi_disp()->_, )
> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M64, GTM::abi_disp()->_, )
> +CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M128, GTM::abi_disp()->_, )
> +CREATE_DISPATCH_FUNCTIONS(GTM::abi_disp()->_, )

What's the point of using "GTM::abi_disp()->_" as a mandatory argument?

Further, that second "M2" argument is universally empty.  What's that?

> +// Creates memcpy/memmove/memset methods.
> +#define CREATE_DISPATCH_METHODS_MEM()  \
> +virtual void _memtransfer(void *dst, const void* src, size_t size, \
> +bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)   \
> +{ \
> +  memtransfer_static(dst, src, size, may_overlap, dst_mod, src_mod);  \
> +} \
> +virtual void _memset(void *dst, int c, size_t size, ls_modifier mod)   \
> +{ \
> +  memset_static(dst, c, size, mod);   \
> +}

Why are the memtransfer and memset virtuals distinguished from the statics?
For the patch as written it would seem to be ok to merge them.

> +  if (GTM::abi_dispatch::NONTXNAL == GTM::abi_dispatch::WRITE || 
> \
> +  GTM::abi_dispatch::NONTXNAL == GTM::abi_dispatch::READ)
> \

Formatting.

> +#define ITM_MEMTRANSFER_DEF(TARGET, M2, NAME, READ, WRITE) \
> +void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size)  
> \
> +{
> \
> +  TARGET##memtransfer##M2 (dst, src, size,   
> \
> + false, GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ);  
> \
> +}
> \
> +void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) 
> \
> +{
> \
> +  if (GTM::abi_dispatch::NONTXNAL == GTM::abi_dispatch::WRITE || 
> \
> +  GTM::abi_dispatch::NONTXNAL == GTM::abi_dispatch::READ)
> \
> +{
> \
> +  if (((uintptr_t)dst <= (uintptr_t)src ?
> \
> +  (uintptr_t)dst + size > (uintptr_t)src :   
> \
> +  (uintptr_t)src + size > (uintptr_t)dst))   
> \
> +GTM::GTM_fatal("_ITM_memmove overlapping and t/nt is not allowed");  
> \
> +  else   
> \
> +TARGET##memtransfer##M2 (dst, src, size, 
> \
> +false, GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ);   
> \
> +}
> \
> +  TARGET##memtransfer##M2 (dst, src, size,   
> \
> +  true, GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ);  
> \
> +}

Ok, I realize we need macros to generate the ABI names both here and in
CREATE_DISPATCH_FUNCTIONS, but can we limit the code within macros to as
little as absolutely possible?

For instance,

  template
  void abi_memmove(void *dst, const void *src, size_t size)
  {
if (dst_mod == NONTXNAL || src_mod == NONTXNAL)
  ...
  }

where the actual implementation under macro is limited to a function call.

Missing return/else in there?  Surely not two calls to memtransfer...

> +protected:
> +  /// Transactional load. Will be called from the dispatch methods
> +  /// created below.
> +  template  static V load(const V* addr, ls_modifier mod)
> +  {
> +return *addr;
> +  }
> +  /// Transactional store. Will be called from the dispatch methods
> +  /// cre

Re: [testsuite, objc] Don't XFAIL objc.dg/torture/forward-1.m

2011-06-29 Thread Mike Stump
On Jun 29, 2011, at 1:37 AM, Iain Sandoe wrote:
> On 28 Jun 2011, at 18:01, Iain Sandoe wrote:
>> On 28 Jun 2011, at 17:47, Rainer Orth wrote:
>>> objc.dg/torture/forward-1.m now seems to XPASS everywhere, creating an
>>> annoying amount of testsuite noise.  Dominique provided the following
>>> patch in PR libobjc/Bug 36610.
>>> 
>>> Tested with the appropriate runtest invocations on i386-pc-solaris2.10
>>> (both multilibs), sparc-sun-solaris2.10 (both multilibs),
>>> alpha-dec-osf5.1b, mips-sgi-irix6.5 (both multilibs),
>>> powerpc-apple-darwin9.8.0 (32-bit only).
>>> 
>>> Ok for mainline?
>>> 
>>> Thanks.
>>>  Rainer
>>> 
>>> 
>>> 2011-06-28  Dominique d'Humieres  
>>> 
>>> * objc.dg/torture/forward-1.m: Remove dg-xfail-run-if, dg-skip-if.
>>> 
>>> Index: gcc/testsuite/objc.dg/torture/forward-1.m
>>> ===
>>> --- gcc/testsuite/objc.dg/torture/forward-1.m   (revision 175589)
>>> +++ gcc/testsuite/objc.dg/torture/forward-1.m   (working copy)
>>> @@ -1,7 +1,5 @@
>>> /* { dg-do run } */
>>> /* See if -forward:: is able to work. */
>>> -/* { dg-xfail-run-if "PR36610" { ! { { i?86-*-* x86_64-*-* } && ilp32 } } 
>>> { "-fgnu-runtime" } { "" } } */
>> 
>>> -/* { dg-skip-if "Needs OBJC2 Implementation" { *-*-darwin* && { lp64 } } { 
>>> "-fnext-runtime" } { "" } } */
>> 
>> actually, looking at this,  it should likely read (untested):
>> 
>> /* { dg-skip-if "Needs OBJC2 Implementation" { *-*-darwin8* && { lp64 && { ! 
>> objc2 } } } { "-fnext-runtime" } { "" } } */
>> 
>> and should stay in place to protect the test-cases for m64 on *-*-darwin8*
>> 
>> (not that there's ever likely to be an m64 objc2 on darwin 8.. but)
> 
> Just FTR, this works for me on powerpc-apple-darwin9
> Iain
> 
> Index: gcc/testsuite/objc.dg/torture/forward-1.m
> ===
> --- gcc/testsuite/objc.dg/torture/forward-1.m   (revision 175578)
> +++ gcc/testsuite/objc.dg/torture/forward-1.m   (working copy)
> @@ -1,7 +1,6 @@
> /* { dg-do run } */
> /* See if -forward:: is able to work. */
> -/* { dg-xfail-run-if "PR36610" { ! { { i?86-*-* x86_64-*-* } && ilp32 } } { 
> "-fgnu-runtime" } { "" } } */
> -/* { dg-skip-if "Needs OBJC2 Implementation" { *-*-darwin* && { lp64 } } { 
> "-fnext-runtime" } { "" } } */
> +/* { dg-skip-if "Needs OBJC2 Implementation" { *-*-darwin8* && { lp64 && { ! 
> objc2 } } } { "-fnext-runtime" } { "" } } */
> 
> #include 
> #include 

That means, you can check it in, Iain is my testsuite expert.  :-)


[PATCH] Add support on powerpc to change CASE_VALUES_THRESHOLD

2011-06-29 Thread Michael Meissner
On the powerpc, switch statements can be expensive, and we would like to be
able to tune the threshold of when the compiler generates if statements
vs. using a table jump operation (and different processors within the powerpc
have different limits).  This patch adds a powerpc tuning option to control
this.

I've done bootstraps and make checks with no regressions.  Is this ok to apply
to the trunk?  At this time, I am not changing the default value (4).  With the
option, I've seen a few spec 2006 benchmarks run faster, and a few run slower.

[gcc]
2011-06-29  Michael Meissner  

* config/rs6000/rs6000.opt (-mcase-values-threshold): New switch.

* config/rs6000/rs6000.c (TARGET_CASE_VALUES_THRESHOLD): New
target hook for override choice of when to do jump table vs. if
statements based on -mcase-values-threshold=.

* doc/invoke.texi (RS/6000 and PowerPC Options): Document
-mcase-values-threshold.

[gcc/testsuite]
2011-06-29  Michael Meissner  

* gcc.target/powerpc/ppc-switch-1.c: New test for
-mcase-values-threshold.
* gcc.target/powerpc/ppc-switch-2.c: Ditto.

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
Index: gcc/config/rs6000/rs6000.opt
===
--- gcc/config/rs6000/rs6000.opt(revision 175662)
+++ gcc/config/rs6000/rs6000.opt(working copy)
@@ -521,4 +521,7 @@ mxilinx-fpu
 Target Var(rs6000_xilinx_fpu) Save
 Specify Xilinx FPU.
 
-
+mcase-values-threshold=
+Target Report Var(rs6000_case_values_threshold_num) Init(4) RejectNegative 
Joined UInteger Save
+Specify the smallest number of different values for which it is best to use a
+jump-table instead of a tree of conditional branches (default, 4).
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 175662)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -1210,6 +1210,7 @@ static void rs6000_function_specific_pri
struct cl_target_option *);
 static bool rs6000_can_inline_p (tree, tree);
 static void rs6000_set_current_function (tree);
+static unsigned int rs6000_case_values_threshold (void);
 
 
 /* Default register names.  */
@@ -1617,6 +1618,9 @@ static const struct attribute_spec rs600
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
 
+#undef TARGET_CASE_VALUES_THRESHOLD
+#define TARGET_CASE_VALUES_THRESHOLD rs6000_case_values_threshold
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 
@@ -26834,6 +26838,18 @@ rs6000_libcall_value (enum machine_mode 
   return gen_rtx_REG (mode, regno);
 }
 
+/* If the machine does not have a case insn that compares the bounds,
+   this means extra overhead for dispatch tables, which raises the
+   threshold for using them.  */
+
+static unsigned int
+rs6000_case_values_threshold (void)
+{
+  if (rs6000_case_values_threshold_num)
+return rs6000_case_values_threshold_num;
+
+  return default_case_values_threshold ();
+}
 
 /* Given FROM and TO register numbers, say whether this elimination is allowed.
Frame pointer elimination is automatically handled.
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi (revision 175662)
+++ gcc/doc/invoke.texi (working copy)
@@ -807,7 +807,7 @@ See RS/6000 and PowerPC Options.
 -msdata=@var{opt}  -mvxworks  -G @var{num}  -pthread @gol
 -mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision @gol
 -mno-recip-precision @gol
--mveclibabi=@var{type} -mfriz -mno-friz}
+-mveclibabi=@var{type} -mfriz -mno-friz -mcase-values-threshold=@var{n}}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -16320,6 +16320,11 @@ Generate (do not generate) the @code{fri
 rounding a floating point value to 64-bit integer and back to floating
 point.  The @code{friz} instruction does not return the same value if
 the floating point number is too large to fit in an integer.
+
+@item -mcase-values-threshold=@var{n}
+Specify the smallest number of different values for which it is best to
+use a jump-table instead of a tree of conditional branches.  The
+default for @option{-mcase-values-threshold} is 4.
 @end table
 
 @node RX Options
Index: gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c
===
--- gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c (revision 0)
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-options "-O2 -mcase-values-threshold=2" } */
+/* { dg-final { scan-assembler "mtctr" } } */
+/* { dg-final { scan-assembler "bctr" } } */

Re: [Patch] Fix objc/48109

2011-06-29 Thread Mike Stump
On Jun 29, 2011, at 10:47 AM, Iain Sandoe wrote:
> The bug arises because of the use, by the ObjC FE, of two old target macros 
> that emit efficient representations of class definitions and references.

> Mike has already given this a 'seems reasonable' in the PR thread, however, I 
> need an approver for the varasm.c and target hook changes.
> 
> OK for trunk & 4.6?

Ok for my parts.


Re: [gcc patch] Re: C++ member function template id not matching linkage name (PR debug/49408)

2011-06-29 Thread Jason Merrill

On 06/29/2011 05:07 PM, Jan Kratochvil wrote:

On Wed, 29 Jun 2011 22:56:26 +0200, Jason Merrill wrote:

On 06/29/2011 04:00 PM, Jan Kratochvil wrote:

On Mon, 27 Jun 2011 20:00:24 +0200, Jason Merrill wrote:
   # decltype/fn call test
   --format=gnu-v3
   _Z4add3IidEDTclL_Z1gEfp_fp0_EET_T0_
-decltype (g({parm#1}, {parm#2})) add3(int, double)
+decltype (g) add3(int, double)


Here you're suppressing the arguments to a call, which we want to keep;
we only want to suppress printing the parameter types (which are not
part of the source expression).


Sorry but what is therefore the expect output in this case?


The earlier output was correct.  We just don't want to print "g(int, 
double)".


Jason


Re: [PATCH] Fix empty .debug_abbrev handling (PR debug/49364)

2011-06-29 Thread Jason Merrill

OK.

Jason


Re: C++ PATCH for c++/49216 (problems with new T[1]{})

2011-06-29 Thread Jason Merrill
As reported, this still wasn't working properly for an array of scalar 
type.  We still need to handle {} in the case of iterating over all the 
elements.


While I was poking at related code, I fixed a wrong-code bug with the 
ancient and permerrored new T[n](init) extension because it was easy.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit dd6c8f64eec3752e9a7cf63b8ef2e46627fb5af1
Author: Jason Merrill 
Date:   Wed Jun 29 17:13:49 2011 -0400

	PR c++/49216
	* init.c (build_new_1): Pass {} down to build_vec_init.
	(build_vec_init): Handle it.

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index ac2b733..f80c475 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2396,24 +2396,31 @@ build_new_1 (VEC(tree,gc) **placement, tree type, tree nelts,
 	  && BRACE_ENCLOSED_INITIALIZER_P (VEC_index (tree, *init, 0))
 	  && CONSTRUCTOR_IS_DIRECT_INIT (VEC_index (tree, *init, 0)))
 	{
-	  tree arraytype, domain;
 	  vecinit = VEC_index (tree, *init, 0);
-	  if (TREE_CONSTANT (nelts))
-		domain = compute_array_index_type (NULL_TREE, nelts, complain);
+	  if (CONSTRUCTOR_NELTS (vecinit) == 0)
+		/* List-value-initialization, leave it alone.  */;
 	  else
 		{
-		  domain = NULL_TREE;
-		  if (CONSTRUCTOR_NELTS (vecinit) > 0)
-		warning (0, "non-constant array size in new, unable to "
-			 "verify length of initializer-list");
+		  tree arraytype, domain;
+		  if (TREE_CONSTANT (nelts))
+		domain = compute_array_index_type (NULL_TREE, nelts,
+		   complain);
+		  else
+		{
+		  domain = NULL_TREE;
+		  if (CONSTRUCTOR_NELTS (vecinit) > 0)
+			warning (0, "non-constant array size in new, unable "
+ "to verify length of initializer-list");
+		}
+		  arraytype = build_cplus_array_type (type, domain);
+		  vecinit = digest_init (arraytype, vecinit, complain);
 		}
-	  arraytype = build_cplus_array_type (type, domain);
-	  vecinit = digest_init (arraytype, vecinit, complain);
 	}
 	  else if (*init)
 {
   if (complain & tf_error)
-permerror (input_location, "ISO C++ forbids initialization in array new");
+permerror (input_location,
+			   "parenthesized initializer in array new");
   else
 return error_mark_node;
 	  vecinit = build_tree_list_vec (*init);
@@ -3090,9 +3097,23 @@ build_vec_init (tree base, tree maxindex, tree init,
   try_block = begin_try_block ();
 }
 
+  /* If the initializer is {}, then all elements are initialized from {}.
+ But for non-classes, that's the same as value-initialization.  */
+  if (init && BRACE_ENCLOSED_INITIALIZER_P (init)
+  && CONSTRUCTOR_NELTS (init) == 0)
+{
+  if (CLASS_TYPE_P (type))
+	/* Leave init alone.  */;
+  else
+	{
+	  init = NULL_TREE;
+	  explicit_value_init_p = true;
+	}
+}
+
   /* Maybe pull out constant value when from_array? */
 
-  if (init != NULL_TREE && TREE_CODE (init) == CONSTRUCTOR)
+  else if (init != NULL_TREE && TREE_CODE (init) == CONSTRUCTOR)
 {
   /* Do non-default initialization of non-trivial arrays resulting from
 	 brace-enclosed initializers.  */
@@ -3210,7 +3231,7 @@ build_vec_init (tree base, tree maxindex, tree init,
  We do need to keep going if we're copying an array.  */
 
   if (from_array
-  || ((type_build_ctor_call (type) || explicit_value_init_p)
+  || ((type_build_ctor_call (type) || init || explicit_value_init_p)
 	  && ! (host_integerp (maxindex, 0)
 		&& (num_initialized_elts
 		== tree_low_cst (maxindex, 0) + 1
@@ -3276,8 +3297,16 @@ build_vec_init (tree base, tree maxindex, tree init,
 	}
   else
 	{
-	  gcc_assert (type_build_ctor_call (type));
-	  elt_init = build_aggr_init (to, init, 0, complain);
+	  gcc_assert (type_build_ctor_call (type) || init);
+	  if (CLASS_TYPE_P (type))
+	elt_init = build_aggr_init (to, init, 0, complain);
+	  else
+	{
+	  if (TREE_CODE (init) == TREE_LIST)
+		init = build_x_compound_expr_from_list (init, ELK_INIT,
+			complain);
+	  elt_init = build2 (INIT_EXPR, type, to, init);
+	}
 	}
 
   if (elt_init == error_mark_node)
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-value.C b/gcc/testsuite/g++.dg/cpp0x/initlist-value.C
index 25a3373..215bb90 100644
--- a/gcc/testsuite/g++.dg/cpp0x/initlist-value.C
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-value.C
@@ -2,6 +2,9 @@
 // { dg-options -std=c++0x }
 // { dg-do run }
 
+void * operator new (__SIZE_TYPE__, void *p) { return p; }
+void * operator new[] (__SIZE_TYPE__, void *p) { return p; }
+
 // Empty base so A isn't an aggregate
 struct B {};
 struct A: B {
@@ -18,8 +21,14 @@ int main()
 {
   A a{};
   C c;
+  int space = 42;
+  A* ap = new (&space) A{};
+  int space1[1] = { 42 };
+  A* a1p = new (space1) A[1]{};
   if (a.i != 0
   || c.i != 0
+  || ap->i != 0
+  || a1p[0].i != 0
   || A{}.i != 0
   || f({}) != 0)
 return 1;
diff --git a/gcc/testsuite/g++.dg

[PATCH 03/12] dwarf2out: Add REG_CFA_FLUSH_QUEUE.

2011-06-29 Thread Richard Henderson
---
 gcc/dwarf2out.c   |   37 +
 gcc/final.c   |4 +++-
 gcc/reg-notes.def |   11 +--
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 6fad7e9..904 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -2832,6 +2832,7 @@ dwarf2out_frame_debug (rtx insn, bool after_p)
   const char *label;
   rtx note, n;
   bool handled_one = false;
+  bool need_flush = false;
 
   if (!NONJUMP_INSN_P (insn) || clobbers_queued_reg_save (insn))
 dwarf2out_flush_queued_reg_saves ();
@@ -2854,7 +2855,7 @@ dwarf2out_frame_debug (rtx insn, bool after_p)
   {
   case REG_FRAME_RELATED_EXPR:
insn = XEXP (note, 0);
-   goto found;
+   goto do_frame_expr;
 
   case REG_CFA_DEF_CFA:
dwarf2out_frame_debug_def_cfa (XEXP (note, 0), label);
@@ -2934,24 +2935,36 @@ dwarf2out_frame_debug (rtx insn, bool after_p)
handled_one = true;
break;
 
+  case REG_CFA_FLUSH_QUEUE:
+   /* The actual flush happens below.  */
+   need_flush = true;
+   handled_one = true;
+   break;
+
   default:
break;
   }
+
   if (handled_one)
 {
-  if (any_cfis_emitted)
-   dwarf2out_flush_queued_reg_saves ();
-  return;
+  /* Minimize the number of advances by emitting the entire queue
+once anything is emitted.  */
+  need_flush |= any_cfis_emitted;
 }
+  else
+{
+  insn = PATTERN (insn);
+do_frame_expr:
+  dwarf2out_frame_debug_expr (insn, label);
 
-  insn = PATTERN (insn);
- found:
-  dwarf2out_frame_debug_expr (insn, label);
+  /* Check again.  A parallel can save and update the same register.
+ We could probably check just once, here, but this is safer than
+ removing the check at the start of the function.  */
+  if (any_cfis_emitted || clobbers_queued_reg_save (insn))
+   need_flush = true;
+}
 
-  /* Check again.  A parallel can save and update the same register.
- We could probably check just once, here, but this is safer than
- removing the check above.  */
-  if (any_cfis_emitted || clobbers_queued_reg_save (insn))
+  if (need_flush)
 dwarf2out_flush_queued_reg_saves ();
 }
 
diff --git a/gcc/final.c b/gcc/final.c
index cb4a83d..c0c1fc8 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -2683,7 +2683,9 @@ final_scan_insn (rtx insn, FILE *file, int optimize_p 
ATTRIBUTE_UNUSED,
 
current_output_insn = debug_insn = insn;
 
-   if (CALL_P (insn) && dwarf2out_do_frame ())
+   if (dwarf2out_do_frame ()
+   && (CALL_P (insn)
+   || find_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL)))
  dwarf2out_frame_debug (insn, false);
 
/* Find the proper template for this insn.  */
diff --git a/gcc/reg-notes.def b/gcc/reg-notes.def
index 9924355..eccac9e 100644
--- a/gcc/reg-notes.def
+++ b/gcc/reg-notes.def
@@ -155,16 +155,23 @@ REG_NOTE (CFA_EXPRESSION)
first pattern is the register to be restored.  */
 REG_NOTE (CFA_RESTORE)
 
-/* Attached to insn that is RTX_FRAME_RELATED_P, marks insn that sets
+/* Attached to insns that are RTX_FRAME_RELATED_P, marks insn that sets
vDRAP from DRAP.  If vDRAP is a register, vdrap_reg is initalized
to the argument, if it is a MEM, it is ignored.  */
 REG_NOTE (CFA_SET_VDRAP)
 
-/* Attached to insn that are RTX_FRAME_RELATED_P, indicating a window
+/* Attached to insns that are RTX_FRAME_RELATED_P, indicating a window
save operation, i.e. will result in a DW_CFA_GNU_window_save.
The argument is ignored.  */
 REG_NOTE (CFA_WINDOW_SAVE)
 
+/* Attached to insns that are RTX_FRAME_RELATED_P, marks the insn as
+   requiring that all queued information should be flushed *before* insn,
+   regardless of what is visible in the rtl.  The argument is ignored.
+   This is normally used for a call instruction which is not exposed to
+   the rest of the compiler as a CALL_INSN.  */
+REG_NOTE (CFA_FLUSH_QUEUE)
+
 /* Indicates that REG holds the exception context for the function.
This context is shared by inline functions, so the code to acquire
the real exception context is delayed until after inlining.  */
-- 
1.7.5.4



[PATCH 04/12] vax: Emit prologue as rtl.

2011-06-29 Thread Richard Henderson
Not that there's much rtl to emit, since the CALL instruction
interpreting the procedure entry mask does almost all the work.
However, it means we're no longer emitting dwarf2 via the
text-based entry points.
---
 gcc/config/vax/vax-protos.h |1 +
 gcc/config/vax/vax.c|   86 +++
 gcc/config/vax/vax.md   |   25 ++--
 3 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/gcc/config/vax/vax-protos.h b/gcc/config/vax/vax-protos.h
index a8f88bf..3f24794 100644
--- a/gcc/config/vax/vax-protos.h
+++ b/gcc/config/vax/vax-protos.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 
 extern bool legitimate_constant_address_p (rtx);
 extern bool vax_mode_dependent_address_p (rtx);
+extern void vax_expand_prologue (void);
 
 #ifdef RTX_CODE
 extern const char *cond_name (rtx);
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index 7c7070c..13a4515 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -48,7 +48,6 @@ along with GCC; see the file COPYING3.  If not see
 
 static void vax_option_override (void);
 static bool vax_legitimate_address_p (enum machine_mode, rtx, bool);
-static void vax_output_function_prologue (FILE *, HOST_WIDE_INT);
 static void vax_file_start (void);
 static void vax_init_libfuncs (void);
 static void vax_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
@@ -70,9 +69,6 @@ static int vax_return_pops_args (tree, tree, int);
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
 
-#undef TARGET_ASM_FUNCTION_PROLOGUE
-#define TARGET_ASM_FUNCTION_PROLOGUE vax_output_function_prologue
-
 #undef TARGET_ASM_FILE_START
 #define TARGET_ASM_FILE_START vax_file_start
 #undef TARGET_ASM_FILE_START_APP_OFF
@@ -137,6 +133,17 @@ vax_option_override (void)
 #endif
 }
 
+static void
+vax_add_reg_cfa_offset (rtx insn, int offset, rtx src)
+{
+  rtx x;
+
+  x = plus_constant (frame_pointer_rtx, offset);
+  x = gen_rtx_MEM (SImode, x);
+  x = gen_rtx_SET (VOIDmode, x, src);
+  add_reg_note (insn, REG_CFA_OFFSET, x);
+}
+
 /* Generate the assembly code for function entry.  FILE is a stdio
stream to output the code to.  SIZE is an int: how many units of
temporary storage to allocate.
@@ -146,38 +153,67 @@ vax_option_override (void)
used in the function.  This function is responsible for knowing
which registers should not be saved even if used.  */
 
-static void
-vax_output_function_prologue (FILE * file, HOST_WIDE_INT size)
+void
+vax_expand_prologue (void)
 {
-  int regno;
+  int regno, offset;
   int mask = 0;
+  HOST_WIDE_INT size;
+  rtx insn;
 
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
   mask |= 1 << regno;
 
-  fprintf (file, "\t.word 0x%x\n", mask);
+  insn = emit_insn (gen_procedure_entry_mask (GEN_INT (mask)));
+  RTX_FRAME_RELATED_P (insn) = 1;
 
-  if (dwarf2out_do_frame ())
-{
-  const char *label = dwarf2out_cfi_label (false);
-  int offset = 0;
+  /* The layout of the CALLG/S stack frame is as follows:
 
-  for (regno = FIRST_PSEUDO_REGISTER-1; regno >= 0; --regno)
-   if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
- dwarf2out_reg_save (label, regno, offset -= 4);
+   <- CFA, AP
+   r11
+   r10
+   ... Registers saved as specified by MASK
+   r3
+   r2
+   return-addr
+   old fp
+   old ap
+   old psw
+   zero
+   <- FP, SP
 
-  dwarf2out_reg_save (label, PC_REGNUM, offset -= 4);
-  dwarf2out_reg_save (label, FRAME_POINTER_REGNUM, offset -= 4);
-  dwarf2out_reg_save (label, ARG_POINTER_REGNUM, offset -= 4);
-  dwarf2out_def_cfa (label, FRAME_POINTER_REGNUM, -(offset - 4));
-}
+ The rest of the prologue will adjust the SP for the local frame.  */
+
+  vax_add_reg_cfa_offset (insn, 4, arg_pointer_rtx);
+  vax_add_reg_cfa_offset (insn, 8, frame_pointer_rtx);
+  vax_add_reg_cfa_offset (insn, 12, pc_rtx);
+
+  offset = 16;
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+if (mask & (1 << regno))
+  {
+   vax_add_reg_cfa_offset (insn, offset, gen_rtx_REG (SImode, regno));
+   offset += 4;
+  }
+
+  /* Because add_reg_note pushes the notes, adding this last means that
+ it will be processed first.  This is required to allow the other
+ notes to be interpreted properly.  */
+  add_reg_note (insn, REG_CFA_DEF_CFA,
+   plus_constant (frame_pointer_rtx, offset));
 
+  /* Allocate the local stack frame.  */
+  size = get_frame_size ();
   size -= STARTING_FRAME_OFFSET;
-  if (size >= 64)
-asm_fprintf (file, "\tmovab %wd(%Rsp),%Rsp\n", -size);
-  else if (size)
-asm_fprintf (file, "\tsubl2 $%wd,%Rsp\n", size);
+  emit_insn (gen_addsi3 (stack_pointer_rtx,
+stack_pointer_rtx, GEN_INT (-size)));
+
+  /* Do not allow instructions referencing local stack memory to be
+ scheduled

[PATCH 12/12] dwarf2out: Remove unused text-based unwind entry points.

2011-06-29 Thread Richard Henderson
The dwarf2out_cfi_label function is privatized, the
dwarf2out_def_cfa function is merged into its last user.
The others really are unused.
---
 gcc/dwarf2out.c |   65 ++-
 gcc/tree.h  |   27 --
 2 files changed, 7 insertions(+), 85 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 904..7778dc9 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -848,7 +848,7 @@ add_cfi (cfi_vec *vec, dw_cfi_ref cfi)
 /* Generate a new label for the CFI info to refer to.  FORCE is true
if a label needs to be output even when using .cfi_* directives.  */
 
-char *
+static char *
 dwarf2out_cfi_label (bool force)
 {
   static char label[20];
@@ -1080,21 +1080,6 @@ static HOST_WIDE_INT args_size;
 /* The last args_size we actually output.  */
 static HOST_WIDE_INT old_args_size;
 
-/* Entry point to update the canonical frame address (CFA).
-   LABEL is passed to add_fde_cfi.  The value of CFA is now to be
-   calculated from REG+OFFSET.  */
-
-void
-dwarf2out_def_cfa (const char *label, unsigned int reg, HOST_WIDE_INT offset)
-{
-  dw_cfa_location loc;
-  loc.indirect = 0;
-  loc.base_offset = 0;
-  loc.reg = reg;
-  loc.offset = offset;
-  def_cfa_1 (label, &loc);
-}
-
 /* Determine if two dw_cfa_location structures define the same data.  */
 
 static bool
@@ -1232,33 +1217,6 @@ reg_save (const char *label, unsigned int reg, unsigned 
int sreg, HOST_WIDE_INT
   add_fde_cfi (label, cfi);
 }
 
-/* Entry point for saving a register to the stack.  REG is the GCC register
-   number.  LABEL and OFFSET are passed to reg_save.  */
-
-void
-dwarf2out_reg_save (const char *label, unsigned int reg, HOST_WIDE_INT offset)
-{
-  reg_save (label, DWARF_FRAME_REGNUM (reg), INVALID_REGNUM, offset);
-}
-
-/* Entry point for saving the return address in the stack.
-   LABEL and OFFSET are passed to reg_save.  */
-
-void
-dwarf2out_return_save (const char *label, HOST_WIDE_INT offset)
-{
-  reg_save (label, DWARF_FRAME_RETURN_COLUMN, INVALID_REGNUM, offset);
-}
-
-/* Entry point for saving the return address in a register.
-   LABEL and SREG are passed to reg_save.  */
-
-void
-dwarf2out_return_reg (const char *label, unsigned int sreg)
-{
-  reg_save (label, DWARF_FRAME_RETURN_COLUMN, DWARF_FRAME_REGNUM (sreg), 0);
-}
-
 /* Record the initial position of the return address.  RTL is
INCOMING_RETURN_ADDR_RTX.  */
 
@@ -1866,20 +1824,6 @@ clobbers_queued_reg_save (const_rtx insn)
   return false;
 }
 
-/* Entry point for saving the first register into the second.  */
-
-void
-dwarf2out_reg_save_reg (const char *label, rtx reg, rtx sreg)
-{
-  unsigned int regno, sregno;
-
-  record_reg_saved_in_reg (sreg, reg);
-
-  regno = DWARF_FRAME_REGNUM (REGNO (reg));
-  sregno = DWARF_FRAME_REGNUM (REGNO (sreg));
-  reg_save (label, regno, sregno, 0);
-}
-
 /* What register, if any, is currently saved in REG?  */
 
 static rtx
@@ -4273,6 +4217,8 @@ dwarf2out_end_epilogue (unsigned int line 
ATTRIBUTE_UNUSED,
 void
 dwarf2out_frame_init (void)
 {
+  dw_cfa_location loc;
+
   /* Allocate the initial hunk of the fde_table.  */
   fde_table = ggc_alloc_cleared_vec_dw_fde_node (FDE_TABLE_INCREMENT);
   fde_table_allocated = FDE_TABLE_INCREMENT;
@@ -4282,7 +4228,10 @@ dwarf2out_frame_init (void)
  sake of lookup_cfa.  */
 
   /* On entry, the Canonical Frame Address is at SP.  */
-  dwarf2out_def_cfa (NULL, STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
+  memset(&loc, 0, sizeof (loc));
+  loc.reg = STACK_POINTER_REGNUM;
+  loc.offset = INCOMING_FRAME_SP_OFFSET;
+  def_cfa_1 (NULL, &loc);
 
   if (targetm.debug_unwind_info () == UI_DWARF2
   || targetm_common.except_unwind_info (&global_options) == UI_DWARF2)
diff --git a/gcc/tree.h b/gcc/tree.h
index 20ba295..b642953 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5596,33 +5596,6 @@ extern tree tree_overlaps_hard_reg_set (tree, 
HARD_REG_SET *);
 #endif
 
 
-/* In dwarf2out.c */
-/* Interface of the DWARF2 unwind info support.  */
-
-/* Generate a new label for the CFI info to refer to.  */
-
-extern char *dwarf2out_cfi_label (bool);
-
-/* Entry point to update the canonical frame address (CFA).  */
-
-extern void dwarf2out_def_cfa (const char *, unsigned, HOST_WIDE_INT);
-
-/* Entry point for saving a register to the stack.  */
-
-extern void dwarf2out_reg_save (const char *, unsigned, HOST_WIDE_INT);
-
-/* Entry point for saving the return address in the stack.  */
-
-extern void dwarf2out_return_save (const char *, HOST_WIDE_INT);
-
-/* Entry point for saving the return address in a register.  */
-
-extern void dwarf2out_return_reg (const char *, unsigned);
-
-/* Entry point for saving the first register into the second.  */
-
-extern void dwarf2out_reg_save_reg (const char *, rtx, rtx);
-
 /* In tree-inline.c  */
 
 /* The type of a set of already-visited pointers.  Functions for creating
-- 
1.7.5.4



[PATCH 10/12] ia64: Remove TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.

2011-06-29 Thread Richard Henderson
Use an empty REG_FRAME_RELATED_EXPR instead.
---
 gcc/config/ia64/ia64.c |   23 +--
 1 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index bdabe82..2ebb225 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -319,7 +319,6 @@ static enum machine_mode ia64_promote_function_mode 
(const_tree,
 static void ia64_trampoline_init (rtx, tree, rtx);
 static void ia64_override_options_after_change (void);
 
-static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
 static tree ia64_builtin_decl (unsigned, bool);
 
 static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
@@ -551,8 +550,6 @@ static const struct attribute_spec ia64_attribute_table[] =
 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
 
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC  ia64_dwarf_handle_frame_unspec
 #undef TARGET_ASM_UNWIND_EMIT
 #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
@@ -3793,10 +3790,19 @@ ia64_expand_epilogue (int sibcall_p)
   if (current_frame_info.n_input_regs != 0)
{
  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
+
  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
const0_rtx, const0_rtx,
n_inputs, const0_rtx));
  RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* ??? We need to mark the alloc as frame-related so that it gets
+passed into ia64_asm_unwind_emit for ia64-specific unwinding.
+But there's nothing dwarf2 related to be done wrt the register
+windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
+the empty parallel means dwarf2out will not see anything.  */
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+   gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
}
 }
 }
@@ -9620,17 +9626,6 @@ static bool need_copy_state;
 # define MAX_ARTIFICIAL_LABEL_BYTES 30
 #endif
 
-/* All we need to do here is avoid a crash in the generic dwarf2
-   processing.  The real CFA definition is set up above.  */
-
-static void
-ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
-   rtx ARG_UNUSED (pattern),
-   int index)
-{
-  gcc_assert (index == UNSPECV_ALLOC);
-}
-
 /* The function emits unwind directives for the start of an epilogue.  */
 
 static void
-- 
1.7.5.4



[PATCH 11/12] i386: Always use TARGET_DEEP_BRANCH_PREDICTION.

2011-06-29 Thread Richard Henderson
While it could be possible to rewrite output_set_got such that we can
individually annotate the instructions, it's simpler to admit
that all processors currently being manufactured do want
deep branch prediction, at which point all of the complication
simply goes away.
---
 gcc/config/i386/i386.c |  105 +++
 gcc/config/i386/i386.h |3 -
 2 files changed, 16 insertions(+), 92 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 014401b..332e65b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -55,7 +55,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "cselib.h"
 #include "debug.h"
-#include "dwarf2out.h"
 #include "sched-int.h"
 #include "sbitmap.h"
 #include "fibheap.h"
@@ -1847,10 +1846,6 @@ static unsigned int 
initial_ix86_tune_features[X86_TUNE_LAST] = {
   m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
   | m_CORE2I7 | m_GENERIC,
 
-  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
-  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
-  | m_CORE2I7 | m_GENERIC,
-
   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
  on simulation result. But after P4 was made, no performance benefit
  was observed with branch hints.  It also increases the code size.
@@ -8323,31 +8318,11 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
 
   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
 
-  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+  if (!flag_pic)
 {
   xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
 
-  if (!flag_pic)
-   output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
-  else
-   {
- output_asm_insn ("call\t%a2", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The call to next label acts as a push.  */
- if (dwarf2out_do_frame ())
-   {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
-gen_rtx_PLUS (Pmode,
-  stack_pointer_rtx,
-  GEN_INT (-4;
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
-   }
-#endif
-   }
+  output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
 
 #if TARGET_MACHO
   /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
@@ -8358,29 +8333,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
 
   targetm.asm_out.internal_label (asm_out_file, "L",
  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
-
-  if (flag_pic)
-   {
- output_asm_insn ("pop%z0\t%0", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The pop is a pop and clobbers dest, but doesn't restore it
-for unwind info purposes.  */
- if (dwarf2out_do_frame ())
-   {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
- dwarf2out_frame_debug (insn, true);
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
-gen_rtx_PLUS (Pmode,
-  stack_pointer_rtx,
-  GEN_INT (4;
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
-   }
-#endif
-   }
 }
   else
 {
@@ -8388,12 +8340,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
   get_pc_thunk_name (name, REGNO (dest));
   pic_labels_used |= 1 << REGNO (dest);
 
-#ifdef DWARF2_UNWIND_INFO
-  /* Ensure all queued register saves are flushed before the
-call.  */
-  if (dwarf2out_do_frame ())
-   dwarf2out_flush_queued_reg_saves ();
-#endif
   xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
   xops[2] = gen_rtx_MEM (QImode, xops[2]);
   output_asm_insn ("call\t%X2", xops);
@@ -8408,13 +8354,8 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
 #endif
 }
 
-  if (TARGET_MACHO)
-return "";
-
-  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+  if (!TARGET_MACHO)
 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
-  else
-output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
 
   return "";
 }
@@ -10138,7 +10079,11 @@ ix86_expand_prologue (void)
 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
}
   else
-insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+   {
+  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_FLUSH_QUE

[PATCH 05/12] arm: Use REG_CFA_REGISTER instead of UNSPEC_STACK_ALIGN.

2011-06-29 Thread Richard Henderson
From: Richard Henderson 

This requires a bit of extra preparation in arm_unwind_emit, in
order to handle the change for ARM unwinding.
---
 gcc/config/arm/arm.c  |  114 ++--
 gcc/config/arm/arm.md |2 -
 2 files changed, 62 insertions(+), 54 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 4c6041a..0e371f3 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -203,7 +203,6 @@ static bool arm_output_ttype (rtx);
 static void arm_asm_emit_except_personality (rtx);
 static void arm_asm_init_sections (void);
 #endif
-static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
 static rtx arm_dwarf_register_span (rtx);
 
 static tree arm_cxx_guard_type (void);
@@ -501,9 +500,6 @@ static const struct attribute_spec arm_attribute_table[] =
 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
 #endif /* ARM_UNWIND_INFO */
 
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
-
 #undef TARGET_DWARF_REGISTER_SPAN
 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
 
@@ -15830,9 +15826,8 @@ arm_expand_prologue (void)
 
   if (IS_STACKALIGN (func_type))
 {
-  rtx dwarf;
-  rtx r0;
-  rtx r1;
+  rtx r0, r1;
+
   /* Handle a word-aligned stack pointer.  We generate the following:
 
  mov r0, sp
@@ -15848,15 +15843,18 @@ arm_expand_prologue (void)
 
   r0 = gen_rtx_REG (SImode, 0);
   r1 = gen_rtx_REG (SImode, 1);
-  /* Use a real rtvec rather than NULL_RTVEC so the rest of the
-compiler won't choke.  */
-  dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
-  dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
-  insn = gen_movsi (r0, stack_pointer_rtx);
+
+  insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
   RTX_FRAME_RELATED_P (insn) = 1;
-  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
-  emit_insn (insn);
+  add_reg_note (insn, REG_CFA_REGISTER, NULL);
+
   emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
+
+  /* ??? The CFA changes here, which may cause GDB to conclude that it
+has entered a different function.  That said, the unwind info is
+correct, individually, before and after this instruction because
+we've described the save of SP, which will override the default
+handling of SP as restoring from the CFA.  */
   emit_insn (gen_movsi (stack_pointer_rtx, r1));
 }
 
@@ -22880,13 +22878,6 @@ arm_unwind_emit_set (FILE * asm_out_file, rtx p)
  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
   REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
}
-  else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
-   {
- /* Stack pointer save before alignment.  */
- reg = REGNO (e0);
- asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
-  reg + 0x90, reg);
-   }
   else
abort ();
   break;
@@ -22902,7 +22893,8 @@ arm_unwind_emit_set (FILE * asm_out_file, rtx p)
 static void
 arm_unwind_emit (FILE * asm_out_file, rtx insn)
 {
-  rtx pat;
+  rtx note, pat;
+  bool handled_one = false;
 
   if (arm_except_unwind_info (&global_options) != UI_TARGET)
 return;
@@ -22912,14 +22904,56 @@ arm_unwind_emit (FILE * asm_out_file, rtx insn)
  || crtl->all_throwers_are_sibcalls))
 return;
 
-  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
+  if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
 return;
 
-  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
-  if (pat)
-pat = XEXP (pat, 0);
-  else
-pat = PATTERN (insn);
+  for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
+{
+  pat = XEXP (note, 0);
+  switch (REG_NOTE_KIND (note))
+   {
+   case REG_FRAME_RELATED_EXPR:
+ goto found;
+
+   case REG_CFA_REGISTER:
+ if (pat == NULL)
+   {
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+   pat = XVECEXP (pat, 0, 0);
+   }
+
+ /* Only emitted for IS_STACKALIGN re-alignment.  */
+ {
+   rtx dest, src;
+   unsigned reg;
+
+   src = SET_SRC (pat);
+   dest = SET_DEST (pat);
+
+   gcc_assert (src == stack_pointer_rtx);
+   reg = REGNO (dest);
+   asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
+reg + 0x90, reg);
+ }
+ handled_one = true;
+ break;
+
+   case REG_CFA_DEF_CFA:
+   case REG_CFA_EXPRESSION:
+   case REG_CFA_ADJUST_CFA:
+   case REG_CFA_OFFSET:
+ /* ??? Only handling here what we actually emit.  */
+ gcc_unreachable ();
+
+   default:
+ break;
+   }
+}
+  if (handled_one)
+return;
+  pat = PATTERN (insn);
+ found:
 
   switch (GET_

[PATCH 06/12] ia64: Issue REG_CFA_REGISTER for ar.pfs at alloc insn.

2011-06-29 Thread Richard Henderson
---
 gcc/config/ia64/ia64.c |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index c06903d..0a6f2e6 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -3249,7 +3249,14 @@ ia64_expand_prologue (void)
   GEN_INT (current_frame_info.n_local_regs),
   GEN_INT (current_frame_info.n_output_regs),
   GEN_INT (current_frame_info.n_rotate_regs)));
-  RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 
0);
+  if (current_frame_info.r[reg_save_ar_pfs])
+   {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER,
+   gen_rtx_SET (VOIDmode,
+ar_pfs_save_reg,
+gen_rtx_REG (DImode, AR_PFS_REGNUM)));
+   }
 }
 
   /* Set up frame pointer, stack pointer, and spill iterators.  */
-- 
1.7.5.4



[PATCH 07/12] ia64: Remove ia64_dwarf2out_def_steady_cfa.

2011-06-29 Thread Richard Henderson
This defines nothing that we can't deduce already,
so remove it.
---
 gcc/config/ia64/ia64.c |   24 
 1 files changed, 0 insertions(+), 24 deletions(-)

diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 0a6f2e6..bb3fc4d 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -9642,26 +9642,6 @@ ia64_emit_deleted_label_after_insn (rtx insn)
   return xstrdup (label);
 }
 
-/* Define the CFA after INSN with the steady-state definition.  */
-
-static void
-ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
-{
-  rtx fp = frame_pointer_needed
-? hard_frame_pointer_rtx
-: stack_pointer_rtx;
-  const char *label = ia64_emit_deleted_label_after_insn (insn);
-
-  if (!frame)
-return;
-
-  dwarf2out_def_cfa
-(label, REGNO (fp),
- ia64_initial_elimination_offset
- (REGNO (arg_pointer_rtx), REGNO (fp))
- + ARG_POINTER_CFA_OFFSET (current_function_decl));
-}
-
 /* All we need to do here is avoid a crash in the generic dwarf2
processing.  The real CFA definition is set up above.  */
 
@@ -9727,7 +9707,6 @@ process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx 
insn,
fprintf (asm_out_file,
 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
 -INTVAL (op1));
- ia64_dwarf2out_def_steady_cfa (insn, frame);
}
  else
process_epilogue (asm_out_file, insn, unwind, frame);
@@ -9746,7 +9725,6 @@ process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx 
insn,
   if (unwind)
fprintf (asm_out_file, "\t.vframe r%d\n",
 ia64_dbx_register_number (REGNO (dest)));
-  ia64_dwarf2out_def_steady_cfa (insn, frame);
 }
   else
 gcc_unreachable ();
@@ -9946,8 +9924,6 @@ ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
  fprintf (asm_out_file, "\t.copy_state %d\n",
   cfun->machine->state_num);
}
- if (IA64_CHANGE_CFA_IN_EPILOGUE)
-   ia64_dwarf2out_def_steady_cfa (insn, frame);
  need_copy_state = false;
}
 }
-- 
1.7.5.4



[PATCH 08/12] ia64: Use pc_rtx to save the return address.

2011-06-29 Thread Richard Henderson
When actually emitting dwarf2, this now produces proper results.
When emitting ia64 unwind info, this removes a hard-coded assumption.
---
 gcc/config/ia64/ia64.c |   17 ++---
 1 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index bb3fc4d..b37919c 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -3444,7 +3444,8 @@ ia64_expand_prologue (void)
   reg_emitted (reg_save_b0);
  insn = emit_move_insn (alt_reg, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
- add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
+ add_reg_note (insn, REG_CFA_REGISTER,
+   gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
 
  /* Even if we're not going to generate an epilogue, we still
 need to save the register so that EH works.  */
@@ -9737,20 +9738,22 @@ process_cfa_register (FILE *asm_out_file, rtx pat, bool 
unwind)
 {
   rtx dest = SET_DEST (pat);
   rtx src = SET_SRC (pat);
-
   int dest_regno = REGNO (dest);
-  int src_regno = REGNO (src);
+  int src_regno;
 
-  switch (src_regno)
+  if (src == pc_rtx)
 {
-case BR_REG (0):
   /* Saving return address pointer.  */
-  gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
   if (unwind)
fprintf (asm_out_file, "\t.save rp, r%d\n",
 ia64_dbx_register_number (dest_regno));
-  break;
+  return;
+}
+
+  src_regno = REGNO (src);
 
+  switch (src_regno)
+{
 case PR_REG (0):
   gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
   if (unwind)
-- 
1.7.5.4



[PATCH 09/12] ia64: Remove dead code in process_epilogue.

2011-06-29 Thread Richard Henderson
---
 gcc/config/ia64/ia64.c |   35 ++-
 1 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index b37919c..bdabe82 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -9620,29 +9620,6 @@ static bool need_copy_state;
 # define MAX_ARTIFICIAL_LABEL_BYTES 30
 #endif
 
-/* Emit a debugging label after a call-frame-related insn.  We'd
-   rather output the label right away, but we'd have to output it
-   after, not before, the instruction, and the instruction has not
-   been output yet.  So we emit the label after the insn, delete it to
-   avoid introducing basic blocks, and mark it as preserved, such that
-   it is still output, given that it is referenced in debug info.  */
-
-static const char *
-ia64_emit_deleted_label_after_insn (rtx insn)
-{
-  char label[MAX_ARTIFICIAL_LABEL_BYTES];
-  rtx lb = gen_label_rtx ();
-  rtx label_insn = emit_label_after (lb, insn);
-
-  LABEL_PRESERVE_P (lb) = 1;
-
-  delete_insn (label_insn);
-
-  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
-
-  return xstrdup (label);
-}
-
 /* All we need to do here is avoid a crash in the generic dwarf2
processing.  The real CFA definition is set up above.  */
 
@@ -9654,16 +9631,11 @@ ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED 
(label),
   gcc_assert (index == UNSPECV_ALLOC);
 }
 
-/* The generic dwarf2 frame debug info generator does not define a
-   separate region for the very end of the epilogue, so refrain from
-   doing so in the IA64-specific code as well.  */
-
-#define IA64_CHANGE_CFA_IN_EPILOGUE 0
-
 /* The function emits unwind directives for the start of an epilogue.  */
 
 static void
-process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
+process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
+ bool unwind, bool frame ATTRIBUTE_UNUSED)
 {
   /* If this isn't the last block of the function, then we need to label the
  current state, and copy it back in at the start of the next block.  */
@@ -9678,9 +9650,6 @@ process_epilogue (FILE *asm_out_file, rtx insn, bool 
unwind, bool frame)
 
   if (unwind)
 fprintf (asm_out_file, "\t.restore sp\n");
-  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
-dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
-  STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
 }
 
 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
-- 
1.7.5.4



[PATCH 02/12] dwarf2out: Handle pc_rtx as return column in REG_CFA_OFFSET too.

2011-06-29 Thread Richard Henderson
---
 gcc/dwarf2out.c |   18 ++
 1 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 3ecdd94..6fad7e9 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1982,6 +1982,7 @@ dwarf2out_frame_debug_cfa_offset (rtx set, const char 
*label)
 {
   HOST_WIDE_INT offset;
   rtx src, addr, span;
+  unsigned int sregno;
 
   src = XEXP (set, 1);
   addr = XEXP (set, 0);
@@ -2003,12 +2004,21 @@ dwarf2out_frame_debug_cfa_offset (rtx set, const char 
*label)
   gcc_unreachable ();
 }
 
-  span = targetm.dwarf_register_span (src);
+  if (src == pc_rtx)
+{
+  span = NULL;
+  sregno = DWARF_FRAME_RETURN_COLUMN;
+}
+  else 
+{
+  span = targetm.dwarf_register_span (src);
+  sregno = DWARF_FRAME_REGNUM (REGNO (src));
+}
 
   /* ??? We'd like to use queue_reg_save, but we need to come up with
  a different flushing heuristic for epilogues.  */
   if (!span)
-reg_save (label, DWARF_FRAME_REGNUM (REGNO (src)), INVALID_REGNUM, offset);
+reg_save (label, sregno, INVALID_REGNUM, offset);
   else
 {
   /* We have a PARALLEL describing where the contents of SRC live.
@@ -2024,8 +2034,8 @@ dwarf2out_frame_debug_cfa_offset (rtx set, const char 
*label)
{
  rtx elem = XVECEXP (span, 0, par_index);
 
- reg_save (label, DWARF_FRAME_REGNUM (REGNO (elem)),
-   INVALID_REGNUM, span_offset);
+ sregno = DWARF_FRAME_REGNUM (REGNO (src));
+ reg_save (label, sregno, INVALID_REGNUM, span_offset);
  span_offset += GET_MODE_SIZE (GET_MODE (elem));
}
 }
-- 
1.7.5.4



[PATCH 01/12] dwarf2out: Convert regs_saved_in_regs to VEC.

2011-06-29 Thread Richard Henderson
Also pull out value insertion into a subroutine.
---
 gcc/dwarf2out.c |  115 +++
 1 files changed, 65 insertions(+), 50 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 3d63d7b..3ecdd94 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1732,17 +1732,56 @@ struct GTY(()) queued_reg_save {
 static GTY(()) struct queued_reg_save *queued_reg_saves;
 
 /* The caller's ORIG_REG is saved in SAVED_IN_REG.  */
-struct GTY(()) reg_saved_in_data {
+typedef struct GTY(()) reg_saved_in_data {
   rtx orig_reg;
   rtx saved_in_reg;
-};
+} reg_saved_in_data;
+
+DEF_VEC_O (reg_saved_in_data);
+DEF_VEC_ALLOC_O (reg_saved_in_data, gc);
+
+/* A set of registers saved in other registers.  This is implemented as
+   a flat array because it normally contains zero or 1 entry, depending
+   on the target.  IA-64 is the big spender here, using a maximum of
+   5 entries.  */
+static GTY(()) VEC(reg_saved_in_data, gc) *regs_saved_in_regs;
 
-/* A list of registers saved in other registers.
-   The list intentionally has a small maximum capacity of 4; if your
-   port needs more than that, you might consider implementing a
-   more efficient data structure.  */
-static GTY(()) struct reg_saved_in_data regs_saved_in_regs[4];
-static GTY(()) size_t num_regs_saved_in_regs;
+/* Compare X and Y for equivalence.  The inputs may be REGs or PC_RTX.  */
+
+static bool
+compare_reg_or_pc (rtx x, rtx y)
+{
+  if (REG_P (x) && REG_P (y))
+return REGNO (x) == REGNO (y);
+  return x == y;
+}
+
+/* Record SRC as being saved in DEST.  DEST may be null to delete an
+   existing entry.  SRC may be a register or PC_RTX.  */
+
+static void
+record_reg_saved_in_reg (rtx dest, rtx src)
+{
+  reg_saved_in_data *elt;
+  size_t i;
+
+  FOR_EACH_VEC_ELT (reg_saved_in_data, regs_saved_in_regs, i, elt)
+if (compare_reg_or_pc (elt->orig_reg, src))
+  {
+   if (dest == NULL)
+ VEC_unordered_remove(reg_saved_in_data, regs_saved_in_regs, i);
+   else
+ elt->saved_in_reg = dest;
+   return;
+  }
+
+  if (dest == NULL)
+return;
+
+  elt = VEC_safe_push(reg_saved_in_data, gc, regs_saved_in_regs, NULL);
+  elt->orig_reg = src;
+  elt->saved_in_reg = dest;
+}
 
 static const char *last_reg_save_label;
 
@@ -1784,22 +1823,9 @@ dwarf2out_flush_queued_reg_saves (void)
 
   for (q = queued_reg_saves; q; q = q->next)
 {
-  size_t i;
   unsigned int reg, sreg;
 
-  for (i = 0; i < num_regs_saved_in_regs; i++)
-   if (REGNO (regs_saved_in_regs[i].orig_reg) == REGNO (q->reg))
- break;
-  if (q->saved_reg && i == num_regs_saved_in_regs)
-   {
- gcc_assert (i != ARRAY_SIZE (regs_saved_in_regs));
- num_regs_saved_in_regs++;
-   }
-  if (i != num_regs_saved_in_regs)
-   {
- regs_saved_in_regs[i].orig_reg = q->reg;
- regs_saved_in_regs[i].saved_in_reg = q->saved_reg;
-   }
+  record_reg_saved_in_reg (q->saved_reg, q->reg);
 
   reg = DWARF_FRAME_REGNUM (REGNO (q->reg));
   if (q->saved_reg)
@@ -1826,11 +1852,14 @@ clobbers_queued_reg_save (const_rtx insn)
   for (q = queued_reg_saves; q; q = q->next)
 {
   size_t i;
+  reg_saved_in_data *rir;
+
   if (modified_in_p (q->reg, insn))
return true;
-  for (i = 0; i < num_regs_saved_in_regs; i++)
-   if (REGNO (q->reg) == REGNO (regs_saved_in_regs[i].orig_reg)
-   && modified_in_p (regs_saved_in_regs[i].saved_in_reg, insn))
+
+  FOR_EACH_VEC_ELT (reg_saved_in_data, regs_saved_in_regs, i, rir)
+   if (compare_reg_or_pc (q->reg, rir->orig_reg)
+   && modified_in_p (rir->saved_in_reg, insn))
  return true;
 }
 
@@ -1842,19 +1871,9 @@ clobbers_queued_reg_save (const_rtx insn)
 void
 dwarf2out_reg_save_reg (const char *label, rtx reg, rtx sreg)
 {
-  size_t i;
   unsigned int regno, sregno;
 
-  for (i = 0; i < num_regs_saved_in_regs; i++)
-if (REGNO (regs_saved_in_regs[i].orig_reg) == REGNO (reg))
-  break;
-  if (i == num_regs_saved_in_regs)
-{
-  gcc_assert (i != ARRAY_SIZE (regs_saved_in_regs));
-  num_regs_saved_in_regs++;
-}
-  regs_saved_in_regs[i].orig_reg = reg;
-  regs_saved_in_regs[i].saved_in_reg = sreg;
+  record_reg_saved_in_reg (sreg, reg);
 
   regno = DWARF_FRAME_REGNUM (REGNO (reg));
   sregno = DWARF_FRAME_REGNUM (REGNO (sreg));
@@ -1867,17 +1886,17 @@ static rtx
 reg_saved_in (rtx reg)
 {
   unsigned int regn = REGNO (reg);
-  size_t i;
   struct queued_reg_save *q;
+  reg_saved_in_data *rir;
+  size_t i;
 
   for (q = queued_reg_saves; q; q = q->next)
 if (q->saved_reg && regn == REGNO (q->saved_reg))
   return q->reg;
 
-  for (i = 0; i < num_regs_saved_in_regs; i++)
-if (regs_saved_in_regs[i].saved_in_reg
-   && regn == REGNO (regs_saved_in_regs[i].saved_in_reg))
-  return regs_saved_in_regs[i].orig_reg;
+  FOR_EACH_VEC_ELT (reg_saved_in_data, regs_saved_in_regs, i, rir)
+if (regn == REGNO (r

[PATCH 00/12] Eliminate the text-based dwarf2out interface

2011-06-29 Thread Richard Henderson
In preparation for the changes required to support shrink-wrapping,
it makes things Much Easier if we no longer have to handle seeing
new unwind states being introduced during pass_final, while emitting
the actual assembly.  Thus we want to convert everything that used
the text-based interfaces to use the REG_CFA_* notes instead.

The following set has been tested on i386-linux, x86_64-linux,
ia64-linux, arm-elf, arm-eabi, and a cross-compile to vax-linux.


r~


Richard Henderson (12):
  dwarf2out: Convert regs_saved_in_regs to VEC.
  dwarf2out: Handle pc_rtx as return column in REG_CFA_OFFSET too.
  dwarf2out: Add REG_CFA_FLUSH_QUEUE.
  vax: Emit prologue as rtl.
  arm: Use REG_CFA_REGISTER instead of UNSPEC_STACK_ALIGN.
  ia64: Issue REG_CFA_REGISTER for ar.pfs at alloc insn.
  ia64: Remove ia64_dwarf2out_def_steady_cfa.
  ia64: Use pc_rtx to save the return address.
  ia64: Remove dead code in process_epilogue.
  ia64: Remove TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
  i386: Always use TARGET_DEEP_BRANCH_PREDICTION.
  dwarf2out: Remove unused text-based unwind entry points.

 gcc/config/arm/arm.c|  114 --
 gcc/config/arm/arm.md   |2 -
 gcc/config/i386/i386.c  |  105 +++
 gcc/config/i386/i386.h  |3 -
 gcc/config/ia64/ia64.c  |  108 ++---
 gcc/config/vax/vax-protos.h |1 +
 gcc/config/vax/vax.c|   86 -
 gcc/config/vax/vax.md   |   25 -
 gcc/dwarf2out.c |  233 ---
 gcc/final.c |4 +-
 gcc/reg-notes.def   |   11 ++-
 gcc/tree.h  |   27 -
 12 files changed, 313 insertions(+), 406 deletions(-)

-- 
1.7.5.4



[PATCH] Fix empty .debug_abbrev handling (PR debug/49364)

2011-06-29 Thread Jakub Jelinek
Hi!

Mark has recently changed dwarf2out to not emit anything into .debug_abbrev
section if there are no abbreviations, but as Rainer has reported, some
vendor tools are upset about it.  We were still emitting the .debug_abbrev
section, just with zero size, in order to emit the .Ldebug_abbrev0
symbol into it (which isn't used anywhere though).  So, I wonder if we
can fix that by not adding .section .debug_abbrev and emitting
the label in that case at all.  Rainer, does this make SGI tools
happy?

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-06-29  Jakub Jelinek  

PR debug/49364
* dwarf2out.c (output_abbrev_section): Don't return early
if abbrev_die_table_in_use is 1.
(dwarf2out_finish): Instead don't call output_abbrev_section
nor emit abbrev_section_label in that case.

--- gcc/dwarf2out.c.jj  2011-06-28 16:12:08.0 +0200
+++ gcc/dwarf2out.c 2011-06-29 19:25:12.0 +0200
@@ -11358,9 +11358,6 @@ output_abbrev_section (void)
 {
   unsigned long abbrev_id;
 
-  if (abbrev_die_table_in_use == 1)
-return;
-
   for (abbrev_id = 1; abbrev_id < abbrev_die_table_in_use; ++abbrev_id)
 {
   dw_die_ref abbrev = abbrev_die_table[abbrev_id];
@@ -25226,9 +25223,12 @@ dwarf2out_finish (const char *filename)
   output_comp_unit (comp_unit_die (), debug_info_level >= DINFO_LEVEL_VERBOSE);
 
   /* Output the abbreviation table.  */
-  switch_to_section (debug_abbrev_section);
-  ASM_OUTPUT_LABEL (asm_out_file, abbrev_section_label);
-  output_abbrev_section ();
+  if (abbrev_die_table_in_use != 1)
+{
+  switch_to_section (debug_abbrev_section);
+  ASM_OUTPUT_LABEL (asm_out_file, abbrev_section_label);
+  output_abbrev_section ();
+}
 
   /* Output location list section if necessary.  */
   if (have_location_lists)

Jakub


[PATCH] Reintroduce repeat field for ctors (PR fortran/49540)

2011-06-29 Thread Jakub Jelinek
Hi!

As discussed in the PR and can be seen on the first testcase,
the removal of repeat field for ctors resulted in huge memory consumption
of the fortran FE on some real-world testcases (the first testcase
needs several GB of memory to compile).

This patch reintroduces the repeat field and handles the cases where
a range ctor is replacing some other ctors in that range or
some ctor is replacing a part of a range ctor.  It should result
in no change in the generated code, just use much less memory
in larger testcases.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk
and after a while for 4.6 too?

OT, the results before as well as after the patch for the second testcase are
unexpected, it seems the DATA stmts are processed in reverse order, so that
the first one wins instead of last one, unlike e.g. ifort or gfortran 4.1.
I guess we should change that, but independently of this patch.

2011-06-27  Jakub Jelinek  

PR fortran/49540
* gfortran.h (gfc_constructor): Add repeat field.
* trans-array.c (gfc_conv_array_initializer): Handle repeat > 1.
* array.c (current_expand): Add repeat field.
(expand_constructor): Copy repeat.
* constructor.c (node_free, node_copy, gfc_constructor_get,
gfc_constructor_lookup): Handle repeat field.
(gfc_constructor_lookup_next, gfc_constructor_remove): New functions.
* data.h (gfc_assign_data_value): Add mpz_t * argument.
(gfc_assign_data_value_range): Removed.
* constructor.h (gfc_constructor_advance): Removed.
(gfc_constructor_lookup_next, gfc_constructor_remove): New prototypes.
* data.c (gfc_assign_data_value): Add REPEAT argument, handle it and
also handle overwriting a range with a single entry.
(gfc_assign_data_value_range): Removed.
* resolve.c (check_data_variable): Adjust gfc_assign_data_value
call.  Use gfc_assign_data_value instead of
gfc_assign_data_value_expr.

* gfortran.dg/pr49540-1.f90: New test.
* gfortran.dg/pr49540-2.f90: New test.

--- gcc/fortran/gfortran.h.jj   2011-06-21 16:45:54.0 +0200
+++ gcc/fortran/gfortran.h  2011-06-27 18:37:45.0 +0200
@@ -2271,6 +2271,8 @@ typedef struct gfc_constructor
  gfc_component *component; /* Record the component being initialized.  */
   }
   n;
+  mpz_t repeat; /* Record the repeat number of initial values in data
+ statement like "data a/5*10/".  */
 }
 gfc_constructor;
 
--- gcc/fortran/trans-array.c.jj2011-06-17 11:02:01.0 +0200
+++ gcc/fortran/trans-array.c   2011-06-29 15:15:04.0 +0200
@@ -4555,7 +4555,7 @@ gfc_conv_array_initializer (tree type, g
   gfc_se se;
   HOST_WIDE_INT hi;
   unsigned HOST_WIDE_INT lo;
-  tree index;
+  tree index, range;
   VEC(constructor_elt,gc) *v = NULL;
 
   switch (expr->expr_type)
@@ -4608,29 +4608,56 @@ gfc_conv_array_initializer (tree type, g
 index = gfc_conv_mpz_to_tree (c->offset, gfc_index_integer_kind);
   else
 index = NULL_TREE;
+ if (mpz_cmp_si (c->repeat, 1) > 0)
+   {
+ tree tmp1, tmp2;
+ mpz_t maxval;
+
+ mpz_init (maxval);
+ mpz_add (maxval, c->offset, c->repeat);
+ mpz_sub_ui (maxval, maxval, 1);
+ tmp2 = gfc_conv_mpz_to_tree (maxval, gfc_index_integer_kind);
+ if (mpz_cmp_si (c->offset, 0) != 0)
+   {
+ mpz_add_ui (maxval, c->offset, 1);
+ tmp1 = gfc_conv_mpz_to_tree (maxval, gfc_index_integer_kind);
+   }
+ else
+   tmp1 = gfc_conv_mpz_to_tree (c->offset, gfc_index_integer_kind);
+
+ range = fold_build2 (RANGE_EXPR, integer_type_node, tmp1, tmp2);
+ mpz_clear (maxval);
+   }
+ else
+   range = NULL;
 
   gfc_init_se (&se, NULL);
  switch (c->expr->expr_type)
{
case EXPR_CONSTANT:
  gfc_conv_constant (&se, c->expr);
- CONSTRUCTOR_APPEND_ELT (v, index, se.expr);
  break;
 
case EXPR_STRUCTURE:
   gfc_conv_structure (&se, c->expr, 1);
- CONSTRUCTOR_APPEND_ELT (v, index, se.expr);
  break;
 
-
default:
  /* Catch those occasional beasts that do not simplify
 for one reason or another, assuming that if they are
 standard defying the frontend will catch them.  */
  gfc_conv_expr (&se, c->expr);
- CONSTRUCTOR_APPEND_ELT (v, index, se.expr);
  break;
}
+
+ if (range == NULL_TREE)
+   CONSTRUCTOR_APPEND_ELT (v, index, se.expr);
+ else
+   {
+ if (index != NULL_TREE)
+   CONSTRUCTOR_APPEND_ELT (v, index, se.expr);
+ CONSTRUCTOR_APPEND_ELT (v, range, se.expr);
+   }
   

Re: [trans-mem] Beginning of refactoring

2011-06-29 Thread Richard Henderson
On 06/29/2011 02:12 PM, Torvald Riegel wrote:
> Patch contains more clean-up: the trycommit, rollback and
> registerThrownObject functions in the ABI are unnecessary for how we
> support exception handling.
> 
> Ok for branch?
> 

Looks good.


r~


[PATCH] Sign extend before converting constants to GMP values.

2011-06-29 Thread Sebastian Pop
Hi,

This patch fixes PR47653 by sign extending the double int constants
before converting them to a GMP value.  There still are some places
where we should not sign extend the values converted: upper bounds of
unsigned types should for example not be sign extended.

The patch passed make -k check RUNTESTFLAGS=graphite.exp on c,c++,fortran.
Regstrap with all languages in progress on amd64-linux.  Ok for trunk?

Thanks,
Sebastian


2011-06-29  Sebastian Pop  

PR tree-optimization/47653
* graphite-ppl.h (tree_int_to_gmp): Sign extend before converting
constants to GMP values.  Add a sext parameter.
(ppl_set_inhomogeneous_tree): Add sext parameter.
(ppl_set_coef_tree): Removed.
* graphite-sese-to-poly.c (scan_tree_for_params_right_scev): Adjust
call to tree_int_to_gmp.
(scan_tree_for_params_int): Use tree_int_to_gmp.
(scan_tree_for_params): Adjust call to tree_int_to_gmp.
(build_loop_iteration_domains): Adjust call to
ppl_set_inhomogeneous_tree.
(add_param_constraints): Same.
(pdr_add_data_dimensions): Same.

* gcc.dg/graphite/run-id-pr47653.c: New.
---
 gcc/ChangeLog  |   16 
 gcc/graphite-ppl.h |   30 +--
 gcc/graphite-sese-to-poly.c|   27 ++---
 gcc/testsuite/ChangeLog|5 
 gcc/testsuite/gcc.dg/graphite/run-id-pr47653.c |   17 +
 5 files changed, 63 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/run-id-pr47653.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e37d823..bed0070 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2011-06-29  Sebastian Pop  
+
+   PR tree-optimization/47653
+   * graphite-ppl.h (tree_int_to_gmp): Sign extend before converting
+   constants to GMP values.  Add a sext parameter.
+   (ppl_set_inhomogeneous_tree): Add sext parameter.
+   (ppl_set_coef_tree): Removed.
+   * graphite-sese-to-poly.c (scan_tree_for_params_right_scev): Adjust
+   call to tree_int_to_gmp.
+   (scan_tree_for_params_int): Use tree_int_to_gmp.
+   (scan_tree_for_params): Adjust call to tree_int_to_gmp.
+   (build_loop_iteration_domains): Adjust call to
+   ppl_set_inhomogeneous_tree.
+   (add_param_constraints): Same.
+   (pdr_add_data_dimensions): Same.
+
 2011-06-29  Eric Botcazou  
 
PR tree-optimization/49539
diff --git a/gcc/graphite-ppl.h b/gcc/graphite-ppl.h
index 695d01f..4ae9f63 100644
--- a/gcc/graphite-ppl.h
+++ b/gcc/graphite-ppl.h
@@ -50,13 +50,18 @@ void debug_gmp_value (mpz_t);
 bool ppl_powerset_is_empty (ppl_Pointset_Powerset_C_Polyhedron_t);
 
 
-/* Assigns to RES the value of the INTEGER_CST T.  */
+/* Assigns to RES the value of the INTEGER_CST T.  When SEXT is true,
+   sign extend the value of T to not get "-1 = 2^n - 1".  */
 
 static inline void
-tree_int_to_gmp (tree t, mpz_t res)
+tree_int_to_gmp (tree t, mpz_t res, bool sext)
 {
+  tree type = TREE_TYPE (t);
   double_int di = tree_to_double_int (t);
-  mpz_set_double_int (res, di, TYPE_UNSIGNED (TREE_TYPE (t)));
+
+  if (sext)
+di = double_int_sext (di, TYPE_PRECISION (type));
+  mpz_set_double_int (res, di, false);
 }
 
 /* Converts a GMP constant VAL to a tree and returns it.  */
@@ -88,14 +93,15 @@ ppl_set_inhomogeneous (ppl_Linear_Expression_t e, int x)
   mpz_clear (v);
 }
 
-/* Set the inhomogeneous term of E to the tree X.  */
+/* Set the inhomogeneous term of E to the tree X.  When SEXT is true,
+   sign extend the value of X.  */
 
 static inline void
-ppl_set_inhomogeneous_tree (ppl_Linear_Expression_t e, tree x)
+ppl_set_inhomogeneous_tree (ppl_Linear_Expression_t e, tree x, bool sext)
 {
   mpz_t v;
   mpz_init (v);
-  tree_int_to_gmp (x, v);
+  tree_int_to_gmp (x, v, sext);
   ppl_set_inhomogeneous_gmp (e, v);
   mpz_clear (v);
 }
@@ -112,18 +118,6 @@ ppl_set_coef (ppl_Linear_Expression_t e, 
ppl_dimension_type i, int x)
   mpz_clear (v);
 }
 
-/* Set E[I] to tree X.  */
-
-static inline void
-ppl_set_coef_tree (ppl_Linear_Expression_t e, ppl_dimension_type i, tree x)
-{
-  mpz_t v;
-  mpz_init (v);
-  tree_int_to_gmp (x, v);
-  ppl_set_coef_gmp (e, i, v);
-  mpz_clear (v);
-}
-
 /* Sets RES to the max of V1 and V2.  */
 
 static inline void
diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 7e23c9d..5f8188b 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -633,7 +633,7 @@ scan_tree_for_params_right_scev (sese s, tree e, int var,
   gcc_assert (TREE_CODE (e) == INTEGER_CST);
 
   mpz_init (val);
-  tree_int_to_gmp (e, val);
+  tree_int_to_gmp (e, val, true);
   add_value_to_dim (l, expr, val);
   mpz_clear (val);
 }
@@ -647,14 +647,9 @@ scan_tree_for_params_int (tree cst, 
ppl_Linear_Expression_t expr, mpz_t k)
 {
   mpz_t val;
   ppl_Coefficien

Re: C++ PATCH for c++/49554 (bogus error with lambda in template)

2011-06-29 Thread Jason Merrill
While looking at this issue, I also noticed that we were substituting 
into DECL_INITIAL more than we need to.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 327bbebe3243b0423932862d3d12f92928853e9c
Author: Jason Merrill 
Date:   Wed Jun 29 14:21:17 2011 -0400

	* pt.c (tsubst_decl) [VAR_DECL]: In unevaluated operand,
	don't tsubst DECL_INITIAL unless our type uses auto.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index d1d8336..dc6cd50 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10138,12 +10138,11 @@ tsubst_decl (tree t, tree args, tsubst_flags_t complain)
 	   scope, such as for a lambda return type.  Don't add it to
 	   local_specializations, do perform auto deduction.  */
 	tree auto_node = type_uses_auto (type);
-	tree init
-	  = tsubst_expr (DECL_INITIAL (t), args, complain, in_decl,
-			 /*constant_expression_p=*/false);
-
-	if (auto_node && init)
+	if (auto_node)
 	  {
+		tree init
+		  = tsubst_expr (DECL_INITIAL (t), args, complain, in_decl,
+ /*constant_expression_p=*/false);
 		init = resolve_nondeduced_context (init);
 		TREE_TYPE (r) = type
 		  = do_auto_deduction (type, init, auto_node);


C++ PATCH for c++/49003 (DR 1207, use of 'this' in trailing return type)

2011-06-29 Thread Jason Merrill
This patch adds support for use of 'this' (implicitly or explicitly) in 
the trailing-return-type of a member function.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 8154a9854f1d5ff3407af1707d112899eb39f013
Author: Jason Merrill 
Date:   Wed Jun 29 15:19:46 2011 -0400

	DR 1207
	PR c++/49003
	* cp-tree.h (struct saved_scope): Add x_current_class_ptr,
	x_current_class_ref.
	(current_class_ptr, current_class_ref): Use them.
	* decl.c (build_this_parm): Handle getting the class type.
	* parser.c (cp_parser_late_return_type_opt): Set up 'this'
	for use within the trailing return type.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 55c88e3..ef25c97 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -953,6 +953,10 @@ struct GTY(()) saved_scope {
   struct cp_binding_level *x_previous_class_level;
   tree x_saved_tree;
 
+  /* Only used for uses of this in trailing return type.  */
+  tree x_current_class_ptr;
+  tree x_current_class_ref;
+
   int x_processing_template_decl;
   int x_processing_specialization;
   BOOL_BITFIELD x_processing_explicit_instantiation : 1;
@@ -1070,12 +1074,14 @@ struct GTY(()) language_function {
PARM_DECL for the `this' pointer.  The current_class_ref is an
expression for `*this'.  */
 
-#define current_class_ptr \
-  (cfun && cp_function_chain	\
-   ? cp_function_chain->x_current_class_ptr : NULL_TREE)
-#define current_class_ref \
-  ((cfun && cp_function_chain)  \
-   ? cp_function_chain->x_current_class_ref : NULL_TREE)
+#define current_class_ptr			\
+  (*(cfun && cp_function_chain			\
+ ? &cp_function_chain->x_current_class_ptr	\
+ : &scope_chain->x_current_class_ptr))
+#define current_class_ref			\
+  (*(cfun && cp_function_chain			\
+ ? &cp_function_chain->x_current_class_ref	\
+ : &scope_chain->x_current_class_ref))
 
 /* The EH_SPEC_BLOCK for the exception-specifiers for the current
function, if any.  */
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index b8435a6..94d686d 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -7001,7 +7001,14 @@ build_this_parm (tree type, cp_cv_quals quals)
   tree parm;
   cp_cv_quals this_quals;
 
-  this_type = type_of_this_parm (type);
+  if (CLASS_TYPE_P (type))
+{
+  this_type
+	= cp_build_qualified_type (type, quals & ~TYPE_QUAL_RESTRICT);
+  this_type = build_pointer_type (this_type);
+}
+  else
+this_type = type_of_this_parm (type);
   /* The `this' parameter is implicitly `const'; it cannot be
  assigned to.  */
   this_quals = (quals & TYPE_QUAL_RESTRICT) | TYPE_QUAL_CONST;
@@ -12675,6 +12682,7 @@ start_preparsed_function (tree decl1, tree attrs, int flags)
 
   cp_function_chain->x_current_class_ref
 	= cp_build_indirect_ref (t, RO_NULL, tf_warning_or_error);
+  /* Set this second to avoid shortcut in cp_build_indirect_ref.  */
   cp_function_chain->x_current_class_ptr = t;
 
   /* Constructors and destructors need to know whether they're "in
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index f1b7976..d79326d 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -1696,7 +1696,7 @@ static cp_cv_quals cp_parser_cv_qualifier_seq_opt
 static cp_virt_specifiers cp_parser_virt_specifier_seq_opt
   (cp_parser *);
 static tree cp_parser_late_return_type_opt
-  (cp_parser *);
+  (cp_parser *, cp_cv_quals);
 static tree cp_parser_declarator_id
   (cp_parser *, bool);
 static tree cp_parser_type_id
@@ -14968,7 +14968,7 @@ cp_parser_direct_declarator (cp_parser* parser,
 		  virt_specifiers = cp_parser_virt_specifier_seq_opt (parser);
 
 		  late_return
-		= cp_parser_late_return_type_opt (parser);
+		= cp_parser_late_return_type_opt (parser, cv_quals);
 
 		  /* Create the function-declarator.  */
 		  declarator = make_call_declarator (declarator,
@@ -15537,9 +15537,10 @@ cp_parser_virt_specifier_seq_opt (cp_parser* parser)
Returns the type indicated by the type-id.  */
 
 static tree
-cp_parser_late_return_type_opt (cp_parser* parser)
+cp_parser_late_return_type_opt (cp_parser* parser, cp_cv_quals quals)
 {
   cp_token *token;
+  tree type;
 
   /* Peek at the next token.  */
   token = cp_lexer_peek_token (parser->lexer);
@@ -15550,7 +15551,23 @@ cp_parser_late_return_type_opt (cp_parser* parser)
   /* Consume the ->.  */
   cp_lexer_consume_token (parser->lexer);
 
-  return cp_parser_trailing_type_id (parser);
+  if (current_class_type)
+{
+  /* DR 1207: 'this' is in scope in the trailing return type.  */
+  tree this_parm = build_this_parm (current_class_type, quals);
+  gcc_assert (current_class_ptr == NULL_TREE);
+  current_class_ref
+	= cp_build_indirect_ref (this_parm, RO_NULL, tf_warning_or_error);
+  /* Set this second to avoid shortcut in cp_build_indirect_ref.  */
+  current_class_ptr = this_parm;
+}
+
+  type = cp_parser_trailing_type_id (parser);
+
+  if (current_class_type)
+current_class_ptr = current_class_ref = NULL_TREE;

Re: [trans-mem] Beginning of refactoring

2011-06-29 Thread Torvald Riegel
Patch contains more clean-up: the trycommit, rollback and
registerThrownObject functions in the ABI are unnecessary for how we
support exception handling.

Ok for branch?

commit 79ae722913f40715882b4c7b3fb798306f1ad3ae
Author: Torvald Riegel 
Date:   Wed Jun 29 23:05:28 2011 +0200

Removed unnecessary trycommit, rollback, and registerThrownObject ABI 
functions.

diff --git a/libitm/beginend.cc b/libitm/beginend.cc
index 7ed5073..9b16973 100644
--- a/libitm/beginend.cc
+++ b/libitm/beginend.cc
@@ -184,25 +184,12 @@ GTM::gtm_transaction::rollback ()
 }
 
 void ITM_REGPARM
-_ITM_rollbackTransaction (void)
-{
-  gtm_transaction *tx = gtm_tx();
-  
-  assert ((tx->prop & pr_hasNoAbort) == 0);
-  assert ((tx->state & gtm_transaction::STATE_ABORTING) == 0);
-
-  tx->rollback ();
-  tx->state |= gtm_transaction::STATE_ABORTING;
-}
-
-void ITM_REGPARM
 _ITM_abortTransaction (_ITM_abortReason reason)
 {
   gtm_transaction *tx = gtm_tx();
 
   assert (reason == userAbort);
   assert ((tx->prop & pr_hasNoAbort) == 0);
-  assert ((tx->state & gtm_transaction::STATE_ABORTING) == 0);
 
   if (tx->state & gtm_transaction::STATE_IRREVOCABLE)
 abort ();
@@ -238,7 +225,7 @@ GTM::gtm_transaction::trycommit ()
 bool
 GTM::gtm_transaction::trycommit_and_finalize ()
 {
-  if ((this->state & gtm_transaction::STATE_ABORTING) || trycommit ())
+  if (trycommit ())
 {
   abi_disp()->fini ();
   set_gtm_tx (this->prev);
@@ -252,14 +239,6 @@ GTM::gtm_transaction::trycommit_and_finalize ()
   return false;
 }
 
-bool ITM_REGPARM
-_ITM_tryCommitTransaction (void)
-{
-  gtm_transaction *tx = gtm_tx();
-  assert ((tx->state & gtm_transaction::STATE_ABORTING) == 0);
-  return tx->trycommit ();
-}
-
 void ITM_NORETURN
 GTM::gtm_transaction::restart (gtm_restart_reason r)
 {
diff --git a/libitm/libitm.h b/libitm/libitm.h
index df89d33..abd4274 100644
--- a/libitm/libitm.h
+++ b/libitm/libitm.h
@@ -45,7 +45,7 @@ extern "C" {
 
 #define ITM_NORETURN   __attribute__((noreturn))
 #define ITM_PURE __attribute__((transaction_pure))
-
+
 /* The following are externally visible definitions and functions, though
only very few of these should be called by user code.  */
 
@@ -91,6 +91,7 @@ typedef enum
pr_RaRBarriersOmitted   = 0x0200,
pr_undoLogCode  = 0x0400,
pr_preferUninstrumented = 0x0800,
+   /* Exception blocks are not used nor supported. */
pr_exceptionBlock   = 0x1000,
pr_readOnly = 0x4000,
pr_hasElse  = 0x20,
@@ -138,12 +139,8 @@ extern _ITM_transactionId_t _ITM_getTransactionId(void) 
ITM_REGPARM;
 extern uint32_t _ITM_beginTransaction(uint32_t, ...) ITM_REGPARM;
 
 extern void _ITM_abortTransaction(_ITM_abortReason) ITM_REGPARM ITM_NORETURN;
-extern void _ITM_rollbackTransaction (void) ITM_REGPARM;
 
 extern void _ITM_commitTransaction (void) ITM_REGPARM;
-extern bool _ITM_tryCommitTransaction(void) ITM_REGPARM;
-
-extern void _ITM_registerThrownObject (const void *, size_t) ITM_REGPARM;
 
 extern void _ITM_changeTransactionMode (_ITM_transactionState) ITM_REGPARM;
 
diff --git a/libitm/libitm.map b/libitm/libitm.map
index 0d52a7c..49d0b1b 100644
--- a/libitm/libitm.map
+++ b/libitm/libitm.map
@@ -12,9 +12,6 @@ LIBITM_1.0 {
_ITM_getTransactionId;
_ITM_inTransaction;
_ITM_libraryVersion;
-   _ITM_registerThrownObject;
-   _ITM_rollbackTransaction;
-   _ITM_tryCommitTransaction;
_ITM_versionCompatible;
 
_ITM_registerTMCloneTable;
diff --git a/libitm/libitm.texi b/libitm/libitm.texi
index 8d8de8c..046b0bd 100644
--- a/libitm/libitm.texi
+++ b/libitm/libitm.texi
@@ -221,25 +221,34 @@ reported for the dynamic scope as well, not just for the 
lexical scope
 (@code{hasNoAbort}). Without this, a library cannot exploit this together
 with flat nesting.
 
-@strong{TODO} @code{exceptionBlock?}
+@code{exceptionBlock} is not supported because exception blocks are not used.
 
 @subsubsection [No changes] Windows exception state
 @subsubsection [No changes] Other machine state
 
-@subsubsection Results from beginTransaction
-@strong{TODO} @code{abortTransaction} supported?
+@subsubsection [No changes] Results from beginTransaction
 
 @subsection Aborting a transaction
-@strong{TODO} make consistent with EH.
 
-@subsection Committing a transaction
-@strong{TODO} make consistent with EH.
+@code{_ITM_rollbackTransaction} is not supported. @code{_ITM_abortTransaction}
+is supported but the abort reason @code{exceptionBlockAbort} is not (and there
+are no exception blocks in general, so the related cases also do not have to
+be considered).
 
-@subsection Exception handling support
+@subsection Committing a transaction
 
-@strong{TODO} Document wrappers. Document code generated for commit, perhaps
-with examples similar to those in the specification. What can be removed from
-the ABI in turn? Document requirements on libstdc++ (@code{_cxa_tm_cleanup()}).
+The exception handling (EH) scheme is di

Re: [gcc patch] Re: C++ member function template id not matching linkage name (PR debug/49408)

2011-06-29 Thread Jan Kratochvil
On Wed, 29 Jun 2011 22:56:26 +0200, Jason Merrill wrote:
> On 06/29/2011 04:00 PM, Jan Kratochvil wrote:
> > On Mon, 27 Jun 2011 20:00:24 +0200, Jason Merrill wrote:
> >   # decltype/fn call test
> >   --format=gnu-v3
> >   _Z4add3IidEDTclL_Z1gEfp_fp0_EET_T0_
> > -decltype (g({parm#1}, {parm#2})) add3(int, double)
> > +decltype (g) add3(int, double)
> 
> Here you're suppressing the arguments to a call, which we want to keep;
> we only want to suppress printing the parameter types (which are not
> part of the source expression).

Sorry but what is therefore the expected output in this case?


Thanks,
Jan


Re: [gcc patch] Re: C++ member function template id not matching linkage name (PR debug/49408)

2011-06-29 Thread Jason Merrill
On 06/29/2011 04:00 PM, Jan Kratochvil wrote:
> On Mon, 27 Jun 2011 20:00:24 +0200, Jason Merrill wrote:
>> They should be suppressed whenever the function appears in an
>> expression context, either as a pointer to member function (i.e. the
>> operand of '&')
> 
> Done, therefore it is no longer restricted only to templates as before.
> 
>> or as the function being called in a call expression.
> 
> I implemented it in the patch below but I do not agree + understand it.
> 
> The call expression is in libiberty/testsuite/demangle-expected modified by
> this patch as:
> 
>   # decltype/fn call test
>   --format=gnu-v3
>   _Z4add3IidEDTclL_Z1gEfp_fp0_EET_T0_
> -decltype (g({parm#1}, {parm#2})) add3(int, double)
> +decltype (g) add3(int, double)

Here you're suppressing the arguments to a call, which we want to keep;
we only want to suppress printing the parameter types (which are not
part of the source expression).

Jason


[pph] Fix ICE during function expansion (issue4662067)

2011-06-29 Thread Diego Novillo
This patch fixes a segmentation fault during function expansion
by delaying all expansions to happen after the PPH file has been
read.

The patch fixes one test (x1template.cc), but it exposes a common
ICE in resume_scope in two other tests.  Lawrence, this is the
ICE that you are now looking at, I think.  Your patch should fix
these two cases now.

Tested on x86_64.  Committed to the branch.

* pph-streamer-in.c (pph_read_file_contents): Call
expand_or_defer_fn on every function in stream->fns_to_expand.
(pph_in_function_decl): If FNDECL has a body, add it to
stream->fns_to_expand.
(pph_read_tree): Tidy argument list.
* pph-streamer.h (struct pph_stream): Add field fns_to_expand.

testsuite/ChangeLog.pph

* g++.dg/pph/x1dynarray1.cc: Adjust xfail.
* g++.dg/pph/x1namespace.cc: Likewise.
* g++.dg/pph/x1template.cc: Mark fixed.

diff --git a/gcc/cp/ChangeLog.pph b/gcc/cp/ChangeLog.pph
index 9465484..eaf7121 100644
--- a/gcc/cp/ChangeLog.pph
+++ b/gcc/cp/ChangeLog.pph
@@ -1,4 +1,13 @@
 2011-06-29   Diego Novillo  
+
+   * pph-streamer-in.c (pph_read_file_contents): Call
+   expand_or_defer_fn on every function in stream->fns_to_expand.
+   (pph_in_function_decl): If FNDECL has a body, add it to
+   stream->fns_to_expand.
+   (pph_read_tree): Tidy argument list.
+   * pph-streamer.h (struct pph_stream): Add field fns_to_expand.
+
+2011-06-29   Diego Novillo  
 Lawrence Crowl  
 
* pph-streamer-in.c (pph_in_lang_specific): Expect shared
diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index 1c71c30..3ac5243 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -1311,6 +1311,8 @@ pph_read_file_contents (pph_stream *stream)
   cpp_ident_use *bad_use;
   const char *cur_def;
   cpp_idents_used idents_used;
+  tree fndecl;
+  unsigned i;
 
   pph_in_identifiers (stream, &idents_used);
 
@@ -1333,6 +1335,23 @@ pph_read_file_contents (pph_stream *stream)
   /* FIXME pph: This call replaces the tinfo, we should merge instead.
  See pph_in_tree_vec.  */
   unemitted_tinfo_decls = pph_in_tree_vec (stream);
+
+  /* Expand all the functions with bodies that we read from STREAM.  */
+  for (i = 0; VEC_iterate (tree, stream->fns_to_expand, i, fndecl); i++)
+{
+  /* FIXME pph - This is somewhat gross.  When we generated the
+PPH image, the parser called expand_or_defer_fn on FNDECL,
+which marked it DECL_EXTERNAL (see expand_or_defer_fn_1 for
+details).
+
+However, this is not really an extern definition, so it was
+also marked not-really-extern (yes, I know...). If this
+happens, we need to unmark it, otherwise the code generator
+will toss it out.  */
+  if (DECL_NOT_REALLY_EXTERN (fndecl))
+   DECL_EXTERNAL (fndecl) = 0;
+  expand_or_defer_fn (fndecl);
+}
 }
 
 
@@ -1372,20 +1391,7 @@ pph_in_function_decl (pph_stream *stream, tree fndecl)
   DECL_STRUCT_FUNCTION (fndecl) = pph_in_struct_function (stream, fndecl);
   DECL_CHAIN (fndecl) = pph_in_tree (stream);
   if (DECL_SAVED_TREE (fndecl))
-{
-  /* FIXME pph - This is somewhat gross.  When we generated the
-PPH image, the parser called expand_or_defer_fn on FNDECL,
-which marked it DECL_EXTERNAL (see expand_or_defer_fn_1 for
-details).
-
-However, this is not really an extern definition, so it was
-also marked not-really-extern (yes, I know...). If this
-happens, we need to unmark it, otherwise the code generator
-will toss it out.  */
-  if (DECL_NOT_REALLY_EXTERN (fndecl))
-   DECL_EXTERNAL (fndecl) = 0;
-  expand_or_defer_fn (fndecl);
-}
+VEC_safe_push (tree, gc, stream->fns_to_expand, fndecl);
 }
 
 
@@ -1396,7 +1402,7 @@ pph_in_function_decl (pph_stream *stream, tree fndecl)
 
 void
 pph_read_tree (struct lto_input_block *ib ATTRIBUTE_UNUSED,
- struct data_in *data_in, tree expr)
+  struct data_in *data_in, tree expr)
 {
   pph_stream *stream = (pph_stream *) data_in->sdata;
 
diff --git a/gcc/cp/pph-streamer.h b/gcc/cp/pph-streamer.h
index b36d622..b899501 100644
--- a/gcc/cp/pph-streamer.h
+++ b/gcc/cp/pph-streamer.h
@@ -116,6 +116,10 @@ typedef struct pph_stream {
 
   /* Nonzero if the stream was opened for writing.  */
   unsigned int write_p : 1;
+
+  /* List of functions with bodies that need to be expanded after
+ reading the PPH file.  */
+  VEC(tree,gc) *fns_to_expand;
 } pph_stream;
 
 /* Filter values for pph_out_chain_filtered.  */
diff --git a/gcc/testsuite/ChangeLog.pph b/gcc/testsuite/ChangeLog.pph
index aeede51..c5e1634 100644
--- a/gcc/testsuite/ChangeLog.pph
+++ b/gcc/testsuite/ChangeLog.pph
@@ -1,5 +1,11 @@
 2011-06-29  Diego Novillo  
 
+   * g++.dg/pph/x1dynarray1.cc: Adjust xfail.
+   * g++.dg/pph/x1namespace.cc: Likewise.
+   * g++.dg/pph/x1template.cc: Mark fixed.
+
+2011-06-29  Di

[pph] Allow DECL_LANG_SPECIFIC to be shared (issue4667049)

2011-06-29 Thread Diego Novillo
This is a patch that Lawrence and I were working on yesterday
that I need for my next patch.  We were not expecting
DECL_LANG_SPECIFIC to be shared, but in fact there are several
occasions (thunks, aliases) where it is shared.

By itself, this does not fix any tests, but it fixes a problem
exposed by the next patch.

Tested on x86_64.  Committed to the branch.

2011-06-29   Diego Novillo  
 Lawrence Crowl  

* pph-streamer-in.c (pph_in_lang_specific): Expect shared
DECL_LANG_SPECIFIC fields.

diff --git a/gcc/cp/ChangeLog.pph b/gcc/cp/ChangeLog.pph
index a62dd66..9465484 100644
--- a/gcc/cp/ChangeLog.pph
+++ b/gcc/cp/ChangeLog.pph
@@ -1,4 +1,10 @@
 2011-06-29   Diego Novillo  
+Lawrence Crowl  
+
+   * pph-streamer-in.c (pph_in_lang_specific): Expect shared
+   DECL_LANG_SPECIFIC fields.
+
+2011-06-29   Diego Novillo  
 
* pph-streamer-in.c (pph_read_tree): Call
append_to_statement_list_force instead of append_to_statement_list.
diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index 1a59640..1c71c30 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -881,18 +881,23 @@ pph_in_lang_specific (pph_stream *stream, tree decl)
   marker = pph_in_start_record (stream, &ix);
   if (marker == PPH_RECORD_END)
 return;
-
-  /* Since lang_decl is embedded in every decl, LD cannot
- be shared.  */
-  gcc_assert (marker != PPH_RECORD_SHARED);
+  else if (marker == PPH_RECORD_SHARED)
+{
+  DECL_LANG_SPECIFIC (decl) =
+   (struct lang_decl *) pph_in_shared_data (stream, ix);
+  return;
+}
 
   /* Allocate a lang_decl structure for DECL.  */
   retrofit_lang_decl (decl);
-
   ld = DECL_LANG_SPECIFIC (decl);
-  ldb = &ld->u.base;
+
+  /* Now register it.  We would normally use ALLOC_AND_REGISTER,
+ but retrofit_lang_decl does not return a pointer.  */
+  pph_register_shared_data (stream, ld, ix);
 
   /* Read all the fields in lang_decl_base.  */
+  ldb = &ld->u.base;
   pph_in_ld_base (stream, ldb);
 
   if (ldb->selector == 0)

--
This patch is available for review at http://codereview.appspot.com/4667049


[pph] Fix ICE during gimplification (issue4636074)

2011-06-29 Thread Diego Novillo
This patch fixes an ICE during gimplification.  When reading a
STATEMENT_LIST, we were calling append_to_statement_list, but at
that point some elements in the list do not have side-effects
(the final VAR_DECL in a stmt expression), so we were dropping it
out.

Fixed by force-adding every statement we read.  This fixes
c1attr-warn-unused-result.cc.

Tested on x86_64.  Committed to branch.


* pph-streamer-in.c (pph_read_tree): Call
append_to_statement_list_force instead of append_to_statement_list.

testsuite/ChangeLog.pph

* g++.dg/pph/c1attr-warn-unused-result.cc: Remove xfail marker.


diff --git a/gcc/cp/ChangeLog.pph b/gcc/cp/ChangeLog.pph
index c2679e6..a62dd66 100644
--- a/gcc/cp/ChangeLog.pph
+++ b/gcc/cp/ChangeLog.pph
@@ -1,3 +1,8 @@
+2011-06-29   Diego Novillo  
+
+   * pph-streamer-in.c (pph_read_tree): Call
+   append_to_statement_list_force instead of append_to_statement_list.
+
 2011-06-28   Diego Novillo  
 
* pph-streamer-in.c (pph_in_ld_fn): Instantiate
diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index 1dabcf1..1a59640 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -1488,7 +1488,7 @@ pph_read_tree (struct lto_input_block *ib 
ATTRIBUTE_UNUSED,
 for (i = 0; i < num_trees; i++)
  {
tree stmt = pph_in_tree (stream);
-   append_to_statement_list (stmt, &expr);
+   append_to_statement_list_force (stmt, &expr);
  }
   }
   break;
diff --git a/gcc/testsuite/ChangeLog.pph b/gcc/testsuite/ChangeLog.pph
index 9c33875..aeede51 100644
--- a/gcc/testsuite/ChangeLog.pph
+++ b/gcc/testsuite/ChangeLog.pph
@@ -1,3 +1,7 @@
+2011-06-29  Diego Novillo  
+
+   * g++.dg/pph/c1attr-warn-unused-result.cc: Remove xfail marker.
+
 2011-06-28  Diego Novillo  
 
* g++.dg/pph/c1attr-warn-unused-result.cc: Expect an ICE.
diff --git a/gcc/testsuite/g++.dg/pph/c1attr-warn-unused-result.cc 
b/gcc/testsuite/g++.dg/pph/c1attr-warn-unused-result.cc
index 4633106..da75561 100644
--- a/gcc/testsuite/g++.dg/pph/c1attr-warn-unused-result.cc
+++ b/gcc/testsuite/g++.dg/pph/c1attr-warn-unused-result.cc
@@ -1,6 +1,3 @@
-/* { dg-xfail-if "ICE" { "*-*-*" } { "-fpph-map=pph.map" } } */
-// { dg-bogus "internal compiler error: Segmentation fault" "" { xfail *-*-* } 
0 }
-// { dg-prune-output "In file included from" }
 /* { dg-options "-w" } */
 // pph asm xdiff
 #include "c1attr-warn-unused-result.h"

--
This patch is available for review at http://codereview.appspot.com/4636074


Re: cleanup patch

2011-06-29 Thread François Dumont

Attached patch applied

2011-06-29  François Dumont 

* include/debug/set.h, unordered_map, multiset.h, forward_list,
unordered_set, vector, deque, string, list, multimap.h: Remove base
class default constructor calls.
* include/debug/map.h: Likewise and cleanup several redefinition of
base iterator typedef.

Regards

On 06/29/2011 10:34 AM, Jonathan Wakely wrote:

On 28 June 2011 21:36, François Dumont wrote:

Here is a small patch to cleanup debug code.

2011-06-28  François Dumont

* include/debug/set.h, unordered_map, multiset.h, forward_list,
unordered_set, vector, deque, string, list, multimap.h: Remove base
class default constructor calls.
* include/debug/map.h: Likewise and cleanup several redefinition of
base iterator typedef.


Tested under linux x86_64.

(Remember to CC gcc-patches for all patches)

This is OK to check in, thanks.



Index: include/debug/set.h
===
--- include/debug/set.h	(revision 175553)
+++ include/debug/set.h	(working copy)
@@ -46,7 +46,6 @@
   public __gnu_debug::_Safe_sequence >
 {
   typedef _GLIBCXX_STD_C::set<_Key, _Compare, _Allocator> _Base;
-  typedef __gnu_debug::_Safe_sequence _Safe_base;
 
   typedef typename _Base::const_iterator _Base_const_iterator;
   typedef typename _Base::iterator _Base_iterator;
@@ -88,21 +87,21 @@
 		__comp, __a) { }
 
   set(const set& __x)
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
   set(const _Base& __x)
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
 #ifdef __GXX_EXPERIMENTAL_CXX0X__
   set(set&& __x)
   noexcept(is_nothrow_copy_constructible<_Compare>::value)
-  : _Base(std::move(__x)), _Safe_base()
+  : _Base(std::move(__x))
   { this->_M_swap(__x); }
 
   set(initializer_list __l,
 	  const _Compare& __comp = _Compare(),
 	  const allocator_type& __a = allocator_type())
-  : _Base(__l, __comp, __a), _Safe_base() { }
+  : _Base(__l, __comp, __a) { }
 #endif
 
   ~set() _GLIBCXX_NOEXCEPT { }
@@ -206,7 +205,6 @@
   std::pair
   insert(value_type&& __x)
   {
-	typedef typename _Base::iterator _Base_iterator;
 	std::pair<_Base_iterator, bool> __res
 	  = _Base::insert(std::move(__x));
 	return std::pair(iterator(__res.first, this),
Index: include/debug/unordered_map
===
--- include/debug/unordered_map	(revision 175553)
+++ include/debug/unordered_map	(working copy)
@@ -89,25 +89,25 @@
 	: _Base(__gnu_debug::__base(__gnu_debug::__check_valid_range(__first,
  __last)),
 		__gnu_debug::__base(__last), __n,
-		__hf, __eql, __a), _Safe_base() { }
+		__hf, __eql, __a) { }
 
   unordered_map(const unordered_map& __x) 
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
   unordered_map(const _Base& __x)
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
   unordered_map(unordered_map&& __x)
   noexcept(__and_,
 	  is_nothrow_copy_constructible<_Pred>>::value)
-  : _Base(std::move(__x)), _Safe_base() { }
+  : _Base(std::move(__x)) { }
 
   unordered_map(initializer_list __l,
 		size_type __n = 0,
 		const hasher& __hf = hasher(),
 		const key_equal& __eql = key_equal(),
 		const allocator_type& __a = allocator_type())
-  : _Base(__l, __n, __hf, __eql, __a), _Safe_base() { }
+  : _Base(__l, __n, __hf, __eql, __a) { }
 
   ~unordered_map() noexcept { }
 
@@ -381,25 +381,25 @@
 	: _Base(__gnu_debug::__base(__gnu_debug::__check_valid_range(__first,
  __last)),
 		__gnu_debug::__base(__last), __n,
-		__hf, __eql, __a), _Safe_base() { }
+		__hf, __eql, __a) { }
 
   unordered_multimap(const unordered_multimap& __x) 
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
   unordered_multimap(const _Base& __x) 
-  : _Base(__x), _Safe_base() { }
+  : _Base(__x) { }
 
   unordered_multimap(unordered_multimap&& __x)
   noexcept(__and_,
 	  is_nothrow_copy_constructible<_Pred>>::value)
-  : _Base(std::move(__x)), _Safe_base() { }
+  : _Base(std::move(__x)) { }
 
   unordered_multimap(initializer_list __l,
 			 size_type __n = 0,
 			 const hasher& __hf = hasher(),
 			 const key_equal& __eql = key_equal(),
 			 const allocator_type& __a = allocator_type())
-  : _Base(__l, __n, __hf, __eql, __a), _Safe_base() { }
+  : _Base(__l, __n, __hf, __eql, __a) { }
 
   ~unordered_multimap() noexcept { }
 
Index: include/debug/multiset.h
===
--- include/debug/multiset.h	(revision 175553)
+++ include/debug/multiset.h	(working copy)
@@ -46,7 +46,6 @@
   public __gnu_debug::_Safe_sequence >
 {
   typedef _GLIBCXX_STD_C::multiset<_Key, _Compare, _Allocator> _Base;
-  typedef __gnu_debug::

Re: [gcc patch] Re: C++ member function template id not matching linkage name (PR debug/49408)

2011-06-29 Thread Jan Kratochvil
On Mon, 27 Jun 2011 20:00:24 +0200, Jason Merrill wrote:
> They should be suppressed whenever the function appears in an
> expression context, either as a pointer to member function (i.e. the
> operand of '&')

Done, therefore it is no longer restricted only to templates as before.


> or as the function being called in a call expression.

I implemented it in the patch below but I do not agree + understand it.

The call expression is in libiberty/testsuite/demangle-expected modified by
this patch as:

 # decltype/fn call test
 --format=gnu-v3
 _Z4add3IidEDTclL_Z1gEfp_fp0_EET_T0_
-decltype (g({parm#1}, {parm#2})) add3(int, double)
+decltype (g) add3(int, double)

I agree it is sufficient to determine the return type just from the function
type as return type cannot be overloaded by the function parameters.  But it
no longer matches a valid C++ source code now:

char g (int x, double y) { return 0; }
template <typename T, typename U>
decltype (g((T) 0, (U) 0)) add3 (T x, U y) { return 'z'; }
// error: ‘add3’ declared as function returning a function
// decltype (g) add3 (T x, U y) { return 'z'; }
int main () { add3 (1, 2.0); }

g++ -Wall -g -std=c++0x
g++ (GCC) 4.7.0 20110629 (experimental)


(Regression testing underway.)


Thanks,
Jan


libiberty/
2011-06-29  Jan Kratochvil  

* cp-demangle.c (d_print_comp): Suppress argument list for function
references by the '&' unary operator.  Keep also already processed
variant without the argument list.  Suppress argument list also for
function call used in an expression.
* testsuite/demangle-expected: Remove parameters from function call
expressions of 6 testcases.  Create 3 new testcases for function
references by the '&' unary operator.

--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -4139,7 +4169,46 @@ d_print_comp (struct d_print_info *dpi, int options,
   return;
 
 case DEMANGLE_COMPONENT_UNARY:
-  if (d_left (dc)->type != DEMANGLE_COMPONENT_CAST)
+  if (d_left (dc)->type == DEMANGLE_COMPONENT_OPERATOR
+ && d_left (dc)->u.s_operator.op->len == 1
+ && d_left (dc)->u.s_operator.op->name[0] == '&'
+ && d_right (dc)->type == DEMANGLE_COMPONENT_TYPED_NAME
+ && d_left (d_right (dc))->type == DEMANGLE_COMPONENT_QUAL_NAME
+ && d_right (d_right (dc))->type == DEMANGLE_COMPONENT_FUNCTION_TYPE)
+   {
+ /* Address of a function (therefore in an expression context) must
+have its argument list suppressed.
+
+unary operator ... dc
+  operator & ... d_left (dc)
+  typed name ... d_right (dc)
+qualified name ... d_left (d_right (dc))
+  
+function type ... d_right (d_right (dc))
+  argument list
+  */
+
+ d_print_expr_op (dpi, options, d_left (dc));
+ d_print_comp (dpi, options, d_left (d_right (dc)));
+ return;
+   }
+  else if (d_left (dc)->type == DEMANGLE_COMPONENT_OPERATOR
+  && d_left (dc)->u.s_operator.op->len == 1
+  && d_left (dc)->u.s_operator.op->name[0] == '&'
+  && d_right (dc)->type == DEMANGLE_COMPONENT_QUAL_NAME)
+   {
+ /* Keep also already processed variant without the argument list.
+
+unary operator ... dc
+  operator & ... d_left (dc)
+  qualified name ... d_right (dc)
+  */
+
+ d_print_expr_op (dpi, options, d_left (dc));
+ d_print_comp (dpi, options, d_right (dc));
+ return;
+   }
+  else if (d_left (dc)->type != DEMANGLE_COMPONENT_CAST)
d_print_expr_op (dpi, options, d_left (dc));
   else
{
@@ -4172,10 +4241,11 @@ d_print_comp (struct d_print_info *dpi, int options,
  d_print_comp (dpi, options, d_right (d_right (dc)));
  d_append_char (dpi, ']');
}
-  else
+  /* Function call used in an expression should not have the argument list
+printed.  */
+  else if (strcmp (d_left (dc)->u.s_operator.op->code, "cl") != 0)
{
- if (strcmp (d_left (dc)->u.s_operator.op->code, "cl") != 0)
-   d_print_expr_op (dpi, options, d_left (dc));
+ d_print_expr_op (dpi, options, d_left (dc));
  d_print_subexpr (dpi, options, d_right (d_right (dc)));
}
 
diff --git a/libiberty/testsuite/demangle-expected 
b/libiberty/testsuite/demangle-expected
index bbd418c..da87282 100644
--- a/libiberty/testsuite/demangle-expected
+++ b/libiberty/testsuite/demangle-expected
@@ -3904,7 +3904,7 @@ decltype ({parm#1}+{parm#2}) add(int, double)
 # decltype/fn call test
 --format=gnu-v3
 _Z4add3

Re: [PATCH, MELT] correct meltgc_read_from_val without location

2011-06-29 Thread Basile Starynkevitch
On Wed, 29 Jun 2011 20:32:08 +0200
Pierre  wrote:

> Hello,
> 
> here is an improvement to 
> http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01888.html.
> 

Thanks. I applied it with minor changes on the MELT branch.
{spelling mistakes & indentation mostly}.

Perhaps a future improvement might be to set the location (i.e. locnamv
in meltgc_read_from_val) to something which depends upon the string
rbuf. I have no idea if that would be useful.

Regards.

-- 
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basilestarynkevitchnet mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mine, sont seulement les miennes} ***


Re: [PATCH, MELT] correct meltgc_read_from_val without location

2011-06-29 Thread Pierre

Hello,

here is an improvement to 
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01888.html.


The function meltgc_read_from_val (in melt-runtime.c) takes two 
arguments, a string value and a second one which is a location. In the 
comments, it is written that we can pass a NULL pointer if we have no 
location (it is a direct string). However, this causes MELT to crash 
because it doesn't correctly handle the absence of a file.


In the first patch, I modified makesexpr to create a location with a 
'virtual' file. This works; however, as this function is used recursively, 
this is not very elegant.


This patch is more elegant: it adds a boolean field to the struct 
reading_st to record whether the input has a file location.
The meltgc_read_* functions are modified to test whether a location was 
given and, if not, to create a location with a virtual file and set 
the boolean to false, which avoids the crash.


I have been able to build MELT with this pass and test it without problem.

On 24/06/2011 18:13, Pierre Vittet wrote:

Hello,

The function meltgc_read_from_val (in melt-runtime.c) takes two
arguments, a string value and a second one which is a location.
In the comments, it is written that we can pass a NULL pointer if we
have no location (it is a direct string). However, this causes MELT to
crash because it doesn't correctly handle the absence of a file.

This patch corrects this: if there is no file, it creates a "virtual" one
which is named "stringBuffer".

Pierre Vittet


correct_read_from_val_without_location-175348.diff




Index: gcc/melt-runtime.c
===
--- gcc/melt-runtime.c  (revision 175348)
+++ gcc/melt-runtime.c  (working copy)
@@ -6292,6 +6292,7 @@ struct reading_st
   int rcol;/* current column */
   source_location rsrcloc; /* current source location */
   melt_ptr_t *rpfilnam;/* pointer to location of file name string */
+  bool has_file_location;  /* precise if the string comes from a file */
 };
 
 #define MELT_READ_TABULATION_FACTOR 8
@@ -6326,7 +6327,7 @@ melt_linemap_compute_current_location (struct read
 {
   int colnum = 1;
   int cix = 0;
-  if (!rd || !rd->rcurlin) 
+  if (!rd || !rd->rcurlin || !rd->has_file_location)
 return;
   for (cix=0; cix<rd->rcol; cix++) {
 char c = rd->rcurlin[cix];
@@ -8314,6 +8315,7 @@ meltgc_read_file (const char *filnam, const char *
   rd = &rds;
   locnamv = meltgc_new_stringdup ((meltobject_ptr_t) MELT_PREDEF 
(DISCR_STRING), locnam);
   rds.rpfilnam = (melt_ptr_t *) & locnamv;
+  rds.has_file_location = true;
   seqv = meltgc_new_list ((meltobject_ptr_t) MELT_PREDEF (DISCR_LIST));
   while (!rdeof ())
 {
@@ -8371,7 +8373,16 @@ meltgc_read_from_rawstring (const char *rawstr, co
   rds.rsrcloc = loch;
   rd = &rds;
   if (locnam)
-locnamv = meltgc_new_stringdup ((meltobject_ptr_t) MELT_PREDEF 
(DISCR_STRING), locnam);
+{
+  rds.has_file_location = true;
+  locnamv = meltgc_new_stringdup ((meltobject_ptr_t) MELT_PREDEF 
(DISCR_STRING), locnam);
+}
+  else
+{
+  rds.has_file_location = false;
+  locnamv = meltgc_new_string ((meltobject_ptr_t) 
MELT_PREDEF(DISCR_STRING),
+  "stringBuffer");
+}
   seqv = meltgc_new_list ((meltobject_ptr_t) MELT_PREDEF (DISCR_LIST));
   rds.rpfilnam = (melt_ptr_t *) & locnamv;
   while (rdcurc ())
@@ -8415,6 +8426,7 @@ meltgc_read_from_val (melt_ptr_t strv_p, melt_ptr_
   locnamv = locnam_p;
   rbuf = 0;
   strmagic = melt_magic_discr ((melt_ptr_t) strv);
+  seqv = meltgc_new_list ((meltobject_ptr_t) MELT_PREDEF (DISCR_LIST));
   switch (strmagic)
 {
 case MELTOBMAG_STRING:
@@ -8441,7 +8453,14 @@ meltgc_read_from_val (melt_ptr_t strv_p, melt_ptr_
   rds.rpath = 0;
   rds.rlineno = 0;
   rds.rcurlin = rbuf;
+  rds.has_file_location = true;
   rd = &rds;
+  if (locnamv == NULL){
+rds.has_file_location = false;
+locnamv = meltgc_new_string ((meltobject_ptr_t) MELT_PREDEF(DISCR_STRING),
+ "stringBuffer");
+rd->rpfilnam = (melt_ptr_t *) &locnamv;
+  }
   rds.rpfilnam = (melt_ptr_t *) & locnamv;
   while (rdcurc ())
 {
2011-06-29  Pierre Vittet  

* melt-runtime.c (struct reading_st): add a boolean has_file_location
field.
(melt_linemap_compute_current_location): return immediately if no file
location.
(meltgc_read_file, meltgc_read_from_rawstring, meltgc_read_from_val):
set has_file_location accordingly
(meltgc_read_from_val): create seqv list (it was used without being
created).


Re: [Design notes, RFC] Address-lowering prototype design (PR46556)

2011-06-29 Thread William J. Schmidt
On Tue, 2011-06-14 at 15:39 +0200, Richard Guenther wrote:
> On Fri, Jun 10, 2011 at 5:11 PM, William J. Schmidt
>  wrote:
> > On Tue, 2011-06-07 at 16:49 +0200, Richard Guenther wrote:
> >> On Tue, Jun 7, 2011 at 4:14 PM, William J. Schmidt
> >>  wrote:
> >
> > 
> >
> >> >> > Loss of aliasing information
> >> >> > 
> >> >> > The most serious problem I've run into is degraded performance due to 
> >> >> > poorer
> >> >> > instruction scheduling choices. I tracked this down to
> >> >> > alias.c:nonoverlapping_component_refs_p.
> >> >> >
> >> >> > This code proves that two memory accesses don't overlap by attempting 
> >> >> > to prove
> >> >> > that they access different fields of the same structure. This is done 
> >> >> > using
> >> >> > the MEM_EXPRs of the two rtx's, which record the expression trees 
> >> >> > that were
> >> >> > translated into the rtx's during expand. When address lowering is not
> >> >> > present, a simple COMPONENT_REF will appear in the MEM_EXPR: x.a, for
> >> >> > example. However, address lowering changes the simple COMPONENT_REF 
> >> >> > into a
> >> >> > [TARGET_]MEM_REF that is no longer necessarily identifiable as a field
> >> >> > reference. Thus the aliasing machinery can no longer prove that two 
> >> >> > such
> >> >> > field references are disjoint.
> >> >> >
> >> >> > This has severe consequences for performance, and has to be dealt 
> >> >> > with if
> >> >> > address lowering is to be successful.
> >> >> >
> >> >> > I've worked around this with an admittedly fragile solution; I'll 
> >> >> > discuss the
> >> >> > drawbacks below. The idea is to construct a mapping from replacement 
> >> >> > mem_refs
> >> >> > to the original expressions that they replaced. When a MEM_EXPR is 
> >> >> > being set
> >> >> > during expand, we first look up the mem_ref in the mapping. If 
> >> >> > present, the
> >> >> > MEM_EXPR is set to the original expression, rather than to the 
> >> >> > mem_ref. This
> >> >> > essentially duplicates the behavior in the absence of address 
> >> >> > lowering.
> >> >>
> >> >> Ick. We had this in the past via TMR_ORIGINAL which caused all sorts
> >> >> of problems. Removing it didn't cause much degradation because we now
> >> >> preserve points-to information.
> >> >>
> >> >> Originally I played with lowering all memory accesses to MEM_REFs
> >> >> (see the old mem-ref branch), and the loss of type-based alias
> >> >> disambiguation was indeed an issue.
> >> >>
> >> >> But - I definitely do not like the idea of preserving something similar
> >> >> to TMR_ORIGINAL. Instead we can try preserving some information
> >> >> we derive from it. We keep the original access type that we can use
> >> >> for TBAA but do not retain knowledge on whether the type of the
> >> >> MEM_REF is valid for TBAA or if it is view-converted.
> >> >
> >> > Yes, I really don't like what I have at the moment, either. I put it in
> >> > place as a stopgap to let me proceed to look for other performance
> >> > problems.
> >> >
> >> > The question is how we can infer useful information for TBAA from the
> >> > MEM_REFs and TMRs. I poked at trying to identify types and offsets from
> >> > the MEM_EXPRs, but this ended up being useless; I had to constrain too
> >> > many cases to maintain correctness, and couldn't prove the type
> >> > information for the important cases in SPEC I was trying to address.
> >> >
> >> > Unfortunately, the whole design goes down the drain if we can't find a
> >> > way to solve the TBAA issue. The performance degradations are too
> >> > costly.
> >>
> >> If you look at what basic TBAA the alias oracle performs then it boils
> >> down to the fact that get_alias_set for a.b.c might end up using the
> >> alias-set of the type of C but for MEM[&a + 4] it will use the alias set
> >> of the type of a. The tree alias-oracle extracts both alias sets, that
> >> of the outermost valid type and that of the innermost as both are
> >> equally useful. But the MEM_REF (or TARGET_MEM_REF) tree
> >> only have storage for one such alias-set. Thus my idea at some point
> >> was to store the other one as well in some form. It will not be
> >> the full information (after all, the complete access path does provide
> >> some extra information - see aliasing_component_refs_p).
> >
> > This is what concerns me. TBAA information for the outer and inner
> > components doesn't seem sufficient to provide what
> > nonoverlapping_component_refs_p is currently able to prove. The latter
> > searches for a common RECORD_TYPE somewhere along the two access paths,
> > and then disambiguates if the two associated referenced fields differ.
> > For a simple case like "struct x { int a; int b; };", a and b have the
> > same type and alias-set, so the alias-set information doesn't add
> > anything. It isn't sufficient alone for the disambiguation of x1.a =
> > MEM_REF[&x1, 0] and x2.b = MEM_REF[&x2, 4].
> >
> > Obviously the offset is sufficient to disambiguate for 

Re: [testsuite] skip ARM tests if no thumb2 support

2011-06-29 Thread Janis Johnson
On 06/29/2011 06:55 AM, Richard Earnshaw wrote:

>  3. Add a new dg directive (perhaps dg-ignore-multilib) that instructs
> the framework to ignore the multilib options entirely (only supported
> for compile/assembly tests).
> 
> There are a lot of target-specific tests that are not really testing
> execution, just that the compiler generates the right instruction under
> specific compilation options.  It seems silly to force all this into the
> multi-lib framework.

If it's true that most testsuite runs on ARM include a default multilib,
then some tests can be skipped for everything other than the default
multilib and specify all the flags that are needed for the test, e.g.
with

  /* { dg-require-effective-target default_multilib } */

That's much simpler than ignoring multilib flags, and avoids running
the same test multiple times with exactly the same options.

Janis


[Patch] Fix objc/48109

2011-06-29 Thread Iain Sandoe
The bug arises because of the use, by the ObjC FE, of two old target  
macros that emit efficient representations of class definitions and  
references.


This 'works fine' (however wrong it might be conceptually), until LTO  
is engaged, whereupon the definitions vanish without trace (since no  
corresponding real variable is ever created).


---

The patch creates appropriate variables in the FE and tags them as  
ObjC meta-data (in the same manner as is done for other objc meta-data).


We then intercept them in varasm.c with a hook that allows the target  
to declare that it has completely handled the output of a variable -  
allowing us to handle them in the required special manner.


FAOD, It is necessary to preserve this mechanism for emitting the  
definitions and references to permit linkage with existing system  
libraries.




If the patch is acceptable, then I would expect to follow up with a  
patch to remove ASM_DECLARE_CLASS_REFERENCE and  
ASM_DECLARE_UNRESOLVED_REFERENCE from the tree - since this is their  
only use.




bootstrapped on i686-linux, i686-darwin9, x86_64-darwin10,  (checked  
to do the Right Thing on darwin).


Mike has already given this a 'seems reasonable' in the PR thread,  
however, I need an approver for the varasm.c and target hook changes.


OK for trunk & 4.6?

Iain

===

gcc/

* target.def (handled_assemble_variable_p): New hook.
* varasm.c (assemble_variable): Allow target to handle variable output
in some special manner.
* doc/tm.texi: Regenerate.
* config/darwin.c (darwin_objc1_section): Handle class defs/refs.
(darwin_handled_assemble_variable_p): New.
* config/darwin-protos.h (darwin_handled_assemble_variable_p): New.
* config/darwin.h (TARGET_ASM_HANDLED_ASSEMBLE_VARIABLE_P): New.

gcc/objc/

	* objc-next-runtime-abi-01.c (handle_next_class_ref): Don't emit lazy
	refs. for cases where the class is local.  Declare a real, meta-data
	item tagged as a class reference.
	(handle_next_impent): Declare a real, meta-data item tagged as a
	class def.


===

Index: gcc/target.def
===
--- gcc/target.def  (revision 175628)
+++ gcc/target.def  (working copy)
@@ -449,6 +449,13 @@ DEFHOOK
  bool, (FILE *file, rtx x),
  default_asm_output_addr_const_extra)

+DEFHOOK
+(handled_assemble_variable_p,
+ "Returns @code{true} iff the target has handled the assembly of the\
+  variable @var{var_decl}",
+ bool, (tree var_decl),
+ hook_bool_tree_false)
+
 /* ??? The TARGET_PRINT_OPERAND* hooks are part of the asm_out struct,
    even though that is not reflected in the macro name to override their
    initializers.  */
Index: gcc/objc/objc-next-runtime-abi-01.c
===
--- gcc/objc/objc-next-runtime-abi-01.c (revision 175628)
+++ gcc/objc/objc-next-runtime-abi-01.c (working copy)
@@ -2267,27 +2267,51 @@ generate_objc_symtab_decl (void)
   init_objc_symtab (TREE_TYPE (UOBJC_SYMBOLS_decl)));
 }

-
 static void
 handle_next_class_ref (tree chain)
 {
+  tree decl, exp;
+  struct imp_entry *impent;
   const char *name = IDENTIFIER_POINTER (TREE_VALUE (chain));
   char *string = (char *) alloca (strlen (name) + 30);

+  for (impent = imp_list; impent; impent = impent->next)
+if (TREE_CODE (impent->imp_context) == CLASS_IMPLEMENTATION_TYPE
+&& IDENTIFIER_POINTER (CLASS_NAME (impent->imp_context))
+   == IDENTIFIER_POINTER (TREE_VALUE (chain)))
+  return; /* we declare this, no need for a lazy ref.  */
+
   sprintf (string, ".objc_class_name_%s", name);

-#ifdef ASM_DECLARE_UNRESOLVED_REFERENCE
-  ASM_DECLARE_UNRESOLVED_REFERENCE (asm_out_file, string);
-#else
-  return ; /* NULL build for targets other than Darwin.  */
-#endif
+  decl = build_decl (UNKNOWN_LOCATION,
+VAR_DECL, get_identifier (string), char_type_node);
+  TREE_PUBLIC (decl) = 1;
+  DECL_EXTERNAL (decl) = 1;
+  DECL_CONTEXT (decl) = NULL_TREE;
+  finish_var_decl (decl, NULL);
+
+  /* We build a variable to signal the reference.  This will be intercepted
+     and output as a lazy reference.  */
+  sprintf (string, "_OBJC_class_ref_%s", name);
+  exp = build1 (ADDR_EXPR, string_type_node, decl);
+  decl = build_decl (input_location,
+VAR_DECL, get_identifier (string), string_type_node);
+  TREE_STATIC (decl) = 1;
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_INITIAL (decl) = error_mark_node;
+
+  /* We must force the reference.  */
+  DECL_PRESERVE_P (decl) = 1;
+  OBJCMETA (decl, objc_meta, get_identifier ("V1_CREF"));
+  DECL_CONTEXT (decl) = NULL_TREE;
+  finish_var_decl (decl, exp);
 }

 static void
 handle_next_impent (struct imp_entry *impent)
 {
   char buf[BUFSIZE];
-
+  tree decl;
   switch (TREE_CODE (impent->imp_context))
 {
 case CLASS_IMPLEMENTATION_TYPE:
@@ -2303,11 +2327,16 @@ handle_next_impent (struct imp_entry *

Re: [testsuite, ada] Cope with HP-UX sh in run_acats (PR ada/49511)

2011-06-29 Thread Arnaud Charlet
> As described in the PR, HP-UX /bin/sh type -p ignores the -p, so we need
> to extract the last field of the output just as for regular type.  The
> following patch does this, tested by Dave and bootstrapped on
> sparc-sun-solaris2.11.
> 
> Ok for mainline, 4.6 and 4.5 branches?

OK


[PATCH 4/6] Fix computation of precision.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

* graphite-clast-to-gimple.c (precision_for_value): Removed.
(precision_for_interval): Removed.
(gcc_type_for_interval): Use mpz_sizeinbase.
---
 gcc/ChangeLog  |6 +++
 gcc/graphite-clast-to-gimple.c |   77 +---
 2 files changed, 15 insertions(+), 68 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 828559a..0616b10 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2011-06-29  Sebastian Pop  
 
+   * graphite-clast-to-gimple.c (precision_for_value): Removed.
+   (precision_for_interval): Removed.
+   (gcc_type_for_interval): Use mpz_sizeinbase.
+
+2011-06-29  Sebastian Pop  
+
PR tree-optimization/47654
* graphite-blocking.c (pbb_strip_mine_time_depth): Do not return bool.
(lst_do_strip_mine_loop): Return an int.
diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index 4a4c3d2..70031a0 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -379,72 +379,17 @@ clast_to_gcc_expression (tree type, struct clast_expr *e,
   return NULL_TREE;
 }
 
-/* Return the precision needed to represent the value VAL.  */
-
-static int
-precision_for_value (mpz_t val)
-{
-  mpz_t x, y, two;
-  int precision;
-
-  mpz_init (x);
-  mpz_init (y);
-  mpz_init (two);
-  mpz_set_si (x, 2);
-  mpz_set (y, val);
-  mpz_set_si (two, 2);
-  precision = 1;
-
-  if (mpz_sgn (y) < 0)
-mpz_neg (y, y);
-
-  while (mpz_cmp (y, x) >= 0)
-{
-  mpz_mul (x, x, two);
-  precision++;
-}
-
-  mpz_clear (x);
-  mpz_clear (y);
-  mpz_clear (two);
-
-  return precision;
-}
-
-/* Return the precision needed to represent the values between LOW and
-   UP.  */
-
-static int
-precision_for_interval (mpz_t low, mpz_t up)
-{
-  mpz_t diff;
-  int precision;
-
-  gcc_assert (mpz_cmp (low, up) <= 0);
-
-  mpz_init (diff);
-  mpz_sub (diff, up, low);
-  precision = precision_for_value (diff);
-  mpz_clear (diff);
-
-  return precision;
-}
-
-/* Return a type that could represent the integer value VAL.  */
+/* Return a type that could represent the values between LOW and UP.
+   The value of LOW can be bigger than UP.  */
 
 static tree
 gcc_type_for_interval (mpz_t low, mpz_t up)
 {
-  bool unsigned_p = true;
-  int precision, prec_up, prec_int;
+  bool unsigned_p;
   tree type;
   enum machine_mode mode;
-
-  gcc_assert (mpz_cmp (low, up) <= 0);
-
-  prec_up = precision_for_value (up);
-  prec_int = precision_for_interval (low, up);
-  precision = MAX (prec_up, prec_int);
+  int precision = MAX (mpz_sizeinbase (low, 2),
+  mpz_sizeinbase (up, 2));
 
   if (precision > BITS_PER_WORD)
 {
@@ -452,14 +397,10 @@ gcc_type_for_interval (mpz_t low, mpz_t up)
   return integer_type_node;
 }
 
-  if (mpz_sgn (low) <= 0)
-unsigned_p = false;
-
-  else if (precision < BITS_PER_WORD)
-{
-  unsigned_p = false;
-  precision++;
-}
+  if (mpz_cmp (low, up) <= 0)
+unsigned_p = (mpz_sgn (low) >= 0);
+  else
+unsigned_p = (mpz_sgn (up) >= 0);
 
   mode = smallest_mode_for_size (precision, MODE_INT);
   precision = GET_MODE_PRECISION (mode);
-- 
1.7.4.1



Re: [cxx-mem-model] __sync_mem_load

2011-06-29 Thread Richard Henderson
On 06/23/2011 03:50 PM, Andrew MacLeod wrote:
> + (define_expand "sync_mem_load"
> +   [(match_operand:SWI 0 "register_operand" "")  ;; output
> +(match_operand:SWI 1 "memory_operand" "");; memory
> +(match_operand:SI  2 "const_int_operand" "")];; memory model
> +""
> + {
> +   if (INTVAL (operands[2]) == MEMMODEL_ACQUIRE || 
> +   INTVAL (operands[2]) == MEMMODEL_SEQ_CST)

Oh, and I suspect all of these will be easier to debug with

  enum memmodel mm = (enum memmodel) INTVAL (operands[2]);
  if (mm == ...)

r~


[PATCH 6/6] Fix PR47654: Compute LB and UB of a CLAST expression.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

PR tree-optimization/47654
* graphite-clast-to-gimple.c (gcc_type_for_value): Removed.
(gcc_type_for_clast_term): Removed.
(gcc_type_for_clast_red): Removed.
(gcc_type_for_clast_bin): Removed.
(lb_ub_for_expr_name): New.
(lb_ub_for_term): New.
(lb_ub_for_expr): New.
(lb_ub_for_red): New.
(lb_ub_for_bin): New.
(gcc_type_for_clast_expr): Reimplemented.
* graphite-ppl.h (value_min): New.

* gcc.dg/graphite/run-id-pr47654.c: New.
---
 gcc/ChangeLog  |   15 ++
 gcc/graphite-clast-to-gimple.c |  281 
 gcc/graphite-ppl.h |   11 +
 gcc/testsuite/ChangeLog|5 +
 gcc/testsuite/gcc.dg/graphite/run-id-pr47654.c |   24 ++
 5 files changed, 240 insertions(+), 96 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/run-id-pr47654.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3117f23..f69f7f8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,20 @@
 2011-06-29  Sebastian Pop  
 
+   PR tree-optimization/47654
+   * graphite-clast-to-gimple.c (gcc_type_for_value): Removed.
+   (gcc_type_for_clast_term): Removed.
+   (gcc_type_for_clast_red): Removed.
+   (gcc_type_for_clast_bin): Removed.
+   (lb_ub_for_expr_name): New.
+   (lb_ub_for_term): New.
+   (lb_ub_for_expr): New.
+   (lb_ub_for_red): New.
+   (lb_ub_for_bin): New.
+   (gcc_type_for_clast_expr): Reimplemented.
+   * graphite-ppl.h (value_min): New.
+
+2011-06-29  Sebastian Pop  
+
* graphite-clast-to-gimple.c (compute_bounds_for_level): Removed.
(compute_type_for_level): Removed.
(clast_get_body_of_loop): Removed.
diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index c8d76c1..686c921 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -379,147 +379,236 @@ clast_to_gcc_expression (tree type, struct clast_expr *e,
   return NULL_TREE;
 }
 
-/* Return a type that could represent the values between LOW and UP.
-   The value of LOW can be bigger than UP.  */
+/* Return the lower bound LB and upper bound UB of the clast_name N.  */
 
-static tree
-gcc_type_for_interval (mpz_t low, mpz_t up)
+static void
+lb_ub_for_name (clast_name_p n, sese region, VEC (tree, heap) *newivs,
+   htab_t newivs_index, htab_t params_index, mpz_t lb, mpz_t ub)
 {
-  bool unsigned_p;
-  tree type;
-  enum machine_mode mode;
-  int precision = MAX (mpz_sizeinbase (low, 2),
-  mpz_sizeinbase (up, 2));
-
-  if (precision > BITS_PER_WORD)
-{
-  gloog_error = true;
-  return integer_type_node;
-}
+  tree l, u;
+  tree type = TREE_TYPE (clast_name_to_gcc (n, region, newivs,
+   newivs_index, params_index));
 
-  if (mpz_cmp (low, up) <= 0)
-unsigned_p = (mpz_sgn (low) >= 0);
+  if (POINTER_TYPE_P (type) || !TYPE_MIN_VALUE (type))
+l = lower_bound_in_type (type, type);
   else
-unsigned_p = (mpz_sgn (up) >= 0);
+l = TYPE_MIN_VALUE (type);
 
-  mode = smallest_mode_for_size (precision, MODE_INT);
-  precision = GET_MODE_PRECISION (mode);
-  type = build_nonstandard_integer_type (precision, unsigned_p);
-
-  if (!type)
-{
-  gloog_error = true;
-  return integer_type_node;
-}
+  if (POINTER_TYPE_P (type) || !TYPE_MAX_VALUE (type))
+u = upper_bound_in_type (type, type);
+  else
+u = TYPE_MAX_VALUE (type);
 
-  return type;
+  tree_int_to_gmp (l, lb);
+  tree_int_to_gmp (u, ub);
 }
 
-/* Return a type that could represent the integer value VAL, or
-   otherwise return NULL_TREE.  */
-
-static tree
-gcc_type_for_value (mpz_t val)
-{
-  return gcc_type_for_interval (val, val);
-}
+/* Return the lower bound LB and upper bound UB of the clast_term T.  */
 
-/* Return the type for the clast_term T used in STMT.  */
-
-static tree
-gcc_type_for_clast_term (struct clast_term *t,
-sese region, VEC (tree, heap) *newivs,
-htab_t newivs_index, htab_t params_index)
+static void
+lb_ub_for_term (struct clast_term *t, sese region,
+   VEC (tree, heap) *newivs, htab_t newivs_index,
+   htab_t params_index, mpz_t lb, mpz_t ub)
 {
   gcc_assert (t->expr.type == clast_expr_term);
 
-  if (!t->var)
-return gcc_type_for_value (t->val);
-
-  return TREE_TYPE (clast_name_to_gcc (t->var, region, newivs,
-  newivs_index, params_index));
+  if (t->var)
+{
+  mpz_t v;
+  lb_ub_for_name ((clast_name_p) (t->var), region, newivs, newivs_index,
+ params_index, lb, ub);
+  mpz_init (v);
+  mpz_abs (v, t->val);
+  mpz_mul (lb, lb, v);
+  mpz_mul (ub, ub, v);
+  mpz_clear (v);
+}
+  else
+{
+  mpz_set (lb, t->val);
+  mpz_se

[PATCH 3/6] Fix PR47654: Loop blocking should strip-mine at least two loops.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

PR tree-optimization/47654
* graphite-blocking.c (pbb_strip_mine_time_depth): Do not return bool.
(lst_do_strip_mine_loop): Return an int.
(lst_do_strip_mine): Same.
(scop_do_strip_mine): Same.
(scop_do_block): Loop blocking should strip-mine at least two loops.
* graphite-interchange.c (lst_interchange_select_outer): Return an int.
(scop_do_interchange): Same.
* graphite-poly.h (scop_do_interchange): Update declaration.
(scop_do_strip_mine): Same.

* gcc.dg/graphite/block-pr47654.c: New.
---
 gcc/ChangeLog |   13 +
 gcc/graphite-blocking.c   |   59 +++--
 gcc/graphite-interchange.c|   21 +
 gcc/graphite-poly.h   |4 +-
 gcc/testsuite/ChangeLog   |5 ++
 gcc/testsuite/gcc.dg/graphite/block-pr47654.c |   25 ++
 6 files changed, 82 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/block-pr47654.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b0e3173..828559a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,18 @@
 2011-06-29  Sebastian Pop  
 
+   PR tree-optimization/47654
+   * graphite-blocking.c (pbb_strip_mine_time_depth): Do not return bool.
+   (lst_do_strip_mine_loop): Return an int.
+   (lst_do_strip_mine): Same.
+   (scop_do_strip_mine): Same.
+   (scop_do_block): Loop blocking should strip-mine at least two loops.
+   * graphite-interchange.c (lst_interchange_select_outer): Return an int.
+   (scop_do_interchange): Same.
+   * graphite-poly.h (scop_do_interchange): Update declaration.
+   (scop_do_strip_mine): Same.
+
+2011-06-29  Sebastian Pop  
+
* graphite-ppl.h (value_max): Correct computation of max.
 
 2011-06-29  Sebastian Pop  
diff --git a/gcc/graphite-blocking.c b/gcc/graphite-blocking.c
index bcd077a..967de9d 100644
--- a/gcc/graphite-blocking.c
+++ b/gcc/graphite-blocking.c
@@ -89,7 +89,7 @@ along with GCC; see the file COPYING3.  If not see
# }
 */
 
-static bool
+static void
 pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride)
 {
   ppl_dimension_type iter, dim, strip;
@@ -151,8 +151,6 @@ pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride)
 ppl_Polyhedron_add_constraint (res, new_cstr);
 ppl_delete_Constraint (new_cstr);
   }
-
-  return true;
 }
 
 /* Returns true when strip mining with STRIDE of the loop LST is
@@ -177,10 +175,10 @@ lst_strip_mine_profitable_p (lst_p lst, int stride)
   return res;
 }
 
-/* Strip-mines all the loops of LST with STRIDE.  Return true if it
-   did strip-mined some loops.  */
+/* Strip-mines all the loops of LST with STRIDE.  Return the number of
+   loops strip-mined.  */
 
-static bool
+static int
 lst_do_strip_mine_loop (lst_p lst, int depth, int stride)
 {
   int i;
@@ -188,26 +186,26 @@ lst_do_strip_mine_loop (lst_p lst, int depth, int stride)
   poly_bb_p pbb;
 
   if (!lst)
-return false;
+return 0;
 
   if (LST_LOOP_P (lst))
 {
-  bool res = false;
+  int res = 0;
 
   FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
-   res |= lst_do_strip_mine_loop (l, depth, stride);
+   res += lst_do_strip_mine_loop (l, depth, stride);
 
   return res;
 }
 
   pbb = LST_PBB (lst);
-  return pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth),
-   stride);
+  pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride);
+  return 1;
 }
 
 /* Strip-mines all the loops of LST with STRIDE.  When STRIDE is zero,
-   read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE.  Return true
-   if it did strip-mined some loops.
+   read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE.  Return the
+   number of strip-mined loops.
 
Strip mining transforms a loop
 
@@ -221,12 +219,12 @@ lst_do_strip_mine_loop (lst_p lst, int depth, int stride)
| S (i = k + j);
 */
 
-static bool
+static int
 lst_do_strip_mine (lst_p lst, int stride)
 {
   int i;
   lst_p l;
-  bool res = false;
+  int res = 0;
   int depth;
 
   if (!stride)
@@ -237,23 +235,23 @@ lst_do_strip_mine (lst_p lst, int stride)
 return false;
 
   FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
-res |= lst_do_strip_mine (l, stride);
+res += lst_do_strip_mine (l, stride);
 
   depth = lst_depth (lst);
   if (depth >= 0
   && lst_strip_mine_profitable_p (lst, stride))
 {
-  res |= lst_do_strip_mine_loop (lst, lst_depth (lst), stride);
+  res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride);
   lst_add_loop_under_loop (lst);
 }
 
   return res;
 }
 
-/* Strip mines all the loops in SCOP.  Returns true when some loops
-   have been strip-mined.  */
+/* Strip mines all the loops in SCOP.  Returns the number of
+   strip-mined loops.  */
 
-bool
+int
 scop_do_strip_min

[PATCH 5/6] Compute the type of the IV based only on the CLAST bounds.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

* graphite-clast-to-gimple.c (compute_bounds_for_level): Removed.
(compute_type_for_level): Removed.
(clast_get_body_of_loop): Removed.
(gcc_type_for_iv_of_clast_loop): Removed.
(graphite_create_new_loop): Use max_precision_type.  Compute the type
of the IV based only on the CLAST bounds.
(translate_clast_for_loop): Do not pass level to
graphite_create_new_loop.
---
 gcc/ChangeLog  |   11 +
 gcc/graphite-clast-to-gimple.c |   95 +--
 2 files changed, 14 insertions(+), 92 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0616b10..3117f23 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
 2011-06-29  Sebastian Pop  
 
+   * graphite-clast-to-gimple.c (compute_bounds_for_level): Removed.
+   (compute_type_for_level): Removed.
+   (clast_get_body_of_loop): Removed.
+   (gcc_type_for_iv_of_clast_loop): Removed.
+   (graphite_create_new_loop): Use max_precision_type.  Compute the type
+   of the IV based only on the CLAST bounds.
+   (translate_clast_for_loop): Do not pass level to
+   graphite_create_new_loop.
+
+2011-06-29  Sebastian Pop  
+
* graphite-clast-to-gimple.c (precision_for_value): Removed.
(precision_for_interval): Removed.
(gcc_type_for_interval): Use mpz_sizeinbase.
diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index 70031a0..c8d76c1 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -603,94 +603,6 @@ graphite_create_new_guard (sese region, edge entry_edge,
   return exit_edge;
 }
 
-/* Compute the lower bound LOW and upper bound UP for the induction
-   variable at LEVEL for the statement PBB, based on the transformed
-   scattering of PBB: T|I|G|Cst, with T the scattering transform, I
-   the iteration domain, and G the context parameters.  */
-
-static void
-compute_bounds_for_level (poly_bb_p pbb, int level, mpz_t low, mpz_t up)
-{
-  ppl_Pointset_Powerset_C_Polyhedron_t ps;
-  ppl_Linear_Expression_t le;
-
-  combine_context_id_scat (&ps, pbb, false);
-
-  /* Prepare the linear expression corresponding to the level that we
- want to maximize/minimize.  */
-  {
-ppl_dimension_type dim = pbb_nb_scattering_transform (pbb)
-  + pbb_dim_iter_domain (pbb) + pbb_nb_params (pbb);
-
-ppl_new_Linear_Expression_with_dimension (&le, dim);
-ppl_set_coef (le, 2 * level + 1, 1);
-  }
-
-  ppl_max_for_le_pointset (ps, le, up);
-  ppl_min_for_le_pointset (ps, le, low);
-  ppl_delete_Linear_Expression (le);
-  ppl_delete_Pointset_Powerset_C_Polyhedron (ps);
-}
-
-/* Compute the type for the induction variable at LEVEL for the
-   statement PBB, based on the transformed schedule of PBB.  */
-
-static tree
-compute_type_for_level (poly_bb_p pbb, int level)
-{
-  mpz_t low, up;
-  tree type;
-
-  mpz_init (low);
-  mpz_init (up);
-
-  compute_bounds_for_level (pbb, level, low, up);
-  type = gcc_type_for_interval (low, up);
-
-  mpz_clear (low);
-  mpz_clear (up);
-  return type;
-}
-
-/* Walks a CLAST and returns the first statement in the body of a
-   loop.  */
-
-static struct clast_user_stmt *
-clast_get_body_of_loop (struct clast_stmt *stmt)
-{
-  if (!stmt
-  || CLAST_STMT_IS_A (stmt, stmt_user))
-return (struct clast_user_stmt *) stmt;
-
-  if (CLAST_STMT_IS_A (stmt, stmt_for))
-return clast_get_body_of_loop (((struct clast_for *) stmt)->body);
-
-  if (CLAST_STMT_IS_A (stmt, stmt_guard))
-return clast_get_body_of_loop (((struct clast_guard *) stmt)->then);
-
-  if (CLAST_STMT_IS_A (stmt, stmt_block))
-return clast_get_body_of_loop (((struct clast_block *) stmt)->body);
-
-  gcc_unreachable ();
-}
-
-/* Returns the type for the induction variable for the loop translated
-   from STMT_FOR.  */
-
-static tree
-gcc_type_for_iv_of_clast_loop (struct clast_for *stmt_for, int level,
-  tree lb_type, tree ub_type)
-{
-  struct clast_stmt *stmt = (struct clast_stmt *) stmt_for;
-  struct clast_user_stmt *body = clast_get_body_of_loop (stmt);
-  CloogStatement *cs = body->statement;
-  poly_bb_p pbb = (poly_bb_p) cloog_statement_usr (cs);
-
-  return max_signed_precision_type (lb_type, max_precision_type
-   (ub_type, compute_type_for_level
-(pbb, level - 1)));
-}
-
 /* Creates a new LOOP corresponding to Cloog's STMT.  Inserts an
induction variable for the new LOOP.  New LOOP is attached to CFG
starting at ENTRY_EDGE.  LOOP is inserted into the loop tree and
@@ -703,13 +615,13 @@ static struct loop *
 graphite_create_new_loop (sese region, edge entry_edge,
  struct clast_for *stmt,
  loop_p outer, VEC (tree, heap) **newivs,
- htab_t newivs_index, htab_t params_index, int level)
+ htab_t

[PATCH 0/6] Fix PR47654

2011-06-29 Thread Sebastian Pop
Hi,
the following patch set fixes PR47654:

  Correct typo.
  Correct computation of max.
  Fix PR47654: Loop blocking should strip-mine at least two loops.
  Fix computation of precision.
  Compute the type of the IV based only on the CLAST bounds.
  Fix PR47654: Compute LB and UB of a CLAST expression.

First, "Loop blocking should strip-mine at least two loops" disables
loop blocking when the strip mine is not applied to at least two
loops.  In the testcase of this PR we have the first loop that does
not contain enough iterations to be strip mined:

  for (i = 0; i < 40; i++)
for (j = 0; j < 128; j++)
  a[j][i] = 4;

The second interesting patch "Fix computation of precision" uses the
mpz_sizeinbase function instead of computing the log of the gmp value.

"Compute the type of the IV based only on the CLAST bounds" removes
the computation of the type of the induction variable based on the
polyhedral representation: this is redundant information at the code
generation level, as cloog has already integrated this info in the
CLAST.

Finally, "Compute LB and UB of a CLAST expression" fixes the root
cause of PR47654: it reimplements the type computation based on the
lower and upper bounds of CLAST expressions.

This patch set passed bootstrap and test c,c++,fortran on amd64-linux.
Full bootstrap and test in progress on amd64-linux.  Ok for trunk?

Thanks,
Sebastian

 gcc/ChangeLog  |   53 +++
 gcc/graphite-blocking.c|   59 ++--
 gcc/graphite-clast-to-gimple.c |  417 ++--
 gcc/graphite-interchange.c |   21 +-
 gcc/graphite-poly.h|4 +-
 gcc/graphite-ppl.h |   14 +-
 gcc/testsuite/ChangeLog|   10 +
 gcc/testsuite/gcc.dg/graphite/block-pr47654.c  |   25 ++
 gcc/testsuite/gcc.dg/graphite/run-id-pr47654.c |   24 ++
 9 files changed, 343 insertions(+), 284 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/block-pr47654.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/run-id-pr47654.c

-- 
1.7.4.1



[PATCH 2/6] Correct computation of max.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

* graphite-ppl.h (value_max): Correct computation of max.
---
 gcc/ChangeLog  |4 
 gcc/graphite-ppl.h |3 ++-
 2 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8be5adb..b0e3173 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,9 @@
 2011-06-29  Sebastian Pop  
 
+   * graphite-ppl.h (value_max): Correct computation of max.
+
+2011-06-29  Sebastian Pop  
+
* graphite-clast-to-gimple.c (clast_name_to_index): Add missing space.
 
 2011-06-29  Eric Botcazou  
diff --git a/gcc/graphite-ppl.h b/gcc/graphite-ppl.h
index 695d01f..49bde61 100644
--- a/gcc/graphite-ppl.h
+++ b/gcc/graphite-ppl.h
@@ -131,7 +131,8 @@ value_max (mpz_t res, mpz_t v1, mpz_t v2)
 {
   if (mpz_cmp (v1, v2) < 0)
 mpz_set (res, v2);
-  mpz_set (res, v1);
+  else
+mpz_set (res, v1);
 }
 
 /* Builds a new identity map for dimension DIM.  */
-- 
1.7.4.1



[PATCH 1/6] Correct typo.

2011-06-29 Thread Sebastian Pop
2011-06-29  Sebastian Pop  

* graphite-clast-to-gimple.c (clast_name_to_index): Add missing space.
---
 gcc/ChangeLog  |4 
 gcc/graphite-clast-to-gimple.c |2 +-
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e37d823..8be5adb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2011-06-29  Sebastian Pop  
+
+   * graphite-clast-to-gimple.c (clast_name_to_index): Add missing space.
+
 2011-06-29  Eric Botcazou  
 
PR tree-optimization/49539
diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index c8356d3..4a4c3d2 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -88,7 +88,7 @@ clast_name_to_index (clast_name_p name, htab_t index_table)
 
 #ifdef CLOOG_ORG
   gcc_assert (name->type == clast_expr_name);
-  tmp.name = ((const struct clast_name*) name)->name;
+  tmp.name = ((const struct clast_name *) name)->name;
 #else
   tmp.name = name;
 #endif
-- 
1.7.4.1



C++ PATCH for c++/49554 (bogus error with lambda in template)

2011-06-29 Thread Jason Merrill
The failure in this testcase was happening because we were trying to 
evaluate the capture decltype again when instantiating the lambda 
operator(), but it only works when we're still in the enclosing function 
context.  Fixed by basically waiting to copy the type over at 
instantiation time rather than copy the dependent type at template 
definition time.


Tested x86_64-pc-linux-gnu, applied to trunk.
commit 7493431b3a7c540254aa92cd0e8ea873eace94f8
Author: Jason Merrill 
Date:   Wed Jun 29 11:55:58 2011 -0400

	PR c++/49554
	* semantics.c (lambda_proxy_type): New.
	(build_capture_proxy): Use it.
	* cp-tree.h (DECLTYPE_FOR_LAMBDA_PROXY): New.
	* pt.c (tsubst) [DECLTYPE_TYPE]: Use them.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 7244cc8..55c88e3 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -3418,11 +3418,13 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter)
 
 /* These flags indicate that we want different semantics from normal
decltype: lambda capture just drops references, lambda return also does
-   type decay.  */
+   type decay, lambda proxies look through implicit dereference.  */
 #define DECLTYPE_FOR_LAMBDA_CAPTURE(NODE) \
   TREE_LANG_FLAG_0 (DECLTYPE_TYPE_CHECK (NODE))
 #define DECLTYPE_FOR_LAMBDA_RETURN(NODE) \
   TREE_LANG_FLAG_1 (DECLTYPE_TYPE_CHECK (NODE))
+#define DECLTYPE_FOR_LAMBDA_PROXY(NODE) \
+  TREE_LANG_FLAG_2 (DECLTYPE_TYPE_CHECK (NODE))
 
 /* Nonzero for VAR_DECL and FUNCTION_DECL node means that `extern' was
specified in its declaration.  This can also be set for an
@@ -5455,6 +5457,7 @@ extern tree build_lambda_object			(tree);
 extern tree begin_lambda_type   (tree);
 extern tree lambda_capture_field_type		(tree);
 extern tree lambda_return_type			(tree);
+extern tree lambda_proxy_type			(tree);
 extern tree lambda_function			(tree);
 extern void apply_lambda_return_type(tree, tree);
 extern tree add_capture (tree, tree, tree, bool, bool);
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b3dd85f..d1d8336 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -11108,6 +11108,8 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl)
 	  type = lambda_capture_field_type (type);
 	else if (DECLTYPE_FOR_LAMBDA_RETURN (t))
 	  type = lambda_return_type (type);
+	else if (DECLTYPE_FOR_LAMBDA_PROXY (t))
+	  type = lambda_proxy_type (type);
 	else
 	  type = finish_decltype_type
 	(type, DECLTYPE_TYPE_ID_EXPR_OR_MEMBER_ACCESS_P (t), complain);
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 4581729..fb984d4 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -8489,6 +8489,27 @@ insert_pending_capture_proxies (void)
   LAMBDA_EXPR_PENDING_PROXIES (lam) = NULL;
 }
 
+/* Given REF, a COMPONENT_REF designating a field in the lambda closure,
+   return the type we want the proxy to have: the type of the field itself,
+   with added const-qualification if the lambda isn't mutable and the
+   capture is by value.  */
+
+tree
+lambda_proxy_type (tree ref)
+{
+  tree type;
+  if (REFERENCE_REF_P (ref))
+ref = TREE_OPERAND (ref, 0);
+  type = TREE_TYPE (ref);
+  if (!dependent_type_p (type))
+return type;
+  type = cxx_make_type (DECLTYPE_TYPE);
+  DECLTYPE_TYPE_EXPR (type) = ref;
+  DECLTYPE_FOR_LAMBDA_PROXY (type) = true;
+  SET_TYPE_STRUCTURAL_EQUALITY (type);
+  return type;
+}
+
 /* MEMBER is a capture field in a lambda closure class.  Now that we're
inside the operator(), build a placeholder var for future lookups and
debugging.  */
@@ -8496,7 +8517,7 @@ insert_pending_capture_proxies (void)
 tree
 build_capture_proxy (tree member)
 {
-  tree var, object, fn, closure, name, lam;
+  tree var, object, fn, closure, name, lam, type;
 
   closure = DECL_CONTEXT (member);
   fn = lambda_function (closure);
@@ -8511,7 +8532,8 @@ build_capture_proxy (tree member)
   /* Remove the __ inserted by add_capture.  */
   name = get_identifier (IDENTIFIER_POINTER (DECL_NAME (member)) + 2);
 
-  var = build_decl (input_location, VAR_DECL, name, TREE_TYPE (object));
+  type = lambda_proxy_type (object);
+  var = build_decl (input_location, VAR_DECL, name, type);
   SET_DECL_VALUE_EXPR (var, object);
   DECL_HAS_VALUE_EXPR_P (var) = 1;
   DECL_ARTIFICIAL (var) = 1;
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-template3.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-template3.C
new file mode 100644
index 000..fd6f1d3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-template3.C
@@ -0,0 +1,33 @@
+// PR c++/49554
+// { dg-options -std=c++0x }
+
+template
+  struct base
+  {
+struct iterator { };
+
+iterator begin();
+  };
+
+template
+class flist : public base
+{
+  typedef base Base;
+
+  typedef typename Base::iterator Base_iterator;
+public:
+
+  void
+  resize()
+  {
+Base_iterator b = Base::begin();
+
+[b](int i) { return i; };
+  }
+};
+
+void test01()
+{
+  flist fl;
+  fl.resize();
+}


C++ PATCH for c++/49520 (bogus error with using in constexpr)

2011-06-29 Thread Jason Merrill
A CLEANUP_POINT_EXPR wraps each statement, so we need to strip it at a 
lower level.


Tested x86_64-pc-linux-gnu, applied to trunk.
commit 505b4baa3619375d81b409f9ff2bde95cce0f50a
Author: Jason Merrill 
Date:   Wed Jun 29 12:43:19 2011 -0400

	PR c++/49520
	* semantics.c (constexpr_fn_retval): Handle CLEANUP_POINT_EXPR here.
	(massage_constexpr_body): Not here.

diff --git a/gcc/cp/cp-tree.def b/gcc/cp/cp-tree.def
index 12c01cb..bb1b753 100644
--- a/gcc/cp/cp-tree.def
+++ b/gcc/cp/cp-tree.def
@@ -207,7 +207,7 @@ DEFTREECODE (UNBOUND_CLASS_TEMPLATE, "unbound_class_template", tcc_type, 0)
 DEFTREECODE (USING_DECL, "using_decl", tcc_declaration, 0)
 
 /* A using directive. The operand is USING_STMT_NAMESPACE.  */
-DEFTREECODE (USING_STMT, "using_directive", tcc_statement, 1)
+DEFTREECODE (USING_STMT, "using_stmt", tcc_statement, 1)
 
 /* An un-parsed default argument.  Holds a vector of input tokens and
a vector of places where the argument was instantiated before
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index fb984d4..ad68a01 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -5657,6 +5657,9 @@ constexpr_fn_retval (tree body)
 	return NULL_TREE;
   return error_mark_node;
 
+case CLEANUP_POINT_EXPR:
+  return constexpr_fn_retval (TREE_OPERAND (body, 0));
+
 case USING_STMT:
   return NULL_TREE;
 
@@ -5683,8 +5686,6 @@ massage_constexpr_body (tree fun, tree body)
 body = EH_SPEC_STMTS (body);
   if (TREE_CODE (body) == MUST_NOT_THROW_EXPR)
 	body = TREE_OPERAND (body, 0);
-  if (TREE_CODE (body) == CLEANUP_POINT_EXPR)
-body = TREE_OPERAND (body, 0);
   body = constexpr_fn_retval (body);
 }
   return body;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-using2.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-using2.C
new file mode 100644
index 000..6b28281
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-using2.C
@@ -0,0 +1,18 @@
+// PR c++/49520
+// { dg-options -std=c++0x }
+
+namespace x { void foo(); }
+
+template
+struct traits
+{
+static constexpr bool f() { return true; }
+
+static constexpr bool g()
+{
+using x::foo;
+return f() && noexcept(foo());
+}
+};
+
+template struct traits;


Re: [cxx-mem-model] __sync_mem_load

2011-06-29 Thread Richard Henderson
On 06/23/2011 03:50 PM, Andrew MacLeod wrote:
>   * doc/extend.texi (__sync_mem_load): Document.
>   * c-family/c-common.c (resolve_overloaded_builtin): Add 
>   BUILT_IN_SYNC_MEM_LOAD_N.
>   * optabs.c (expand_sync_mem_load): New.
>   * optabs.h (enum direct_optab_index): Add DOI_sync_mem_load.
>   (sync_mem_load_optab): Define.
>   * genopinit.c: Add entry for sync_mem_load.
>   * builtins.c (expand_builtin_sync_mem_load): New.
>   (expand_builtin): Handle BUILT_IN_SYNC_MEM_LOAD_*
>   * sync-builtins.def: Add entries for BUILT_IN_SYNC_MEM_LOAD_*.
>   * testsuite/gcc.dg/sync-mem-invalid.c: Add invalid load tests.
>   * testsuite/gcc.dg/sync-mem.h: Add load executable tests.
>   * builtin-types.def (BT_FN_I{1,2,4,8,16}_VPTR_INT): New.
>   * expr.h (expand_sync_mem_load): Declare.
>   * fortran/types.def (BT_FN_I{1,2,4,8,16}_VPTR_INT): New.
>   * config/i386/sync.md (sync_mem_load): New pattern.

Looks good.

> + (define_expand "sync_mem_load"
> +   [(match_operand:SWI 0 "register_operand" "")  ;; output
> +(match_operand:SWI 1 "memory_operand" "");; memory
> +(match_operand:SI  2 "const_int_operand" "")];; memory model
> +""
> + {
> +   if (INTVAL (operands[2]) == MEMMODEL_ACQUIRE || 
> +   INTVAL (operands[2]) == MEMMODEL_SEQ_CST)
> + expand_builtin_sync_synchronize ();
> +   ix86_expand_move (mode, operands);
> +   DONE;
> + })

Formatting error (the || operator belongs at the start of the next line).

Invoke gen_memory_barrier directly instead of a
call back into expand_builtin_sync_synchronize.

Isn't there a MEMMODEL setting that would call for the
use of LFENCE instead of MFENCE?


r~


Re: PATCH [2/n]: Prepare x32: Convert pointer to TLS symbol if needed

2011-06-29 Thread H.J. Lu
On Wed, Jun 29, 2011 at 7:06 AM, H.J. Lu  wrote:
> On Wed, Jun 29, 2011 at 1:45 AM, Richard Sandiford
>  wrote:
>> "H.J. Lu"  writes:
>>> @@ -706,7 +706,13 @@ precompute_register_parameters (int num_actuals, 
>>> struct arg_data *args,
>>>          pseudo now.  TLS symbols sometimes need a call to resolve.  */
>>>       if (CONSTANT_P (args[i].value)
>>>           && !targetm.legitimate_constant_p (args[i].mode, args[i].value))
>>> -       args[i].value = force_reg (args[i].mode, args[i].value);
>>> +       {
>>> +         if (GET_MODE (args[i].value) != args[i].mode)
>>> +           args[i].value = convert_to_mode (args[i].mode,
>>> +                                            args[i].value,
>>> +                                            args[i].unsignedp);
>>> +         args[i].value = force_reg (args[i].mode, args[i].value);
>>> +       }
>>
>> But if GET_MODE (args[i].value) != args[i].mode, then the call to
>> targetm.legitimate_constant_p looks wrong.  The mode passed in the
>> first argument is supposed to be the mode of the second argument.
>>
>> Is there any reason why this and the following:
>>
>>        /* If we are to promote the function arg to a wider mode,
>>           do it now.  */
>>
>>        if (args[i].mode != TYPE_MODE (TREE_TYPE (args[i].tree_value)))
>>          args[i].value
>>            = convert_modes (args[i].mode,
>>                             TYPE_MODE (TREE_TYPE (args[i].tree_value)),
>>                             args[i].value, args[i].unsignedp);
>>
>> need to be done in the current order?  I can't think of any off-hand.
>> If not, would swapping them also fix the bug?
>>
>> (I can't review this either way, of course.)
>
> It works on the testcase.  I will do a full test.
>

It works.  There are no regressions on Linux/x86-64.
OK for trunk?

Thanks.

-- 
H.J.


2011-06-29  H.J. Lu  

PR middle-end/47715
* calls.c (precompute_register_parameters): Promote the function
argument before checking non-legitimate constant.
2011-06-29  H.J. Lu  

PR middle-end/47715
* calls.c (precompute_register_parameters): Promote the function
argument before checking non-legitimate constant.

diff --git a/gcc/calls.c b/gcc/calls.c
index bba477c..7538e4e 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -702,12 +702,6 @@ precompute_register_parameters (int num_actuals, struct 
arg_data *args,
pop_temp_slots ();
  }
 
-   /* If the value is a non-legitimate constant, force it into a
-  pseudo now.  TLS symbols sometimes need a call to resolve.  */
-   if (CONSTANT_P (args[i].value)
-   && !targetm.legitimate_constant_p (args[i].mode, args[i].value))
- args[i].value = force_reg (args[i].mode, args[i].value);
-
/* If we are to promote the function arg to a wider mode,
   do it now.  */
 
@@ -717,6 +711,12 @@ precompute_register_parameters (int num_actuals, struct 
arg_data *args,
 TYPE_MODE (TREE_TYPE (args[i].tree_value)),
 args[i].value, args[i].unsignedp);
 
+   /* If the value is a non-legitimate constant, force it into a
+  pseudo now.  TLS symbols sometimes need a call to resolve.  */
+   if (CONSTANT_P (args[i].value)
+   && !targetm.legitimate_constant_p (args[i].mode, args[i].value))
+ args[i].value = force_reg (args[i].mode, args[i].value);
+
/* If we're going to have to load the value by parts, pull the
   parts into pseudos.  The part extraction process can involve
   non-trivial computation.  */


[commit] Fix -Werror build break (Re: PATCH [10/n]: Prepare x32: PR rtl-optimization/49114)

2011-06-29 Thread Ulrich Weigand
> H.J. Lu wrote:
> > >>* reload.c (struct replacement): Remove SUBREG_LOC member.
> > >>(push_reload): Do not set it.
> > >>(push_replacement): Likewise.
> > >>(subst_reload): Remove dead code.
> > >>(copy_replacements): Remove assertion.
> > >>(copy_replacements_1): Do not handle SUBREG_LOC.
> > >>(move_replacements): Likewise.
> > >>(find_replacement): Remove dead code.  Use 
> > >> reload_adjust_reg_for_mode.
> > >>Detect subregs via recursive descent instead of via SUBREG_LOC.
> > >>
> > >
> > > It works much better.  I am testing it now.
> > >
> > 
> > It works.  There are no regressions on Linux/ia32 nor Linux/x86-64.
> > Can you check it in and mention PR rtl-optimization/49114 in the ChangeLog?
> 
> OK, I've checked the patch in now.  Thanks for testing!

Pat points out that this breaks the build on platforms that do not define
LIMIT_RELOAD_CLASS due to a -Werror unused variable warning:

/home/gccbuild/gcc_trunk_anonsvn/gcc/gcc/reload.c: In function 'push_reload':
/home/gccbuild/gcc_trunk_anonsvn/gcc/gcc/reload.c:926:28: error: variable
'out_subreg_loc' set but not used [-Werror=unused-but-set-variable]
/home/gccbuild/gcc_trunk_anonsvn/gcc/gcc/reload.c:926:8: error: variable
'in_subreg_loc' set but not used [-Werror=unused-but-set-variable]

Fixed by placing the variable under #ifdef LIMIT_RELOAD_CLASS as well.
Committed to mainline.

Bye,
Ulrich


ChangeLog:

PR rtl-optimization/49114
* reload.c (push_reload): Define in_subreg_loc and out_subreg_loc
only if LIMIT_RELOAD_CLASS to avoid -Werror build breaks.

Index: gcc/reload.c
===
*** gcc/reload.c(revision 175631)
--- gcc/reload.c(working copy)
*** push_reload (rtx in, rtx out, rtx *inloc
*** 923,929 
--- 923,931 
int i;
int dont_share = 0;
int dont_remove_subreg = 0;
+ #ifdef LIMIT_RELOAD_CLASS
rtx *in_subreg_loc = 0, *out_subreg_loc = 0;
+ #endif
int secondary_in_reload = -1, secondary_out_reload = -1;
enum insn_code secondary_in_icode = CODE_FOR_nothing;
enum insn_code secondary_out_icode = CODE_FOR_nothing;
*** push_reload (rtx in, rtx out, rtx *inloc
*** 1068,1074 
--- 1070,1078 
  #endif
  ))
  {
+ #ifdef LIMIT_RELOAD_CLASS
in_subreg_loc = inloc;
+ #endif
inloc = &SUBREG_REG (in);
in = *inloc;
  #if ! defined (LOAD_EXTEND_OP) && ! defined (WORD_REGISTER_OPERATIONS)
*** push_reload (rtx in, rtx out, rtx *inloc
*** 1163,1169 
--- 1167,1175 
  #endif
  ))
  {
+ #ifdef LIMIT_RELOAD_CLASS
out_subreg_loc = outloc;
+ #endif
outloc = &SUBREG_REG (out);
out = *outloc;
  #if ! defined (LOAD_EXTEND_OP) && ! defined (WORD_REGISTER_OPERATIONS)


-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Re: Backport patches that are correctness or performance bug fixes for latest AMD/Intel hardware.

2011-06-29 Thread Jakub Jelinek
On Wed, Jun 29, 2011 at 11:03:55AM -0500, harsha.jaga...@amd.com wrote:
> Is it ok to commit backported patch from trunk below to gcc 4.6 as long as
> bootstrap and tests pass (ongoing)? This is one of the patches that fixes
> a bug significant enough for recent AMD and Intel hardware.

Yeah, this is ok for 4.6.

> 2011-06-29 Harsha Jagasia 
> Backport from mainline
>   
>   2011-05-31  Alexandre Oliva  
> 
>   * config/i386/i386.c (ix86_rtx_costs): Drop NEG from sub for FMA.
>   * config/i386/sse.md: Add n to negated FMA pattern names.

Jakub


Re: PATCH [5/n]: Prepare x32: PR middle-end/48016: Inconsistency in non-local goto save area

2011-06-29 Thread Michael Matz
Hi,

On Wed, 29 Jun 2011, H.J. Lu wrote:

> > diff --git a/gcc/function.c b/gcc/function.c
> > index 81c4d39..131bc09 100644
> > --- a/gcc/function.c
> > +++ b/gcc/function.c
> > @@ -4780,7 +4780,7 @@ expand_function_start (tree subr)
> >                       cfun->nonlocal_goto_save_area,
> >                       integer_zero_node, NULL_TREE, NULL_TREE);
> >       r_save = expand_expr (t_save, NULL_RTX, VOIDmode, EXPAND_WRITE);
> > -      r_save = convert_memory_address (Pmode, r_save);
> > +      r_save = adjust_address (r_save, Pmode, 0);

This is actually the same problem as in explow.c.  t_save is built with 
ptr_type_node, where it should have been using
  TREE_TYPE (TREE_TYPE (cfun->nonlocal_goto_save_area))

Then r_save should have the correct mode already, possibly this could be 
asserted.  You are right that r_save needs to correspond to the 
nonlocal_goto_save_area[0] array-ref, hence pseudos aren't okay, therefore 
convert_memory_address isn't.  Actually I think we might even want to 
assert that indeed the expanded r_save is of Pmode already.


Ciao,
Michael.

Re: [PATCH] Split out SCEV dumping

2011-06-29 Thread Sebastian Pop
On Wed, Jun 29, 2011 at 09:27, Richard Guenther  wrote:
>
> I'm finally annoyed enough about this to do the patch splitting out
> SCEV details dumping to a TDF_SCEV (-scev) flag.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Looks very good.  Thanks for this cleanup.

Sebastian


Re: [testsuite] skip ARM tests if no thumb2 support

2011-06-29 Thread Richard Sandiford
Janis Johnson  writes:
> On 06/29/2011 06:55 AM, Richard Earnshaw wrote:
>> I'd posit a third option:
>> 
>>  3. Add a new dg directive (perhaps dg-ignore-multilib) that instructs
>> the framework to ignore the multilib options entirely (only supported
>> for compile/assembly tests).
>> 
>> There are a lot of target-specific tests that are not really testing
>> execution, just that the compiler generates the right instruction under
>> specific compilation options.  It seems silly to force all this into the
>> multi-lib framework.
>> 
>> R.
>
> The MIPS tests have support for something like that, but it seems
> to me that if tests are being run for lots of multilibs then it's
> better to test things like code generation for a variety of relevant
> options instead of using the same options every time.  

I might be misunderstanding what you mean, but the MIPS version tries
to keep as many of the original options as possible, only overriding
those that are known to be incompatible with the test.  So e.g. a
test for a MIPS IV feature will run normally on a MIPS IV-compatible
target, but will override the architecture on other targets.

That said, I wouldn't recommend the mips.exp approach to anyone.
It's been useful, and it seems to do what it's meant to, but it's
a very... heavy-weight approach.

Richard


Re: [PATCH, PR 49089] Don't split AVX256 unaligned loads by default on bdver1 and generic

2011-06-29 Thread Eric Botcazou
> Thanks,

Note that there is no "i386" component in Bugzilla, only a "target" so this 
should have been PR target/49089.  The end result is that there are no xrefs in 
the PR, which is still open btw.  So please add the xrefs to the commits in the 
PR manually and close it if you are done with it.

-- 
Eric Botcazou


Backport patches that are correctness or performance bug fixes for latest AMD/Intel hardware.

2011-06-29 Thread harsha.jagasia
Is it ok to commit backported patch from trunk below to gcc 4.6 as long as
bootstrap and tests pass (ongoing)? This is one of the patches that fixes
a bug significant enough for recent AMD and Intel hardware.

2011-06-29 Harsha Jagasia 
Backport from mainline

2011-05-31  Alexandre Oliva  

* config/i386/i386.c (ix86_rtx_costs): Drop NEG from sub for FMA.
* config/i386/sse.md: Add n to negated FMA pattern names.

Index: config/i386/sse.md
===
--- config/i386/sse.md  (revision 175646)
+++ config/i386/sse.md  (working copy)
@@ -2130,7 +2130,7 @@ (define_insn "*fma_fmsub_"
   [(set_attr "type" "ssemuladd")
(set_attr "mode" "")])
 
-(define_insn "*fma_fmadd_"
+(define_insn "*fma_fnmadd_"
   [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
  (neg:FMAMODE
@@ -2145,7 +2145,7 @@ (define_insn "*fma_fmadd_"
   [(set_attr "type" "ssemuladd")
(set_attr "mode" "")])
 
-(define_insn "*fma_fmsub_"
+(define_insn "*fma_fnmsub_"
   [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
  (neg:FMAMODE
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 175646)
+++ config/i386/i386.c  (working copy)
@@ -29081,12 +29081,12 @@ ix86_rtx_costs (rtx x, int code, int out
 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
sub = XEXP (x, 0);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
+ sub = XEXP (sub, 0);
*total += rtx_cost (sub, FMA, speed);
 
sub = XEXP (x, 2);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
+ sub = XEXP (sub, 0);
*total += rtx_cost (sub, FMA, speed);
return true;
   }



Re: [Patch, AVR]: Cleanup avr.c a bit

2011-06-29 Thread Andreas Schwab
Georg-Johann Lay  writes:

> @@ -1257,17 +1129,19 @@ print_operand_address (FILE *file, rtx a
> rtx x = addr;
> if (GET_CODE (x) == CONST)
>   x = XEXP (x, 0);
> -   if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT)
> +   if (GET_CODE (x) == PLUS
> +  && CONST_INT_P (XEXP (x,1)))

Please also add a space after the comma.

> @@ -5034,9 +4933,11 @@ avr_insert_attributes (tree node, tree *
>  {
>static const char dsec[] = ".progmem.data";
>  
> -  *attributes = tree_cons (get_identifier ("section"),
> -   build_tree_list (NULL, build_string 
> (strlen (dsec), dsec)),
> -   *attributes);
> +  *attributes =
> +tree_cons (get_identifier ("section"),
> +   build_tree_list (NULL,
> +build_string (strlen (dsec), dsec)),
> +   *attributes);

GNU style is to break before the operator.

Andreas.

-- 
Andreas Schwab, sch...@redhat.com
GPG Key fingerprint = D4E8 DBE3 3813 BB5D FA84  5EC7 45C6 250E 6F00 984E
"And now for something completely different."


[Patch, AVR]: Cleanup avr.c a bit

2011-06-29 Thread Georg-Johann Lay
This patch is mostly mechanical:

Add some comments, break long lines, use CONST_INT_P instead of
CONST_INT == GET_CODE().

The targetm initializer is moved down in the file similar to m32c,
s390, alpha, etc. back ends.

That way a hook can be defined on the fly:

static foo_t
foo_hook (foo_args)
{
  ...
}

#undef FOO_HOOK
#define FOO_HOOK foo_hook

targetm = ...

instead of the redundant prototyping


static foo_t foo_hook (foo_args)

#undef FOO_HOOK
#define FOO_HOOK foo_hook

targetm = ...

foo_t
foo_hook (foo_args)
{
  ...
}


This reduces redundancy in avr.c, and subsequent patches that
add/remove hooks will need fewer lines to be changed.


reg_class_tab is now a local static and uses the smallest register class.


Tested without regressions.

Johann

* config/avr/avr.c (targetm): Move down.
(TARGET_ASM_ALIGNED_HI_OP): Move down.
(TARGET_ASM_ALIGNED_SI_OP): Move down.
(TARGET_ASM_UNALIGNED_HI_OP): Move down.
(TARGET_ASM_UNALIGNED_SI_OP): Move down.
(TARGET_ASM_INTEGER): Move down.
(TARGET_ASM_FILE_START): Move down.
(TARGET_ASM_FILE_END): Move down.
(TARGET_ASM_FUNCTION_END_PROLOGUE): Move down.
(TARGET_ASM_FUNCTION_BEGIN_EPILOGUE): Move down.
(TARGET_FUNCTION_VALUE): Move down.
(TARGET_LIBCALL_VALUE): Move down.
(TARGET_FUNCTION_VALUE_REGNO_P): Move down.
(TARGET_ATTRIBUTE_TABLE): Move down.
(TARGET_ASM_FUNCTION_RODATA_SECTION): Move down.
(TARGET_INSERT_ATTRIBUTES): Move down.
(TARGET_SECTION_TYPE_FLAGS): Move down.
(TARGET_ASM_INIT_SECTIONS): Move down.
(TARGET_ENCODE_SECTION_INFO): Move down.
(TARGET_REGISTER_MOVE_COST): Move down.
(TARGET_MEMORY_MOVE_COST): Move down.
(TARGET_RTX_COSTS): Move down.
(TARGET_ADDRESS_COST): Move down.
(TARGET_MACHINE_DEPENDENT_REORG): Move down.
(TARGET_FUNCTION_ARG): Move down.
(TARGET_FUNCTION_ARG_ADVANCE): Move down.
(TARGET_LEGITIMIZE_ADDRESS): Move down.
(TARGET_RETURN_IN_MEMORY): Move down.
(TARGET_STRICT_ARGUMENT_NAMING): Move down.
(TARGET_BUILTIN_SETJMP_FRAME_VALUE): Move down.
(TARGET_HARD_REGNO_SCRATCH_OK): Move down.
(TARGET_CASE_VALUES_THRESHOLD): Move down.
(TARGET_LEGITIMATE_ADDRESS_P): Move down.
(TARGET_FRAME_POINTER_REQUIRED): Move down.
(TARGET_CAN_ELIMINATE): Move down.
(TARGET_CLASS_LIKELY_SPILLED_P): Move down.
(TARGET_OPTION_OVERRIDE): Move down.
(TARGET_CANNOT_MODIFY_JUMPS_P): Move down.
(TARGET_FUNCTION_OK_FOR_SIBCALL): Move down.
(TARGET_INIT_BUILTINS): Move down.
(TARGET_EXPAND_BUILTIN): Move down.
(avr_operand_rtx_cost): Move down.
(avr_init_machine_status): Move up.
(avr_legitimize_address): Define on the fly.
(avr_legitimate_address_p): Define on the fly.
(avr_hard_regno_scratch_ok): Define on the fly.
(avr_builtin_setjmp_frame_value): Define on the fly.
(avr_case_values_threshold): Define on the fly.
(avr_can_eliminate): Define on the fly.
(avr_asm_named_section): Define on the fly.
(avr_frame_pointer_required_p): Define on the fly.
(avr_regnames): Remove unused variable.
(reg_class_tab): Use SIMPLE_LD_REGS for r16-r23.
Use NO_LD_REGS for r0-r15.
(avr_regno_reg_class): Incorporate reg_class_tab.
(out_movqi_r_mr): Use CONST_INT_P.
(out_movqi_mr_r): Use CONST_INT_P.
(output_movsisf): Use CONST_INT_P.
(out_shift_with_cnt): Use CONST_INT_P.
(ashlqi3_out): Use CONST_INT_P.
(ashlhi3_out): Use CONST_INT_P.
(ashlsi3_out): Use CONST_INT_P.
(ashrqi3_out): Use CONST_INT_P.
(ashrhi3_out): Use CONST_INT_P.
(ashrsi3_out): Use CONST_INT_P.
(lshrqi3_out): Use CONST_INT_P.
(lshrhi3_out): Use CONST_INT_P.
(lshrsi3_out): Use CONST_INT_P.
(adjust_insn_length): Use CONST_INT_P.
(avr_rtx_costs): Use CONST_INT_P.
(avr_address_cost): Use CONST_INT_P.
(extra_constraint_Q): Use CONST_INT_P.
(avr_reorg): Use CONST_INT_P.
(output_reload_inhi): Use CONST_INT_P.
(output_reload_insisf): Use CONST_INT_P.
(avr_out_sbxx_branch): Use CONST_INT_P.
(print_operand_address): Use CONST_INT_P.
(print_operand): Use CONST_INT_P.
(notice_update_cc): Use CONST_INT_P.
(avr_simplify_comparison_p): Use CONST_INT_P.
(output_movqi): Use CONST_INT_P.
(output_movhi): Use CONST_INT_P.
Fix long lines.



Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 175629)
+++ config/avr/avr.c	(working copy)
@@ -51,7 +51,6 @@
 /* Maximal allowed offset for an address in the LD command */
 #define MAX_LD_OFFSET(MODE) (64 - (signed)GET_MODE_SIZE (MODE))
 
-static void avr_option_override (

Re: [testsuite] skip ARM tests if no thumb2 support

2011-06-29 Thread Janis Johnson
On 06/29/2011 06:55 AM, Richard Earnshaw wrote:
> On 15/06/11 17:38, Janis Johnson wrote:
>> On 06/15/2011 01:13 AM, Ramana Radhakrishnan wrote:
>>> On 14 June 2011 21:35, Janis Johnson  wrote:
>>>> These tests apparently require thumb2 support (I don't yet know much
>>>> about ARM).  OK for trunk, and later 4.6?
>>>
>>> OK - The -march=armv7-a is redundant in these tests. You should be
>>> able to lose them if arm_thumb2_ok returns true.
>>
>> That brings up an important point.  Without any changes, this test
>> passes scan-assembler when run with multilibs that don't override the
>> -march= option, but fails the scan when -march from multilibs
>> overrides the one from dg-options and doesn't support thumb2.  With
>> "dg-require-effective-target arm_thumb2_ok" the test is skipped for
>> multilibs that don't support thumb2, including those that default to an
>> arch that doesn't support it.
>>
>> If we want this and similar tests to continue to be run for default
>> multilibs with default support for older values of -march then instead
>> of using ""dg-require-effective-target arm_thumb2_ok" the test can skip
>> multilibs that use -march that don't support thumb2, e.g.
>>
>> /* { dg-skip-if "need thumb2" { arm*-*-* } { "-march=*" } { "-march=armv6t2" 
>> "-march=armv[7-9]*" "-march=armv[1-9][0-9]*" } } */
>>
>> In summary, the choice for a test like this is:
>>
>> 1. Provide options needed to test particular functionality, and skip the
>>test if multilib flags that override those options would not test the
>>relevant functionality.
>> 2. Skip the test if multilib options on their own do not provide the
>>functionality that the test covers.
>>
> 
> I'd posit a third option:
> 
>  3. Add a new dg directive (perhaps dg-ignore-multilib) that instructs
> the framework to ignore the multilib options entirely (only supported
> for compile/assembly tests).
> 
> There are a lot of target-specific tests that are not really testing
> execution, just that the compiler generates the right instruction under
> specific compilation options.  It seems silly to force all this into the
> multi-lib framework.
> 
> R.

The MIPS tests have support for something like that, but it seems
to me that if tests are being run for lots of multilibs then it's
better to test things like code generation for a variety of relevant
options instead of using the same options every time.  

Having said that, I'm happy that you and other ARM maintainers and
developers are now thinking about how multilibs and dg-options interact
and how you want the tests to be run.  My primary goal is to get rid of
spurious failures when we run tests internally with 10 or 12 multilibs.
So far I've been stumbling about trying to skip tests when multilibs
don't support them, but I'll follow whatever approach you guys prefer.

Janis

>> Option 2 assumes that there is enough test coverage for later arch
>> versions, either with default support or multilib options.  This choice
>> applies to several tests in gcc.target/arm.
>>
>> Janis
>>
> 



Re: [PATCH] Un-obsolete Interix

2011-06-29 Thread Douglas Rupp

An update to the IA-32/x86-64 section seems the right place.
I confess to being ignorant about how to update this html page. Please 
advise.

--Doug

On 12 Jun 2011, Gerald Pfeifer wrote:
And I assume you'll be updating the release notes at
http://gcc.gnu.org/gcc-4.7/changes.html . ;-)


Re: PATCH [5/n]: Prepare x32: PR middle-end/48016: Inconsistency in non-local goto save area

2011-06-29 Thread H.J. Lu
Ping.

On Sat, Jun 25, 2011 at 9:06 AM, H.J. Lu  wrote:
> On Thu, Jun 16, 2011 at 10:18 AM, H.J. Lu  wrote:
>> On Thu, Jun 16, 2011 at 12:56 AM, Richard Guenther
>>  wrote:
>>> On Wed, Jun 15, 2011 at 9:55 PM, H.J. Lu  wrote:
 On Wed, Jun 15, 2011 at 8:16 AM, Michael Matz  wrote:
> Hi,
>
> On Wed, 15 Jun 2011, H.J. Lu wrote:
>
>> >> +  /* FIXME: update_nonlocal_goto_save_area may pass SA in the wrong 
>> >> mode.  */
>> >> +  if (GET_MODE (sa) != mode)
>> >> +    {
>> >> +      gcc_assert (ptr_mode != Pmode
>> >> +               && GET_MODE (sa) == ptr_mode
>> >> +               && mode == Pmode);
>> >> +      sa = adjust_address (sa, mode, 0);
>> >> +    }
>> >
>> > That may be appropriate for a branch, but trunk shouldn't contain 
>> > FIXMEs
>> > that explain how something should be fixed, instead that something 
>> > should
>> > be carried out.  I.e. just fix update_nonlocal_goto_save_area.
>> >
>>
>> I don't know update_nonlocal_goto_save_area well enough to fix it
>> without breaking other targets.  This patch is the least invasive.
>> Any suggestions on how to properly fix it are appreciated.
>
> Well, the most obvious variant would be to move the above code right
> before the call of emit_stack_save in update_nonlocal_goto_save_area
> (using r_save and STACK_SAVEAREA_MODE (SAVE_NONLOCAL)).  All other callers
> of emit_stack_save already make sure to pass an object of correct mode, so
> this one should too.
>
> But I think it's better to just produce a correct array_ref from the
> start.  get_nl_goto_field creates an array_type for the
> nonlocal_goto_save_area of correct type (ptr_type_node or
> lang_hooks.types.type_for_mode (Pmode, 1)), and we should use that.
>
> So something like this in update_nonlocal_goto_save_area:
>  t_save = build4 (ARRAY_REF,
>                   TREE_TYPE (TREE_TYPE (cfun->nonlocal_goto_save_area)),
>                   cfun->nonlocal_goto_save_area,
>                   integer_one_node, NULL_TREE, NULL_TREE);
>
> instead of the current building of t_save.  Then r_save also should get
> the correct mode automatically.
>

 Here is the updated patch.  OK for trunk?
>>>
>>> The explow.c change is ok.  For the function.c change I wonder why
>>> convert_memory_address doesn't do the right thing - from its documentation
>>> it definitely should, so it should be fixed instead of being replaced by
>>> adjust_address with a zero offset.
>>>
>>
>> convert_memory_address may return a pseudo register converted
>> to Pmode.  But here what we want is the same memory address
>> adjusted for Pmode.  I don't think the usage of convert_memory_address
>>
>
> Here is the code in question:
>
>      r_save = convert_memory_address (Pmode, r_save);
>
>      emit_move_insn (r_save, targetm.builtin_setjmp_frame_value ());
>
> R_SAVE must be lvalue.  But return from convert_memory_address
> isn't. I am re-posting my patch here.  OK for trunk?
>
> Thanks.
>
> --
> H.J.
> ---
> 2011-06-15  H.J. Lu  
>
>        PR middle-end/48016
>        * explow.c (update_nonlocal_goto_save_area): Use proper mode
>        for stack save area.
>
>        * function.c (expand_function_start): Properly store frame
>        pointer for non-local goto.
>
> diff --git a/gcc/explow.c b/gcc/explow.c
> index c7d8183..efe6c7e 100644
> --- a/gcc/explow.c
> +++ b/gcc/explow.c
> @@ -1102,7 +1097,9 @@ update_nonlocal_goto_save_area (void)
>      first one is used for the frame pointer save; the rest are sized by
>      STACK_SAVEAREA_MODE.  Create a reference to array index 1, the first
>      of the stack save area slots.  */
> -  t_save = build4 (ARRAY_REF, ptr_type_node, cfun->nonlocal_goto_save_area,
> +  t_save = build4 (ARRAY_REF,
> +                  TREE_TYPE (TREE_TYPE (cfun->nonlocal_goto_save_area)),
> +                  cfun->nonlocal_goto_save_area,
>                   integer_one_node, NULL_TREE, NULL_TREE);
>   r_save = expand_expr (t_save, NULL_RTX, VOIDmode, EXPAND_WRITE);
>
> diff --git a/gcc/function.c b/gcc/function.c
> index 81c4d39..131bc09 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -4780,7 +4780,7 @@ expand_function_start (tree subr)
>                       cfun->nonlocal_goto_save_area,
>                       integer_zero_node, NULL_TREE, NULL_TREE);
>       r_save = expand_expr (t_save, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -      r_save = convert_memory_address (Pmode, r_save);
> +      r_save = adjust_address (r_save, Pmode, 0);
>
>       emit_move_insn (r_save, targetm.builtin_setjmp_frame_value ());
>       update_nonlocal_goto_save_area ();
>



-- 
H.J.


C++ PATCHes for c++/45923 (constexpr diagnostics)

2011-06-29 Thread Jason Merrill
In C++0x when an expression fails to satisfy the requirements of a 
constant expression it can be hard to figure out why; previously the 
compiler would just say that a particular class or function was not 
literal or constexpr without explaining why.  This patch improves 
diagnostics by having the compiler recursively explain why classes are 
not literal or functions not constexpr.


The second patch updates the rules for what classes can be literal, 
adding aggregates.  I've left in trivial default constructors for now 
even though it was dropped from the FDIS.


The third patch is necessary to avoid some testsuite failures from 
complaints about non-literal temporaries if we do eventually drop 
trivial default constructors.


The last patch tweaks the similar function maybe_explain_implicit_delete 
to use a pointer set instead of a hash table, since we were only using 
it as a set anyway.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit fe193797db267abf26a2632fed0a36828505d0c9
Author: Jason Merrill 
Date:   Mon Jun 27 17:02:23 2011 -0400

	PR c++/45923
	* class.c (explain_non_literal_class): New.
	(finalize_literal_type_property): Call it.
	* cp-tree.h: Declare it.
	* semantics.c (ensure_literal_type_for_constexpr_object): Call it.
	(is_valid_constexpr_fn): Likewise.
	(massage_constexpr_body): Split out from...
	(register_constexpr_fundef): ...here.
	(is_instantiation_of_constexpr): New.
	(expand_or_defer_fn_1): Leave DECL_SAVED_TREE alone in that case.
	(explain_invalid_constexpr_fn): New.
	(cxx_eval_call_expression): Call it.
	(potential_constant_expression_1): Likewise.  Avoid redundant errors.
	* method.c (process_subob_fn): Diagnose non-constexpr.
	(walk_field_subobs): Likewise.
	(synthesized_method_walk): Don't shortcut if we want diagnostics.
	(explain_implicit_non_constexpr): New.
	(defaulted_late_check): Use it.
	* call.c (build_cxx_call): Remember location.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index e2d455a..56f3408 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -6721,7 +6721,10 @@ build_cxx_call (tree fn, int nargs, tree *argarray)
 {
   tree fndecl;
 
+  /* Remember roughly where this call is.  */
+  location_t loc = EXPR_LOC_OR_HERE (fn);
   fn = build_call_a (fn, nargs, argarray);
+  SET_EXPR_LOCATION (fn, loc);
 
   /* If this call might throw an exception, note that fact.  */
   fndecl = get_callee_fndecl (fn);
diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 9054b5c..6aefd68 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"
 #include "tree-dump.h"
 #include "splay-tree.h"
+#include "pointer-set.h"
 
 /* The number of nested classes being processed.  If we are not in the
scope of any class, this is zero.  */
@@ -4582,10 +4583,73 @@ finalize_literal_type_property (tree t)
 	{
 	  DECL_DECLARED_CONSTEXPR_P (fn) = false;
 	  if (!DECL_TEMPLATE_INFO (fn))
-	error ("enclosing class of %q+#D is not a literal type", fn);
+	{
+	  error ("enclosing class of constexpr non-static member "
+		 "function %q+#D is not a literal type", fn);
+	  explain_non_literal_class (t);
+	}
 	}
 }
 
+/* T is a non-literal type used in a context which requires a constant
+   expression.  Explain why it isn't literal.  */
+
+void
+explain_non_literal_class (tree t)
+{
+  static struct pointer_set_t *diagnosed;
+
+  if (!CLASS_TYPE_P (t))
+return;
+  t = TYPE_MAIN_VARIANT (t);
+
+  if (diagnosed == NULL)
+diagnosed = pointer_set_create ();
+  if (pointer_set_insert (diagnosed, t) != 0)
+/* Already explained.  */
+return;
+
+  inform (0, "%q+T is not literal because:", t);
+  if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (t))
+inform (0, "  %q+T has a non-trivial destructor", t);
+  else if (CLASSTYPE_NON_AGGREGATE (t)
+	   && !TYPE_HAS_TRIVIAL_DFLT (t)
+	   && !TYPE_HAS_CONSTEXPR_CTOR (t))
+inform (0, "  %q+T is not an aggregate, does not have a trivial "
+	"default constructor, and has no constexpr constructor that "
+	"is not a copy or move constructor", t);
+  else
+{
+  tree binfo, base_binfo, field; int i;
+  for (binfo = TYPE_BINFO (t), i = 0;
+	   BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
+	{
+	  tree basetype = TREE_TYPE (base_binfo);
+	  if (!CLASSTYPE_LITERAL_P (basetype))
+	{
+	  inform (0, "  base class %qT of %q+T is non-literal",
+		  basetype, t);
+	  explain_non_literal_class (basetype);
+	  return;
+	}
+	}
+  for (field = TYPE_FIELDS (t); field; field = TREE_CHAIN (field))
+	{
+	  tree ftype;
+	  if (TREE_CODE (field) != FIELD_DECL)
+	continue;
+	  ftype = TREE_TYPE (field);
+	  if (!literal_type_p (ftype))
+	{
+	  inform (0, "  non-static data member %q+D has "
+		  "non-literal type", field);
+	  if (CLASS_TYPE_P (ftype))
+		explain_non_literal_class (ftype);
+	}
+	}
+}
+}
+
 /* Check the validity

[PATCH] Split out SCEV dumping

2011-06-29 Thread Richard Guenther

I'm finally annoyed enough about this to do the patch splitting out
SCEV details dumping to a TDF_SCEV (-scev) flag.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2011-06-29  Richard Guenther  

* doc/invoke.texi: Document -scev dump modifier.
* tree-pass.h (TDF_SCEV): New dump flag.
* tree-dump.c (dump_option_value_in): Add scev.
* tree-chrec.c: Replace all TDF_DETAILS checks with TDF_SCEV.
* tree-scalar-evolution.c: Likewise.

* gcc.dg/tree-ssa/loop-17.c: Adjust.

Index: gcc/doc/invoke.texi
===
*** gcc/doc/invoke.texi (revision 175630)
--- gcc/doc/invoke.texi (working copy)
*** Enable showing the unique ID (@code{DECL
*** 5559,5564 
--- 5559,5566 
  Enable showing the tree dump for each statement.
  @item eh
  Enable showing the EH region number holding each statement.
+ @item scev
+ Enable showing scalar evolution analysis details.
  @item all
  Turn on all options, except @option{raw}, @option{slim}, @option{verbose}
  and @option{lineno}.
Index: gcc/tree-pass.h
===
*** gcc/tree-pass.h (revision 175630)
--- gcc/tree-pass.h (working copy)
*** enum tree_dump_index
*** 83,88 
--- 83,89 
  #define TDF_ALIAS (1 << 21)   /* display alias information  */
  #define TDF_ENUMERATE_LOCALS (1 << 22)/* Enumerate locals by uid.  */
  #define TDF_CSELIB(1 << 23)   /* Dump cselib details.  */
+ #define TDF_SCEV  (1 << 24)   /* Dump SCEV details.  */
  
  
  /* In tree-dump.c */
Index: gcc/tree-dump.c
===
*** gcc/tree-dump.c (revision 175630)
--- gcc/tree-dump.c (working copy)
*** static const struct dump_option_value_in
*** 823,831 
{"alias", TDF_ALIAS},
{"nouid", TDF_NOUID},
{"enumerate_locals", TDF_ENUMERATE_LOCALS},
{"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL | TDF_IPA
| TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
!   | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS)},
{NULL, 0}
  };
  
--- 823,832 
{"alias", TDF_ALIAS},
{"nouid", TDF_NOUID},
{"enumerate_locals", TDF_ENUMERATE_LOCALS},
+   {"scev", TDF_SCEV},
{"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL | TDF_IPA
| TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
!   | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV)},
{NULL, 0}
  };
  
Index: gcc/tree-chrec.c
===
*** gcc/tree-chrec.c(revision 175630)
--- gcc/tree-chrec.c(working copy)
*** chrec_apply (unsigned var,
*** 587,593 
|| chrec_contains_symbols_defined_in_loop (chrec, var))
  return chrec_dont_know;
  
!   if (dump_file && (dump_flags & TDF_DETAILS))
  fprintf (dump_file, "(chrec_apply \n");
  
if (TREE_CODE (x) == INTEGER_CST && SCALAR_FLOAT_TYPE_P (type))
--- 587,593 
|| chrec_contains_symbols_defined_in_loop (chrec, var))
  return chrec_dont_know;
  
!   if (dump_file && (dump_flags & TDF_SCEV))
  fprintf (dump_file, "(chrec_apply \n");
  
if (TREE_CODE (x) == INTEGER_CST && SCALAR_FLOAT_TYPE_P (type))
*** chrec_apply (unsigned var,
*** 628,634 
break;
  }
  
!   if (dump_file && (dump_flags & TDF_DETAILS))
  {
fprintf (dump_file, "  (varying_loop = %d\n", var);
fprintf (dump_file, ")\n  (chrec = ");
--- 628,634 
break;
  }
  
!   if (dump_file && (dump_flags & TDF_SCEV))
  {
fprintf (dump_file, "  (varying_loop = %d\n", var);
fprintf (dump_file, ")\n  (chrec = ");
Index: gcc/tree-scalar-evolution.c
===
*** gcc/tree-scalar-evolution.c (revision 175630)
--- gcc/tree-scalar-evolution.c (working copy)
*** set_scalar_evolution (basic_block instan
*** 572,578 
  
if (dump_file)
  {
!   if (dump_flags & TDF_DETAILS)
{
  fprintf (dump_file, "(set_scalar_evolution \n");
  fprintf (dump_file, "  instantiated_below = %d \n",
--- 572,578 
  
if (dump_file)
  {
!   if (dump_flags & TDF_SCEV)
{
  fprintf (dump_file, "(set_scalar_evolution \n");
  fprintf (dump_file, "  instantiated_below = %d \n",
*** get_scalar_evolution (basic_block instan
*** 600,606 
  
if (dump_file)
  {
!   if (dump_flags & TDF_DETAILS)
{
  fprintf (dump_file, "(get_scalar_evolution \n");
  fprintf (dump_file, "  (scalar = ");
--- 600,606 
  
if (dump_file)
  {
!   if (dump_flags & TDF_SCEV)
{
  fprintf (dump_file, "(get_scalar_evolution \n");
  fprintf (dump_file

C++ PATCHes to list-value-initialization

2011-06-29 Thread Jason Merrill
The first patch implements the resolution of DR 990, which clarifies 
that {} means value-initialization if the type has a default constructor.


The second patch fixes a bug in the standard I noticed while looking at 
related issues: it says that if a class has any user-provided 
constructor, we just call the default constructor.  This wording should 
have been adjusted when we added defaulted functions; I've raised the 
issue with the committee, but am also applying the obvious fix to the 
compiler, namely to only consider the user-providedness of the default 
constructor when deciding whether or not to zero-initialize first.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 2bd88a2546673c5f10bedefff0dcfa3565d5b6fa
Author: Jason Merrill 
Date:   Tue Jun 28 10:20:50 2011 -0400

	DR 990
	* call.c (convert_like_real) [ck_user]: Handle value-initialization.
	(build_new_method_call_1): Likewise.
	* init.c (expand_default_init): Handle direct list-initialization
	of aggregates.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index cfaef7d..e2d455a 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -5592,6 +5592,18 @@ convert_like_real (conversion *convs, tree expr, tree fn, int argnum,
 	tree convfn = cand->fn;
 	unsigned i;
 
+	/* If we're initializing from {}, it's value-initialization.  */
+	if (BRACE_ENCLOSED_INITIALIZER_P (expr)
+	&& CONSTRUCTOR_NELTS (expr) == 0
+	&& TYPE_HAS_DEFAULT_CONSTRUCTOR (totype))
+	  {
+	expr = build_value_init (totype, complain);
+	expr = get_target_expr_sfinae (expr, complain);
+	if (expr != error_mark_node)
+	  TARGET_EXPR_LIST_INIT_P (expr) = true;
+	return expr;
+	  }
+
 	expr = mark_rvalue_use (expr);
 
 	/* When converting from an init list we consider explicit
@@ -5634,7 +5646,7 @@ convert_like_real (conversion *convs, tree expr, tree fn, int argnum,
 	{
 	  int nelts = CONSTRUCTOR_NELTS (expr);
 	  if (nelts == 0)
-	expr = build_value_init (totype, tf_warning_or_error);
+	expr = build_value_init (totype, complain);
 	  else if (nelts == 1)
 	expr = CONSTRUCTOR_ELT (expr, 0)->value;
 	  else
@@ -7138,10 +7150,29 @@ build_new_method_call_1 (tree instance, tree fns, VEC(tree,gc) **args,
   && BRACE_ENCLOSED_INITIALIZER_P (VEC_index (tree, *args, 0))
   && CONSTRUCTOR_IS_DIRECT_INIT (VEC_index (tree, *args, 0)))
 {
+  tree init_list = VEC_index (tree, *args, 0);
+
   gcc_assert (VEC_length (tree, *args) == 1
 		  && !(flags & LOOKUP_ONLYCONVERTING));
 
-  add_list_candidates (fns, first_mem_arg, VEC_index (tree, *args, 0),
+  /* If the initializer list has no elements and T is a class type with
+	 a default constructor, the object is value-initialized.  Handle
+	 this here so we don't need to handle it wherever we use
+	 build_special_member_call.  */
+  if (CONSTRUCTOR_NELTS (init_list) == 0
+	  && TYPE_HAS_DEFAULT_CONSTRUCTOR (basetype)
+	  && !processing_template_decl)
+	{
+	  tree ob, init = build_value_init (basetype, complain);
+	  if (integer_zerop (instance_ptr))
+	return get_target_expr_sfinae (init, complain);
+	  ob = build_fold_indirect_ref (instance_ptr);
+	  init = build2 (INIT_EXPR, TREE_TYPE (ob), ob, init);
+	  TREE_SIDE_EFFECTS (init) = true;
+	  return init;
+	}
+
+  add_list_candidates (fns, first_mem_arg, init_list,
 			   basetype, explicit_targs, template_only,
 			   conversion_path, access_binfo, flags, &candidates);
 }
@@ -8365,7 +8396,7 @@ perform_implicit_conversion (tree type, tree expr, tsubst_flags_t complain)
permitted.  If the conversion is valid, the converted expression is
returned.  Otherwise, NULL_TREE is returned, except in the case
that TYPE is a class type; in that case, an error is issued.  If
-   C_CAST_P is true, then this direction initialization is taking
+   C_CAST_P is true, then this direct-initialization is taking
place as part of a static_cast being attempted as part of a C-style
cast.  */
 
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 3ceed90..1719339 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -1443,6 +1443,17 @@ expand_default_init (tree binfo, tree true_exp, tree exp, tree init, int flags,
   tree rval;
   VEC(tree,gc) *parms;
 
+  /* If we have direct-initialization from an initializer list, pull
+ it out of the TREE_LIST so the code below can see it.  */
+  if (init && TREE_CODE (init) == TREE_LIST
+  && BRACE_ENCLOSED_INITIALIZER_P (TREE_VALUE (init))
+  && CONSTRUCTOR_IS_DIRECT_INIT (TREE_VALUE (init)))
+{
+  gcc_checking_assert ((flags & LOOKUP_ONLYCONVERTING) == 0
+			   && TREE_CHAIN (init) == NULL_TREE);
+  init = TREE_VALUE (init);
+}
+
   if (init && BRACE_ENCLOSED_INITIALIZER_P (init)
   && CP_AGGREGATE_TYPE_P (type))
 {
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist4.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist4.C
new file mode 100644
index 000..8151857
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist4.C

Re: PATCH [2/n]: Prepare x32: Convert pointer to TLS symbol if needed

2011-06-29 Thread H.J. Lu
On Wed, Jun 29, 2011 at 1:45 AM, Richard Sandiford
 wrote:
> "H.J. Lu"  writes:
>> @@ -706,7 +706,13 @@ precompute_register_parameters (int num_actuals, struct 
>> arg_data *args,
>>          pseudo now.  TLS symbols sometimes need a call to resolve.  */
>>       if (CONSTANT_P (args[i].value)
>>           && !targetm.legitimate_constant_p (args[i].mode, args[i].value))
>> -       args[i].value = force_reg (args[i].mode, args[i].value);
>> +       {
>> +         if (GET_MODE (args[i].value) != args[i].mode)
>> +           args[i].value = convert_to_mode (args[i].mode,
>> +                                            args[i].value,
>> +                                            args[i].unsignedp);
>> +         args[i].value = force_reg (args[i].mode, args[i].value);
>> +       }
>
> But if GET_MODE (args[i].value) != args[i].mode, then the call to
> targetm.legitimate_constant_p looks wrong.  The mode passed in the
> first argument is supposed to be the mode of the second argument.
>
> Is there any reason why this and the following:
>
>        /* If we are to promote the function arg to a wider mode,
>           do it now.  */
>
>        if (args[i].mode != TYPE_MODE (TREE_TYPE (args[i].tree_value)))
>          args[i].value
>            = convert_modes (args[i].mode,
>                             TYPE_MODE (TREE_TYPE (args[i].tree_value)),
>                             args[i].value, args[i].unsignedp);
>
> need to be done in the current order?  I can't think of any off-hand.
> If not, would swapping them also fix the bug?
>
> (I can't review this either way, of course.)

It works on the testcase.  I will do a full test.

Thanks.

-- 
H.J.


Re: [testsuite] skip ARM tests if no thumb2 support

2011-06-29 Thread Richard Earnshaw
On 15/06/11 17:38, Janis Johnson wrote:
> On 06/15/2011 01:13 AM, Ramana Radhakrishnan wrote:
>> On 14 June 2011 21:35, Janis Johnson  wrote:
>>> These tests apparently require thumb2 support (I don't yet know much
>>> about ARM).  OK for trunk, and later 4.6?
>>
>> OK - The -march=armv7-a is redundant in these tests. You should be
>> able to lose them if arm_thumb2_ok returns true.
> 
> That brings up an important point.  Without any changes, this test
> passes scan-assembler when run with multilibs that don't override the
> -march= option, but fails the scan when -march from multilibs
> overrides the one from dg-options and doesn't support thumb2.  With
> "dg-require-effective-target arm_thumb2_ok" the test is skipped for
> multilibs that don't support thumb2, including those that default to an
> arch that doesn't support it.
> 
> If we want this and similar tests to continue to be run for default
> multilibs with default support for older values of -march then instead
> of using "dg-require-effective-target arm_thumb2_ok" the test can skip
> multilibs that use -march that don't support thumb2, e.g.
> 
> /* { dg-skip-if "need thumb2" { arm*-*-* } { "-march=*" } { "-march=armv6t2" 
> "-march=armv[7-9]*" "-march=armv[1-9][0-9]*" } } */
> 
> In summary, the choice for a test like this is:
> 
> 1. Provide options needed to test particular functionality, and skip the
>test if multilib flags that override those options would not test the
>relevant functionality.
> 2. Skip the test if multilib options on their own do not provide the
>functionality that the test covers.
> 

I'd posit a third option:

 3. Add a new dg directive (perhaps dg-ignore-multilib) that instructs
the framework to ignore the multilib options entirely (only supported
for compile/assembly tests).

There are a lot of target-specific tests that are not really testing
execution, just that the compiler generates the right instruction under
specific compilation options.  It seems silly to force all this into the
multi-lib framework.

R.

> Option 2 assumes that there is enough test coverage for later arch
> versions, either with default support or multilib options.  This choice
> applies to several tests in gcc.target/arm.
> 
> Janis
> 
> 
> 




Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread Rainer Orth
David Gilbert  writes:

> On 29 June 2011 14:43, Rainer Orth  wrote:
>> -/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
>> +/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */
>
> Do you fancy adding the appropriate MIPS fix on top of the libffi varargs 
> patch
> I posted a few months back - then it could actually pass the test!

I plan to do so once libffi 3.0.11 with the varargs patch lands in the
gcc tree, unless someone beats me to it.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread David Gilbert
On 29 June 2011 14:43, Rainer Orth  wrote:
> -/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
> +/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */

Do you fancy adding the appropriate MIPS fix on top of the libffi varargs patch
I posted a few months back - then it could actually pass the test!

Dave


Re: [PATCH, ARM] [2/3] iWMMXt intrinsics testsuite

2011-06-29 Thread Richard Earnshaw
On 29/06/11 09:50, Xinyu Qi wrote:
> Hi,
> 
> This patch adds some simple iWMMXt intrinsic test cases.
> 
> gcc.target/arm/iwmmxt/i_shift_error.c: New.
> gcc.target/arm/iwmmxt/iwmmxt.exp: New.
> gcc.target/arm/iwmmxt/i_wshufh_error.c: New.
> gcc.target/arm/iwmmxt/i_merge_si64_error.c: New.
> gcc.target/arm/iwmmxt/i_waligni_error.c: New.
> gcc.target/arm/iwmmxt/i_extract_error.c: New.
> 

Please use "New test" when adding new tests.

> Thanks,
> Xinyu
> 
> 
> iwmmxt_testsuite.patch
> 
> 
> Index: gcc/testsuite/gcc.target/arm/iwmmxt/i_shift_error.c
> ===
> --- gcc/testsuite/gcc.target/arm/iwmmxt/i_shift_error.c   (revision 0)
> +++ gcc/testsuite/gcc.target/arm/iwmmxt/i_shift_error.c   (revision 0)
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=iwmmxt2" } */

No, this will interact badly with multi-lib testing.  You need to add a
test along the lines of arm_iwmmxt_ok from lib/target-supports.exp

R.



Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread Rainer Orth
"H.J. Lu"  writes:

>> Index: libffi/testsuite/libffi.call/cls_double_va.c
>> ===
>> --- libffi/testsuite/libffi.call/cls_double_va.c        (revision 175618)
>> +++ libffi/testsuite/libffi.call/cls_double_va.c        (working copy)
>> @@ -6,7 +6,7 @@
>>
>>  /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
>>  /* { dg-output "" { xfail avr32*-*-* } } */
>> -/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
>> +/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */
>>  /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
>>
>
> Why not just add mips-sgi-irix6* to avr32*-*-*?

So the corresponding PR can be noted.  If you list it without
attribution, you'd have to search all of them to find which one is for
IRIX.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread H.J. Lu
On Wed, Jun 29, 2011 at 5:52 AM, Rainer Orth
 wrote:
> Richard Guenther  writes:
>
>> The tests now fail on x86_64-linux and i?86-linux like
>>
>> FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
>> FAIL: libffi.call/cls_longdouble_va.c -O0 -W -Wall output pattern test, is 
>> 7.0
>> FAIL: libffi.call/cls_double_va.c -O2 output pattern test, is 7.0
>> FAIL: libffi.call/cls_longdouble_va.c -O2 output pattern test, is 7.0
>> FAIL: libffi.call/cls_double_va.c -O3 output pattern test, is 7.0
>> FAIL: libffi.call/cls_longdouble_va.c -O3 output pattern test, is 7.0
>> FAIL: libffi.call/cls_double_va.c -Os output pattern test, is 7.0
>> FAIL: libffi.call/cls_longdouble_va.c -Os output pattern test, is 7.0
>> FAIL: libffi.call/cls_double_va.c -O2 -fomit-frame-pointer output pattern 
>> test,
>> is 7.0
>> FAIL: libffi.call/cls_longdouble_va.c -O2 -fomit-frame-pointer output 
>> pattern te
>> st, is 7.0
>>
>> spawn [open ...]^M
>> 7.0
>> res: 4
>> 7.0
>> res: 4
>> PASS: libffi.call/cls_double_va.c -O0 -W -Wall execution test
>> FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
>> res: 4
>> 7.0
>> res: 4
>> , should match PR libffi/466607.0^M?
>> res: 4^M?
>> 7.0^M?
>> res: 4
>>
>> I believe your dg-output first arguments are bogus.
>
> You're right, I'm an idiot ;-)  I should have tested on a non-xfailed
> target, too.
>
> Fixed as follows, tested on i386-pc-solaris2.11 and mips-sgi-irix6.5,
> applied to mainline and 4.6 branch.  The 4.5 branch is unaffected.
>
> Sorry.
>        Rainer
>
>
> 2011-06-29  Rainer Orth  
>
>        * testsuite/libffi.call/cls_double_va.c: Move PR number to comment.
>        * testsuite/libffi.call/cls_longdouble_va.c: Likewise.
>
> Index: libffi/testsuite/libffi.call/cls_double_va.c
> ===
> --- libffi/testsuite/libffi.call/cls_double_va.c        (revision 175618)
> +++ libffi/testsuite/libffi.call/cls_double_va.c        (working copy)
> @@ -6,7 +6,7 @@
>
>  /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
>  /* { dg-output "" { xfail avr32*-*-* } } */
> -/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
> +/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */
>  /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
>

Why not just add mips-sgi-irix6* to avr32*-*-*?

-- 
H.J.


Re: [ARM] fix C++ EH interoperability

2011-06-29 Thread Richard Earnshaw
On 23/05/11 16:52, Nathan Sidwell wrote:
> This patch fixes an interoperability issue with code generated by ARM's
> EABI compiler.
> 
> Unlike the generic C++ ABI, which always catches pointers by value,
> ARM's ABI only catches pointers by value when there's the possibility of
> derived->base conversion happening.  The ARM __cxa_type_matcher can
> return one of 3 values to indicate the three possibilities.  GCC was
> returning the success value when a pointer matched exactly, but
> returning the pointer by value.  This is incompatible with EABI
> compliant code, which expects a reference to the thrown pointer in such
> cases.
> 
> However, we cannot simply change the GCC type matcher to return a
> reference in such cases, as then it will be incompatible with existing
> GCC-compiled code. GCC compiled code expects the pointer value at the
> landing pad, ARM-compiled code expects a reference to the pointer
> value.  Hence ARM-compiled code (usually) inserts an additional
> dereference.
> 
> Note that the compiler generating the catching code, specifies the
> personality routine (unwinder), but the code doing the type-matching
> could come from any compliant library.
> 
> In discussion with ARM, I developed the attached patch, which changes
> __cxa_type_match to return the succeeded_with_ptr_to_base enum value
> whenever a pointer is caught (rather than only when a pointer-to-derived
> is converted to a pointer-to-base).  This value indicates to the (ARM)
> unwinding machinery that the pointer value is being returned, and makes
> the type matcher work with the ARM unwinder.  We do not change the
> conditions under which __cxa_type_match performs the pointer indirection.
> 
> At the other side of the interface, we have to make GCC's implementation
> of the EABI unwinder work with an ARM-provided type matcher.  Here, the
> unwinder notes the return value from __cxa_type_match, and inserts an
> additional indirection by using a spare field in the barrier_cache
> structure.  Fortunately GCC-generated code doesn't use the common
> unwinder, but uses a gnu personality routine, which is unchanged.
> 
> This patch has been tested for arm-linux, and independently
> tested by ARM with mixed RVCT-generated code, confirming the defect has
> been fixed.
> 
> ok?
> 
> nathan
> 
> 
> arm-eh.patch
> 
> 
> 2011-05-23  Nathan Sidwell  
> 
>   gcc/
>   * config/arm/unwind-arm.c (enum __cxa_type_match_result): New.
>   (cxa_type_match): Correct declaration.
>   (__gnu_unwind_pr_common): Reconstruct
>   additional indirection when __cxa_type_match returns
>   succeeded_with_ptr_to_base.
> 
>   libstdc++/
>   * libsupc++/eh_arm.c (__cxa_type_match): Construct address of
>   thrown object here.  Return succeeded_with_ptr_to_base for all
>   pointer cases.
> 

OK.

R.




RE: Ping #1: [Patch, AVR]: Fix PR34734

2011-06-29 Thread Weddington, Eric

> -Original Message-
> From: Georg-Johann Lay [mailto:a...@gjlay.de]
> Sent: Wednesday, June 29, 2011 5:26 AM
> To: Denis Chertykov
> Cc: gcc-patches@gcc.gnu.org; Weddington, Eric; Anatoly Sokolov
> Subject: Re: Ping #1: [Patch, AVR]: Fix PR34734
> 
> >
> > You can backport it if you want.
> >
> > I usually don't backport such patches.
> >
> > Denis.
> 
> Ok, maybe Eric or Anatoly have some preference for 4.5/4.6.
> 

I would like it for the 4.6 series, if possible. I think that the next releases 
of the various avr toolchain distributions will very probably be moving up to 
4.6.x.

Eric


Re: [testsuite] ARM wmul tests: require arm_dsp_multiply

2011-06-29 Thread Richard Earnshaw
On 23/06/11 22:38, Janis Johnson wrote:
> Tests wmul-[1234].c and mla-2.c in gcc.target/arm require support that
> the arm backend identifies as TARGET_DSP_MULTIPLY.  The tests all
> specify a -march option with that support, but it is overridden by
> multilib flags.
> 
> This patch adds a new effective target, arm_dsp_multiply, and requires
> it for those tests instead of having them specify a -march value.  This
> means that the tests will be skipped for older targets and test coverage
> relies on testing for some newer multilibs.
> 
> The same effective target is needed for tests smlaltb-1.c, smlaltt-1.c,
> smlatb-1.c, and smlatt-1.c, but those also need to be renamed so the
> scans don't pass just because the file name is in the assembly file.
> 
> OK for trunk, and later for 4.6?
> 
> (btw, I'm currently testing ARM compile-only tests with 43 sets of
> multilib flags)
> 

I've recently approved a patch from James Greenhalgh
(http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01852.html) that defines
__ARM_DSP_MULTIPLY when these features are available.  That should
simplify your target-supports change and also serve as a check that we
aren't erroneously defining that macro.

R.

> 
> gcc-20110623-2
> 
> 
> 2011-06-23  Janis Johnson  
> 
>   * lib/target-supports.exp (check_effective_target_arm_dsp_multiply):
>   New.
>   * gcc.target/arm/wmul-1.c: Require arm_dsp_multiply, don't
>   supply -march.
>   * gcc.target/arm/wmul-2.c: Likewise.
>   * gcc.target/arm/wmul-3.c: Likewise.
>   * gcc.target/arm/wmul-4.c: Likewise.
>   * gcc.target/arm/mla-2.c: Likewise.
> 
> Index: lib/target-supports.exp
> ===
> --- lib/target-supports.exp   (revision 175313)
> +++ lib/target-supports.exp   (working copy)
> @@ -1902,6 +1902,33 @@
>  }
>  }
>  
> +# Return 1 if this is an ARM target that supports DSP multiply with
> +# current multilib flags.
> +
> +proc check_effective_target_arm_dsp_multiply { } {
> +return [check_no_compiler_messages arm_dsp_multiply assembly {
> + #if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
> + || defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_3M__) \
> + || defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) \
> + || defined(__ARM_ARCH_5T__)
> + #  error NOT_SUPPORTED
> + #elif defined(__thumb__) || defined(__thumb2__)
> + #  if defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_6__) \
> +   || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6M__) \
> +   || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
> +   || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7M__) \
> +   || defined(__ARM_ARCH_IWMMXT__) || defined(__ARM_ARCH_IWMMXT2__)
> + #error NOT_SUPPORTED
> + #  endif
> + #else
> + #  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_EP9312__)
> + #error NOT_SUPPORTED
> + #  endif
> + #endif
> + int i;
> +}]
> +}
> +
>  # Add the options needed for NEON.  We need either -mfloat-abi=softfp
>  # or -mfloat-abi=hard, but if one is already specified by the
>  # multilib, use it.  Similarly, if a -mfpu option already enables
> Index: gcc.target/arm/wmul-1.c
> ===
> --- gcc.target/arm/wmul-1.c   (revision 175313)
> +++ gcc.target/arm/wmul-1.c   (working copy)
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv6t2" } */
> +/* { dg-require-effective-target arm_dsp_multiply } */
> +/* { dg-options "-O2" } */
>  
>  int mac(const short *a, const short *b, int sqr, int *sum)
>  {
> Index: gcc.target/arm/wmul-2.c
> ===
> --- gcc.target/arm/wmul-2.c   (revision 175313)
> +++ gcc.target/arm/wmul-2.c   (working copy)
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv6t2" } */
> +/* { dg-require-effective-target arm_dsp_multiply } */
> +/* { dg-options "-O2" } */
>  
>  void vec_mpy(int y[], const short x[], short scaler)
>  {
> Index: gcc.target/arm/wmul-3.c
> ===
> --- gcc.target/arm/wmul-3.c   (revision 175313)
> +++ gcc.target/arm/wmul-3.c   (working copy)
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv6t2" } */
> +/* { dg-require-effective-target arm_dsp_multiply } */
> +/* { dg-options "-O2" } */
>  
>  int mac(const short *a, const short *b, int sqr, int *sum)
>  {
> Index: gcc.target/arm/wmul-4.c
> ===
> --- gcc.target/arm/wmul-4.c   (revision 175313)
> +++ gcc.target/arm/wmul-4.c   (working copy)
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -march=armv6t2" } */
> +/* { dg-require-effective-target arm_dsp_multiply } */
> +/* { dg-options "-O2" } */
>  
>  int mac(const int *a, const int *b, long long sqr

Re: [ARM] Deprecate -mwords-little-endian

2011-06-29 Thread Richard Earnshaw
On 29/06/11 12:28, Richard Sandiford wrote:
> ARM has an option called -mwords-little-endian that provides big-endian
> compatibility with pre-2.8 compilers.  When I asked Richard about it,
> he seemed to think it had outlived its usefulness, so this patch
> deprecates it.  We can then remove it once 4.7 is out.
> 
> Tested on arm-linux-gnueabi.  OK to install?  If so, I'll do a patch
> for the web page as well.
> 

Please also update the in-line help text in arm.opt.  OK with that change.

R.

> Richard
> 
> 
> Index: gcc/config/arm/arm.c
> ===
> --- gcc/config/arm/arm.c  2011-06-29 09:33:37.0 +0100
> +++ gcc/config/arm/arm.c  2011-06-29 12:20:35.0 +0100
> @@ -1483,6 +1483,10 @@ arm_option_override (void)
>if (TARGET_APCS_FLOAT)
>  warning (0, "passing floating point arguments in fp regs not yet 
> supported");
>  
> +  if (TARGET_LITTLE_WORDS)
> +    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
> +  "will be removed in a future release");
> +
>/* Initialize boolean versions of the flags, for use in the arm.md file.  
> */
>arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
>arm_arch4 = (insn_flags & FL_ARCH4) != 0;
> Index: gcc/doc/invoke.texi
> ===
> --- gcc/doc/invoke.texi   2011-06-22 16:45:29.0 +0100
> +++ gcc/doc/invoke.texi   2011-06-29 12:17:40.0 +0100
> @@ -10237,7 +10237,7 @@ Generate code for a little-endian word o
>  order.  That is, a byte order of the form @samp{32107654}.  Note: this
>  option should only be used if you require compatibility with code for
>  big-endian ARM processors generated by versions of the compiler prior to
> -2.8.
> +2.8.  This option is now deprecated.
>  
>  @item -mcpu=@var{name}
>  @opindex mcpu
> 




Re: [patch tree-optimization]: Do bitwise operator optimizations for X op !X patterns

2011-06-29 Thread Kai Tietz
- Original Message -
From: "Kai Tietz" 
To: "Richard Guenther" 
Cc: gcc-patches@gcc.gnu.org
Sent: Wednesday, June 29, 2011 1:33:30 PM
Subject: Re: [patch tree-optimization]: Do bitwise operator optimizations for X 
op !X patterns

- Original Message -
From: "Richard Guenther" 
To: "Kai Tietz" 
Cc: gcc-patches@gcc.gnu.org
Sent: Wednesday, June 29, 2011 12:14:10 PM
Subject: Re: [patch tree-optimization]: Do bitwise operator optimizations for X 
op !X patterns

On Tue, Jun 28, 2011 at 5:05 PM, Kai Tietz  wrote:
> Hello,
>
> this patch implements the X op !X patterns within tree-ssa-forwprop.c without 
> using here const-fold routines.  Additionally it does some trivial folding 
> for X op X.  Implementation
> also looks through [(type)] X op [(type)] !X, if type of X is integral and 
> precision is suitable
> for operation.
>
> ChangeLog gcc/
>
> 2011-06-28  Kai Tietz  
>
>        * tree-ssa-forwprop.c (operand_precision_onep): New
>        function.
>        (find_possible_not_expr_argument): Likewise.
>        (simplify_bitwise_binary_1): Likewise.
>        (simplify_bitwise_binary): Use simplify_bitwise_binary_1
>        for detecting various X op !X optimizations.
>
> ChangeLog gcc/testsuite
>
> 2011-06-28  Kai Tietz  
>
>        * gcc.dg/binop-notand1a.c: New test.
>        * gcc.dg/binop-notand2a.c: New test.
>        * gcc.dg/binop-notand3a.c: New test.
>        * gcc.dg/binop-notand4a.c: New test.
>        * gcc.dg/binop-notand5a.c: New test.
>        * gcc.dg/binop-notand6a.c: New test.
>        * gcc.dg/binop-notor1.c: New test.
>        * gcc.dg/binop-notor2.c: New test.
>        * gcc.dg/binop-notxor1.c: New test.
>        * gcc.dg/binop-notxor2.c: New test.
>
> Bootstrapped and regression tested for all languages plus Ada and Obj-C for 
> x86_64-pc-linux-gnu. Ok for apply?

I can't follow the code in find_possible_not_expr_argument or its uses
at all.  Please try to produce patches that look more obvious in what
they are doing - don't try to solve every testcase you can come up with
in a single patch.  Especially don't write functions like
find_possible_not_expr_argument which seems to have evolved a lot
after you wrote the overall function comment.

Thanks,
Richard.

> Regards,
> Kai
>

Well, I added some comments to these functions and renamed the 
find_possible_not_expr_argument function to detect_not_expr_operand, which 
describes its use better.
The reason for this function is that there is more than one way of 
expressing a logical-not, and all of them are used.
This routine simply tries to detect the different variants used for not.  E.g. 
~X == !X and (X ^ 1) == !X for integral type of X with precision one.  For X 
with integral type, (X == 0) == !X.

The folding for the three different bitwise operations is pretty easy and it 
makes sense to implement them at once.  I see no good reason to separate 
them into different patches.  Separating them might even lead to questions 
about abstracting some code pieces out of the main function.
I didn't add testcases for all variants I am aware of now, just those which 
are now handled.

So I hope you can read and understand the logic of the patch better with the updated version.

Regards,
Kai

I found that in the version I sent there is an unclosed comment.  So here is an 
updated patch, which additionally simplifies some code to ease reading.

Regards,
Kai
Index: gcc-head/gcc/tree-ssa-forwprop.c
===
--- gcc-head.orig/gcc/tree-ssa-forwprop.c
+++ gcc-head/gcc/tree-ssa-forwprop.c
@@ -1674,6 +1674,223 @@ simplify_builtin_call (gimple_stmt_itera
   return false;
 }
 
+/* Checks if expression has type of one-bit precision, or is result of
+   a known boolean expression.  */
+static bool
+operand_precision_onep (tree expr)
+{
+  enum tree_code code;
+  gimple def_stmt;
+
+  do
+{
+  code = TREE_CODE (expr);
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)))
+   return false;
+  if (TYPE_PRECISION (TREE_TYPE (expr)) == 1)
+   return true;
+  if (code != SSA_NAME)
+   break;
+  def_stmt = SSA_NAME_DEF_STMT (expr);
+  if (!def_stmt || !is_gimple_assign (def_stmt))
+   break;
+  code = gimple_assign_rhs_code (def_stmt);
+  if (!CONVERT_EXPR_CODE_P (code))
+   break;
+  expr = gimple_assign_rhs1 (def_stmt);
+}
+  while (CONVERT_EXPR_CODE_P (code));
+
+  if (code == TRUTH_NOT_EXPR || TREE_CODE_CLASS (code) == tcc_comparison)
+return true;
+  return false;
+}
+
+/* Helper routine for simplify_bitwise_binary_1 function.
+   If a NOT-expression is found, the operand of the NOT-expression is
+   stored in NEXPR.
+   This function returns either the expression after the first
+   integral cast expression, or NAME.
+   Detected not-patterns are !X or X == 0 for X with integral type, and
+   X ^ 1 or ~X for X with integral type with precision of one.
+   The value of CNT_CASTS is either zero, or one.   */
+static tree
+detect_not_expr_operand (tree name, 

Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread Rainer Orth
Richard Guenther  writes:

> The tests now fail on x86_64-linux and i?86-linux like
>
> FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
> FAIL: libffi.call/cls_longdouble_va.c -O0 -W -Wall output pattern test, is 7.0
> FAIL: libffi.call/cls_double_va.c -O2 output pattern test, is 7.0
> FAIL: libffi.call/cls_longdouble_va.c -O2 output pattern test, is 7.0
> FAIL: libffi.call/cls_double_va.c -O3 output pattern test, is 7.0
> FAIL: libffi.call/cls_longdouble_va.c -O3 output pattern test, is 7.0
> FAIL: libffi.call/cls_double_va.c -Os output pattern test, is 7.0
> FAIL: libffi.call/cls_longdouble_va.c -Os output pattern test, is 7.0
> FAIL: libffi.call/cls_double_va.c -O2 -fomit-frame-pointer output pattern 
> test,
> is 7.0
> FAIL: libffi.call/cls_longdouble_va.c -O2 -fomit-frame-pointer output pattern 
> te
> st, is 7.0
>
> spawn [open ...]^M
> 7.0
> res: 4
> 7.0
> res: 4
> PASS: libffi.call/cls_double_va.c -O0 -W -Wall execution test
> FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
> res: 4
> 7.0
> res: 4
> , should match PR libffi/466607.0^M?
> res: 4^M?
> 7.0^M?
> res: 4
>
> I believe your dg-output first arguments are bogus.

You're right, I'm an idiot ;-)  I should have tested on a non-xfailed
target, too.

Fixed as follows, tested on i386-pc-solaris2.11 and mips-sgi-irix6.5,
applied to mainline and 4.6 branch.  The 4.5 branch is unaffected.

Sorry.
Rainer


2011-06-29  Rainer Orth  

* testsuite/libffi.call/cls_double_va.c: Move PR number to comment.
* testsuite/libffi.call/cls_longdouble_va.c: Likewise.

Index: libffi/testsuite/libffi.call/cls_double_va.c
===
--- libffi/testsuite/libffi.call/cls_double_va.c(revision 175618)
+++ libffi/testsuite/libffi.call/cls_double_va.c(working copy)
@@ -6,7 +6,7 @@
 
 /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
 /* { dg-output "" { xfail avr32*-*-* } } */
-/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
+/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */
 /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
 
 #include "ffitest.h"
Index: libffi/testsuite/libffi.call/cls_longdouble_va.c
===
--- libffi/testsuite/libffi.call/cls_longdouble_va.c(revision 175618)
+++ libffi/testsuite/libffi.call/cls_longdouble_va.c(working copy)
@@ -6,7 +6,7 @@
 
 /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
 /* { dg-output "" { xfail avr32*-*-* x86_64-*-mingw* } } */
-/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
+/* { dg-output "" { xfail mips-sgi-irix6* } } PR libffi/46660 */
 /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
 
 #include "ffitest.h"


-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH, SRA] Total scalarization and padding

2011-06-29 Thread Richard Guenther
On Tue, 28 Jun 2011, Martin Jambor wrote:

> Hi,
> 
> On Tue, Jun 28, 2011 at 03:01:17PM +0200, Richard Guenther wrote:
> > On Tue, Jun 28, 2011 at 2:50 PM, Martin Jambor  wrote:
> > > Hi,
> > >
> > > at the moment SRA can get confused by alignment padding and think that
> > > it actually contains some data for which there is no planned
> > > replacement and thus might leave some loads and stores in place
> > > instead of removing them.  This is perhaps the biggest problem when we
> > > attempt total scalarization of simple structures exactly in order to
> > > get rid of these and of the variables altogether.
> > >
> > > I've pondered for quite a while how to best deal with them.  One
> > > option was to make just the total scalarization stronger.  I have also
> > > contemplated creating phantom accesses for padding I could detect
> > > (i.e. in simple structures) which would be more general, but this
> > > would complicate the parts of SRA which are already quite convoluted
> > > and I was not really sure it was worth it.
> > >
> > > Eventually I decided for the total scalarization option.  This patch
> > > changes it such that the flag is propagated down the access tree but
> > > also, if it does not work out, is reset on the way up.  If the flag
> > > survives, the access tree is considered "covered" by scalar
> > > replacements and thus it is known not to contain unscalarized data.
> > >
> > > While changing function analyze_access_subtree I have simplified the
> > > way we compute the hole flag and also fixed one comparison which we
> > > currently have the wrong way round but it fortunately does not matter
> > > because if there is a hole, the covered_to will never add up to the
> > > total size.  I'll probably post a separate patch against 4.6 just in
> > > case someone attempts to read the source.
> > >
> > > Bootstrapped and tested on x86_64-linux, OK for trunk?
> > 
> > So, what will it do for the testcase?
> > 
> > The following is what I _think_ it should do:
> > 
> > :
> >   l = *p_1(D);
> >   l$i_6 = p_1(D)->i;
> >   D.2700_2 = l$i_6;
> >   D.2701_3 = D.2700_2 + 1;
> >   l$i_12 = D.2701_3;
> >   *p_1(D) = l;
> >   p_1(D)->i = l$i_12;
> > 
> > and let FRE/DSE do their job (which they don't do, unfortunately).
> > So does your patch then remove the load/store from/to l but keep
> > the elementwise loads/stores (which are probably cleaned up by FRE)?
> > 
> 
> Well, that is what would happen if no total scalarization was going
> on.  Total scalarization is a poor-man's aggregate copy-propagation by
> splitting up small structures to individual fields whenever we can get
> rid of them this way (i.e. if they are never used in a non-assignment)
> which I introduced to fix PR 42585 - but unfortunately the padding
> problem did not occur to me until this winter.
> 
> Currently, SRA performs very badly on the testcase, creating:
> 
> :
>   l = *p_1(D);
>   l$i_6 = p_1(D)->i;
>   l$f1_8 = p_1(D)->f1;
>   l$f2_9 = p_1(D)->f2;
>   l$f3_10 = p_1(D)->f3;
>   l$f4_11 = p_1(D)->f4;
>   D.1966_2 = l$i_6;
>   D.1967_3 = D.1966_2 + 1;
>   l$i_12 = D.1967_3;
>   *p_1(D) = l;  <-- this should not be here
>   p_1(D)->i = l$i_12;
>   p_1(D)->f1 = l$f1_8;
>   p_1(D)->f2 = l$f2_9;
>   p_1(D)->f3 = l$f3_10;
>   p_1(D)->f4 = l$f4_11;
>   return;
> 
> Unfortunately, this basically survives all the way to the "optimized"
> dump.  With the patch, the assignment *p_1(D) = l; is removed and
> copyprop1 and cddce1 turn this into:
> 
> :
>   l$i_6 = p_1(D)->i;
>   D.1967_3 = l$i_6 + 1;
>   p_1(D)->i = D.1967_3;
>   return;
> 
> which is then the "optimized" gimple, already before IPA and at -O1.

Ok, that's certainly better.  Can you file a bugreport for the FRE/DSE
issue please?

> For the record, without total scalarization, the "optimized" gimple
> would be:
> 
> :
>   l = *p_1(D);
>   l$i_6 = p_1(D)->i;
>   D.1967_3 = l$i_6 + 1;
>   *p_1(D) = l;
>   p_1(D)->i = D.1967_3;
>   return;
> 
> So at the moment FRE/DSE certainly does not help.  Eventually we
> should do something like that or a real aggregate copy propagation but
> until then we probably need to live with the total scalarization
> thingy - I have learned in the PR mentioned above and a few others,
> there are people who really want at least this functionality now - and
> it should not perform this badly on unaligned structures.

The only thing I'm worried about is

struct X { int i; short s; pad p; int j; };
struct Y { int i; short s; int j; };

int main()
{
  struct X x = { 1, 2, 3, 4 };
  struct Y y = *(struct Y)&x;
  y.s++;
  *(struct Y)&x = y;
  if (x->pad != 3)
abort ();
  return 0;
}

untested, but any variant needs to make sure the padding value is not
lost by means of assigning x to y and back.  The above should translate
to y = MEM[&x + 0]; ... MEM[&x + 0] = y;

So if the result for something like the above looks ok, the patch is ok.

Thanks,
Richard.

> Martin
> 
> 
> 
> 
> > Richard.
> > 
> > 
> > > Thanks,
> > >
> > > Martin
> > >
> > >
> > 

Re: [testsuite, libffi] XFAIL libffi.call/cls_{,long}double_va.c on IRIX 6.5 (PR libffi/46660)

2011-06-29 Thread Richard Guenther
On Wed, Jun 29, 2011 at 10:26 AM, Rainer Orth
 wrote:
> A fresh look at the remaining two libffi testsuite failures on IRIX 6.5
> revealed why they cannot currently work:
>
> FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 0.0
> FAIL: libffi.call/cls_longdouble_va.c -O0 -W -Wall output pattern test, is 0.0
>
> Those tests pass floating point arguments to varargs functions, which
> libffi currently (i.e. before 3.0.11) doesn't support.  Unfortunately,
> the MIPS N32 and N64 ABIs pass floating point args in integer registers
> in this case, as described in the MIPSpro N32 ABI Handbook, p. 7:
>
> http://techpubs.sgi.com/library/tpl/cgi-bin/getdoc.cgi?coll=0650&db=bks&srch=&fname=/SGI_Developer/Mpro_n32_ABI/sgi_html/ch02.html
>
> while for the O32 ABI there's no difference between varargs and
> non-varargs functions.
>
> The following patch handles this.  On mainline and the 4.6 branch, it
> xfails the tests on IRIX 6.  The 4.5 branch is a bit different: since
> the O32 ABI is still supported there, the difference between them needs
> to be taken into account.  Unfortunately, dg-output doesn't take the
> input-opts and exclude-opts parameters dg-xfail-if and dg-skip-if do
> (and is taken from upstream DejaGnu, not overridden/implemented in GCC),
> so I'm forced to use dg-skip-if.  Even dg-xfail-if is not enough since
> the execution tests would XPASS and the output test FAIL.
> Unfortunately, libffi has its own implementation of dg-xfail-if and is
> missing dg-skip-if completely.  To deal with this without duplication, I
> include the implementation from gcc/testsuite/lib, which shouldn't be an
> issue since other .exp files are already included from there.
>
> Tested on mips-sgi-irix6.5 with the appropriate runtest invocations.
> For the 4.5 branch, I've rerun the whole libffi testsuite for all 3 ABIs
> to make sure nothing broke.
>
> Installed on mainline, 4.6 and 4.5 branches.

The tests now fail on x86_64-linux and i?86-linux like

FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
FAIL: libffi.call/cls_longdouble_va.c -O0 -W -Wall output pattern test, is 7.0
FAIL: libffi.call/cls_double_va.c -O2 output pattern test, is 7.0
FAIL: libffi.call/cls_longdouble_va.c -O2 output pattern test, is 7.0
FAIL: libffi.call/cls_double_va.c -O3 output pattern test, is 7.0
FAIL: libffi.call/cls_longdouble_va.c -O3 output pattern test, is 7.0
FAIL: libffi.call/cls_double_va.c -Os output pattern test, is 7.0
FAIL: libffi.call/cls_longdouble_va.c -Os output pattern test, is 7.0
FAIL: libffi.call/cls_double_va.c -O2 -fomit-frame-pointer output pattern test,
is 7.0
FAIL: libffi.call/cls_longdouble_va.c -O2 -fomit-frame-pointer output pattern te
st, is 7.0

spawn [open ...]^M
7.0
res: 4
7.0
res: 4
PASS: libffi.call/cls_double_va.c -O0 -W -Wall execution test
FAIL: libffi.call/cls_double_va.c -O0 -W -Wall output pattern test, is 7.0
res: 4
7.0
res: 4
, should match PR libffi/466607.0^M?
res: 4^M?
7.0^M?
res: 4

I believe your dg-output first arguments are bogus.

Richard.

>        Rainer
>
>
> Mainline/4.6 branch version:
>
> 2011-06-29  Rainer Orth  
>
>        PR libffi/46660
>        * testsuite/libffi.call/cls_double_va.c: xfail dg-output on
>        mips-sgi-irix6*.
>        * testsuite/libffi.call/cls_longdouble_va.c: Likewise.
>
> Index: libffi/testsuite/libffi.call/cls_double_va.c
> ===
> --- libffi/testsuite/libffi.call/cls_double_va.c        (revision 175617)
> +++ libffi/testsuite/libffi.call/cls_double_va.c        (working copy)
> @@ -6,6 +6,7 @@
>
>  /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
>  /* { dg-output "" { xfail avr32*-*-* } } */
> +/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
>  /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
>
>  #include "ffitest.h"
> Index: libffi/testsuite/libffi.call/cls_longdouble_va.c
> ===
> --- libffi/testsuite/libffi.call/cls_longdouble_va.c    (revision 175617)
> +++ libffi/testsuite/libffi.call/cls_longdouble_va.c    (working copy)
> @@ -6,6 +6,7 @@
>
>  /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
>  /* { dg-output "" { xfail avr32*-*-* x86_64-*-mingw* } } */
> +/* { dg-output "PR libffi/46660" { xfail mips-sgi-irix6* } } */
>  /* { dg-skip-if "" arm*-*-* { "-mfloat-abi=hard" } { "" } } */
>
>  #include "ffitest.h"
>
> 4.5 branch version:
>
> 2011-06-29  Rainer Orth  
>
>        PR libffi/46660
>        * testsuite/lib/libffi-dg.exp: Load target-supports.exp,
>        target-supports-dg.exp.
>        (dg-xfail-if): Remove.
>        * testsuite/libffi.call/cls_double_va.c: Skip on mips-sgi-irix6*
>        unless -mabi=32.
>        * testsuite/libffi.call/cls_longdouble_va.c: Likewise.
>
> Index: libffi/testsuite/libffi.call/cls_double_va.c
> ===
> --- libffi/testsuite/libffi.call/cls

Re: PATCH [10/n]: Prepare x32: PR rtl-optimization/49114: Reload failed to handle (set reg:X (plus:X (subreg:X (reg:Y) 0) (const

2011-06-29 Thread Ulrich Weigand
H.J. Lu wrote:
> >>* reload.c (struct replacement): Remove SUBREG_LOC member.
> >>(push_reload): Do not set it.
> >>(push_replacement): Likewise.
> >>(subst_reload): Remove dead code.
> >>(copy_replacements): Remove assertion.
> >>(copy_replacements_1): Do not handle SUBREG_LOC.
> >>(move_replacements): Likewise.
> >>(find_replacement): Remove dead code.  Use 
> >> reload_adjust_reg_for_mode.
> >>Detect subregs via recursive descent instead of via SUBREG_LOC.
> >>
> >
> > It works much better.  I am testing it now.
> >
> 
> It works.  There are no regressions on Linux/ia32 nor Linux/x86-64.
> Can you check it in and mention PR rtl-optimization/49114 ChangeLog?

OK, I've checked the patch in now.  Thanks for testing!

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Re: [PATCH] Handle vectorization of invariant loads (PR46787)

2011-06-29 Thread Ira Rosen


Richard Guenther  wrote on 29/06/2011 02:19:40 PM:

>
> The following patch makes us handle invariant loads during vectorization.
> Dependence analysis currently isn't clever enough to disambiguate them
> thus we insert versioning-for-alias checks.  For the testcase hoisting
> the load is still always possible though, and for a read-after-write
> dependence it would be possible for the vectorized loop copy as the
> may-aliasing write is varying by the scalar variable size.
>
> The existing code for vectorizing invariant accesses looks very
> suspicious - it generates a vector load at the scalar address
> to then just extract the first vector element.  Huh.  IMHO this
> can be simplified as done, by just re-using the scalar load result.
> But maybe this code was supposed to deal with something entirely
> different?

This code was added to support outer loop vectorization:
http://gcc.gnu.org/ml/gcc-patches/2007-08/msg00729.html (with an intention
to be improved...). I think your version is fine for the outer loops as
well. But with this patch an unused vector load is still created (it will
probably be removed later though).

Ira

>
> This patch gives a 33% speedup to the phoronix himeno testcase
> if you bump the maximum alias versioning checks we want to insert.
>
> I'm currently re-bootstrapping & testing this but an earlier version
> was ok on x86_64-unknown-linux-gnu.
>
> 2011-06-29  Richard Guenther  
>
>PR tree-optimization/46787
>* tree-data-ref.c (dr_address_invariant_p): Remove.
>(find_data_references_in_stmt): Invariant accesses are ok now.
>* tree-vect-stmts.c (vectorizable_load): Handle invariant
>loads.
>* tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
>invariant loads.
>
>* gcc.dg/vect/vect-121.c: New testcase.
>
> Index: gcc/tree-data-ref.c
> ===
> --- gcc/tree-data-ref.c   (revision 175531)
> +++ gcc/tree-data-ref.c   (working copy)
> @@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference
>  }
>  }
>
> -/* Returns true if the address of DR is invariant.  */
> -
> -static bool
> -dr_address_invariant_p (struct data_reference *dr)
> -{
> -  unsigned i;
> -  tree idx;
> -
> -  FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
> -if (tree_contains_chrecs (idx, NULL))
> -  return false;
> -
> -  return true;
> -}
> -
>  /* Frees data reference DR.  */
>
>  void
> @@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo
>dr = create_data_ref (nest, loop_containing_stmt (stmt),
>   *ref->pos, stmt, ref->is_read);
>gcc_assert (dr != NULL);
> -
> -  /* FIXME -- data dependence analysis does not work correctly
> for objects
> - with invariant addresses in loop nests.  Let us fail here until
the
> -problem is fixed.  */
> -  if (dr_address_invariant_p (dr) && nest)
> -   {
> - free_data_ref (dr);
> - if (dump_file && (dump_flags & TDF_DETAILS))
> -   fprintf (dump_file, "\tFAILED as dr address is invariant\n");
> - ret = false;
> - break;
> -   }
> -
>VEC_safe_push (data_reference_p, heap, *datarefs, dr);
>  }
>VEC_free (data_ref_loc, heap, references);
> Index: gcc/tree-vect-stmts.c
> ===
> --- gcc/tree-vect-stmts.c   (revision 175531)
> +++ gcc/tree-vect-stmts.c   (working copy)
> @@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s
>&& code != COMPONENT_REF
>&& code != IMAGPART_EXPR
>&& code != REALPART_EXPR
> -  && code != MEM_REF)
> +  && code != MEM_REF
> +  && TREE_CODE_CLASS (code) != tcc_declaration)
>  return false;
>
>if (!STMT_VINFO_DATA_REF (stmt_info))
> @@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s
>   if (inv_p && !bb_vinfo)
>{
>  gcc_assert (!strided_load);
> -gcc_assert (nested_in_vect_loop_p (loop, stmt));
>  if (j == 0)
>{
> -int k;
> -tree t = NULL_TREE;
> -tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
> -
> -/* CHECKME: bitpos depends on endianess?  */
> -bitpos = bitsize_zero_node;
> -vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
> -   bitsize, bitpos);
> -vec_dest = vect_create_destination_var (scalar_dest,
> -   NULL_TREE);
> -new_stmt = gimple_build_assign (vec_dest, vec_inv);
> -new_temp = make_ssa_name (vec_dest, new_stmt);
> -gimple_assign_set_lhs (new_stmt, new_temp);
> -vect_finish_stmt_generation (stmt, new_stmt, gsi);
> -
> -for (k = nunits - 1; k >= 0; --k)
> - t = tree_cons (NULL_TREE, new_temp, t);
> -/* FIXME: use build_constructor directly.  */
> -vec_inv = build_constructor_from_list (vectype, t);
> +tree vec_inv;
> +

Re: [patch tree-optimization]: Do bitwise operator optimizations for X op !X patterns

2011-06-29 Thread Kai Tietz
- Original Message -
From: "Richard Guenther" 
To: "Kai Tietz" 
Cc: gcc-patches@gcc.gnu.org
Sent: Wednesday, June 29, 2011 12:14:10 PM
Subject: Re: [patch tree-optimization]: Do bitwise operator optimizations for X 
op !X patterns

On Tue, Jun 28, 2011 at 5:05 PM, Kai Tietz  wrote:
> Hello,
>
> this patch implements the X op !X patterns within tree-ssa-forwprop.c without 
> using here const-fold routines.  Additionally it does some trivial folding 
> for X op X.  Implementation
> also looks through [(type)] X op [(type)] !X, if type of X is integral and 
> precision is suitable
> for operation.
>
> ChangeLog gcc/
>
> 2011-06-28  Kai Tietz  
>
>        * tree-ssa-forwprop.c (operand_precision_onep): New
>        function.
>        (find_possible_not_expr_argument): Likewise.
>        (simplify_bitwise_binary_1): Likewise.
>        (simplify_bitwise_binary): Use simplify_bitwise_binary_1
>        for detecting various X op !X optimizations.
>
> ChangeLog gcc/testsuite
>
> 2011-06-28  Kai Tietz  
>
>        * gcc.dg/binop-notand1a.c: New test.
>        * gcc.dg/binop-notand2a.c: New test.
>        * gcc.dg/binop-notand3a.c: New test.
>        * gcc.dg/binop-notand4a.c: New test.
>        * gcc.dg/binop-notand5a.c: New test.
>        * gcc.dg/binop-notand6a.c: New test.
>        * gcc.dg/binop-notor1.c: New test.
>        * gcc.dg/binop-notor2.c: New test.
>        * gcc.dg/binop-notxor1.c: New test.
>        * gcc.dg/binop-notxor2.c: New test.
>
> Bootstrapped and regression tested for all languages plus Ada and Obj-C for 
> x86_64-pc-linux-gnu. Ok for apply?

I can't follow the code in find_possible_not_expr_argument or its uses
at all.  Please try to produce patches that look more obvious in what
they are doing - don't try to solve every testcase you can come up with
in a single patch.  Especially don't write functions like
find_possible_not_expr_argument which seems to have evolved a lot
after you wrote the overall function comment.

Thanks,
Richard.

> Regards,
> Kai
>

Well, I added some comments to these functions and renamed the
find_possible_not_expr_argument function to detect_not_expr_operand, which
describes its use better.
The reason for this function is that there is more than one way of
expressing a logical-not, and all of them are used.
This routine simply tries to detect the different variants used for not.  E.g.
~X == !X and (X ^ 1) == !X for X of integral type with precision one.  For X
with integral type, (X == 0) == !X.

The folding for the three different bitwise operations is pretty easy, and it
makes sense to implement them at once.  I see no good reason here to separate
them into different patches.  Separating them might even lead to questions
about abstracting some pieces of code out of the main function.
I didn't add testcases for all variants I am aware of now, just for those which
are now handled.

So I hope you can read and understand the logic of the patch better with the
updated patch.

Regards,
Kai
Index: gcc-head/gcc/tree-ssa-forwprop.c
===
--- gcc-head.orig/gcc/tree-ssa-forwprop.c
+++ gcc-head/gcc/tree-ssa-forwprop.c
@@ -1674,6 +1674,228 @@ simplify_builtin_call (gimple_stmt_itera
   return false;
 }
 
+/* Checks if expression has type of one-bit precision, or is result of
+   a known boolean expression.  */
+static bool
+operand_precision_onep (tree expr)
+{
+  enum tree_code code;
+
+  do
+{
+  code = TREE_CODE (expr);
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr)))
+   return false;
+  if (TYPE_PRECISION (TREE_TYPE (expr)) == 1)
+   return true;
+  if (code == SSA_NAME)
+   {
+ gimple def_stmt = SSA_NAME_DEF_STMT (expr);
+ if (!def_stmt || !is_gimple_assign (def_stmt))
+   break;
+ code = gimple_assign_rhs_code (def_stmt);
+ if (!CONVERT_EXPR_CODE_P (code))
+   break;
+ expr = gimple_assign_rhs1 (def_stmt);
+   }
+  else
+   break;
+}
+  while (CONVERT_EXPR_CODE_P (code));
+  if (code == TRUTH_NOT_EXPR || TREE_CODE_CLASS (code) == tcc_comparison)
+return true;
+  return false;
+}
+
+/* Helper routine for simplify_bitwise_binary_1 function.
+   Ignored integral type cast is counted in CNT_CASTS. If a not expression
+   is found, the operand of the not-expression is stored in NEXPR.
+   This function returns the either the expression after the first
+   integral cast expression, or NAME.
+   Detected not-patterns are !X or X == 0 for X with integral type, and
+   X ^ 1 or ~X for X with integral type with precision of one.
+   The value of CNT_CASTS is either zero, or one.   */
+static tree
+detect_not_expr_operand (tree name, int *cnt_casts, tree *nexpr)
+{
+  enum tree_code code = ERROR_MARK;
+  *cnt_casts = 0;
+  *nexpr = NULL_TREE;
+
+  while (1)
+{
+  tree op1, op2;
+  gimple def_stmt;
+  code = TREE_CODE (name);
+  /* If name has none-intergal type, or isn't a SSA_NAME, then
+stop search.  */

Re: [build] Move unwinder to toplevel libgcc

2011-06-29 Thread Rainer Orth
Paolo Bonzini  writes:

> I think Rainer missed the existence of libgcc_tm_file. :)

I didn't, but it lives in gcc/config.gcc, which is contrary to the move
from gcc to libgcc.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [build] Move unwinder to toplevel libgcc

2011-06-29 Thread Rainer Orth
"Joseph S. Myers"  writes:

> I interpreted Rainer's comment as meaning that a 
> libgcc/config/mips/mips-lib.h (or similar) file would need to be created 
> and inserted in the right place in libgcc_tm_file for the large number of 
> targets presently putting mips.h in tm_file.

Indeed, that's why I refrained from attacking this by other means.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: Ping #1: [Patch, AVR]: Fix PR34734

2011-06-29 Thread Georg-Johann Lay
Denis Chertykov wrote:
> 2011/6/29 Georg-Johann Lay :
>> Denis Chertykov wrote:
>>> 2011/6/28 Georg-Johann Lay :
 http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01462.html

 Georg-Johann Lay wrote:
> PR34734 produces annoying, false warnings if __attribute__((progmem))
> is used in conjunction with C++.  DECL_INITIAL is not yet set up in
> avr_handle_progmem_attribute.
>
> Johann
>
>   PR target/34734
>   * config/avr/avr.c (avr_handle_progmem_attribute): Move warning
>   about uninitialized data attributed 'progmem' from here...
>   (avr_encode_section_info): ...to this new function.
>   (TARGET_ENCODE_SECTION_INFO): New define.
>   (avr_section_type_flags): For data in ".progmem.data", remove
>   section flag SECTION_WRITE.
 avr_encode_section_info is good place to emit the warning:
 DECL_INITIAL has stabilized for C++, the warning will appear even for
 unused variables that will eventually be thrown away, and the warning
 appears only once (new_decl_p).
>>> Approved.
>>>
>>> Denis.
>> Is this a patch that should be backported?
>> 4.6?
>> 4.5?
>>
>> It's not a fix for a "bug or doc" issue, but for a very annoying, false warning.
> 
> You can backport it if you want.
> 
> I usually don't backport such patches.
> 
> Denis.

Ok, maybe Eric or Anatoly have some preference for 4.5/4.6.

Applied to 4.7 together with following corrigendum:

Index: config/avr/avr.c
===
--- config/avr/avr.c(revision 175628)
+++ config/avr/avr.c(working copy)
@@ -5161,7 +5161,7 @@ avr_section_type_flags (tree decl, const
 /* Implement `TARGET_ENCODE_SECTION_INFO'.  */

 static void
-avr_encode_section_info (tree decl, rtx rtl ATTRIBUTE_UNUSED,
+avr_encode_section_info (tree decl, rtx rtl,
  int new_decl_p)
 {
   /* In avr_handle_progmem_attribute, DECL_INITIAL is not yet
@@ -5177,6 +5177,8 @@ avr_encode_section_info (tree decl, rtx
"uninitialized variable %q+D put into "
"program memory area", decl);
 }
+
+  default_encode_section_info (decl, rtl, new_decl_p);
 }




Re: [build] Move unwinder to toplevel libgcc

2011-06-29 Thread Paolo Bonzini

On 06/29/2011 12:41 PM, Joseph S. Myers wrote:

On Wed, 29 Jun 2011, Paolo Bonzini wrote:


On 06/20/2011 02:24 PM, Rainer Orth wrote:

* The only unwinder-related macro I haven't moved is
LIBGCC2_UNWIND_ATTRIBUTE.  It is only defined gcc/config/mips/mips.h.
I suppose we would need a libgcc equivalent of tm.h for that,
something I didn't want to attack at this point.


Something like the attached?


This patch appears to create a *second* file called libgcc_tm.h, so there
is one created in gcc/ and one in libgcc/, which seems incredibly
confusing.  By all means move the libgcc_tm.h logic to libgcc/ (though
tm.h will need to continue to include libgcc_tm.h for the target, until
the transition away from tm.h target macros in target code is complete and
code includes libgcc_tm.h directly instead), but don't have two headers
with the same name in different build directories.


Yes, the patch was not complete and only meant to be the plumbing for 
what you describe.


I think Rainer missed the existence of libgcc_tm_file. :)

Paolo


[ARM] Deprecate -mwords-little-endian

2011-06-29 Thread Richard Sandiford
ARM has an option called -mwords-little-endian that provides big-endian
compatibility with pre-2.8 compilers.  When I asked Richard about it,
he seemed to think it had outlived its usefulness, so this patch
deprecates it.  We can then remove it once 4.7 is out.

Tested on arm-linux-gnueabi.  OK to install?  If so, I'll do a patch
for the web page as well.

Richard


Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c2011-06-29 09:33:37.0 +0100
+++ gcc/config/arm/arm.c2011-06-29 12:20:35.0 +0100
@@ -1483,6 +1483,10 @@ arm_option_override (void)
   if (TARGET_APCS_FLOAT)
 warning (0, "passing floating point arguments in fp regs not yet 
supported");
 
+  if (TARGET_LITTLE_WORDS)
+warning (OPT_Wdeprecated, "% is deprecated and "
+"will be removed in a future release");
+
   /* Initialize boolean versions of the flags, for use in the arm.md file.  */
   arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi 2011-06-22 16:45:29.0 +0100
+++ gcc/doc/invoke.texi 2011-06-29 12:17:40.0 +0100
@@ -10237,7 +10237,7 @@ Generate code for a little-endian word o
 order.  That is, a byte order of the form @samp{32107654}.  Note: this
 option should only be used if you require compatibility with code for
 big-endian ARM processors generated by versions of the compiler prior to
-2.8.
+2.8.  This option is now deprecated.
 
 @item -mcpu=@var{name}
 @opindex mcpu


[PATCH] Handle vectorization of invariant loads (PR46787)

2011-06-29 Thread Richard Guenther

The following patch makes us handle invariant loads during vectorization.
Dependence analysis currently isn't clever enough to disambiguate them
thus we insert versioning-for-alias checks.  For the testcase hoisting
the load is still always possible though, and for a read-after-write
dependence it would be possible for the vectorized loop copy as the
may-aliasing write is varying by the scalar variable size.

The existing code for vectorizing invariant accesses looks very
suspicious - it generates a vector load at the scalar address
to then just extract the first vector element.  Huh.  IMHO this
can be simplified as done, by just re-using the scalar load result.
But maybe this code was supposed to deal with something entirely
different?

This patch gives a 33% speedup to the phoronix himeno testcase
if you bump the maximum alias versioning checks we want to insert.

I'm currently re-bootstrapping & testing this but an earlier version
was ok on x86_64-unknown-linux-gnu.

2011-06-29  Richard Guenther  

PR tree-optimization/46787
* tree-data-ref.c (dr_address_invariant_p): Remove.
(find_data_references_in_stmt): Invariant accesses are ok now.
* tree-vect-stmts.c (vectorizable_load): Handle invariant
loads.
* tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
invariant loads.

* gcc.dg/vect/vect-121.c: New testcase.

Index: gcc/tree-data-ref.c
===
--- gcc/tree-data-ref.c (revision 175531)
+++ gcc/tree-data-ref.c (working copy)
@@ -919,21 +919,6 @@ dr_analyze_alias (struct data_reference
 }
 }
 
-/* Returns true if the address of DR is invariant.  */
-
-static bool
-dr_address_invariant_p (struct data_reference *dr)
-{
-  unsigned i;
-  tree idx;
-
-  FOR_EACH_VEC_ELT (tree, DR_ACCESS_FNS (dr), i, idx)
-if (tree_contains_chrecs (idx, NULL))
-  return false;
-
-  return true;
-}
-
 /* Frees data reference DR.  */
 
 void
@@ -4228,19 +4213,6 @@ find_data_references_in_stmt (struct loo
   dr = create_data_ref (nest, loop_containing_stmt (stmt),
*ref->pos, stmt, ref->is_read);
   gcc_assert (dr != NULL);
-
-  /* FIXME -- data dependence analysis does not work correctly for objects
- with invariant addresses in loop nests.  Let us fail here until the
-problem is fixed.  */
-  if (dr_address_invariant_p (dr) && nest)
-   {
- free_data_ref (dr);
- if (dump_file && (dump_flags & TDF_DETAILS))
-   fprintf (dump_file, "\tFAILED as dr address is invariant\n");
- ret = false;
- break;
-   }
-
   VEC_safe_push (data_reference_p, heap, *datarefs, dr);
 }
   VEC_free (data_ref_loc, heap, references);
Index: gcc/tree-vect-stmts.c
===
--- gcc/tree-vect-stmts.c   (revision 175531)
+++ gcc/tree-vect-stmts.c   (working copy)
@@ -4076,7 +4076,8 @@ vectorizable_load (gimple stmt, gimple_s
   && code != COMPONENT_REF
   && code != IMAGPART_EXPR
   && code != REALPART_EXPR
-  && code != MEM_REF)
+  && code != MEM_REF
+  && TREE_CODE_CLASS (code) != tcc_declaration)
 return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -4527,30 +4528,14 @@ vectorizable_load (gimple stmt, gimple_s
  if (inv_p && !bb_vinfo)
{
  gcc_assert (!strided_load);
- gcc_assert (nested_in_vect_loop_p (loop, stmt));
  if (j == 0)
{
- int k;
- tree t = NULL_TREE;
- tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
- /* CHECKME: bitpos depends on endianess?  */
- bitpos = bitsize_zero_node;
- vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-   bitsize, bitpos);
- vec_dest = vect_create_destination_var (scalar_dest,
- NULL_TREE);
- new_stmt = gimple_build_assign (vec_dest, vec_inv);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_temp);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
- for (k = nunits - 1; k >= 0; --k)
-   t = tree_cons (NULL_TREE, new_temp, t);
- /* FIXME: use build_constructor directly.  */
- vec_inv = build_constructor_from_list (vectype, t);
+ tree vec_inv;
+ gimple_stmt_iterator gsi2 = *gsi;
+ gsi_next (&gsi2);
+ vec_inv = build_vector_from_val (vectype, scalar_dest);
  new_temp = vect_init_vector (stmt, vec_inv,
-  

  1   2   >