Re: [Mingw-w64-developer] [patch] --enable-dynamic-string default for mingw-w64 v2

2011-09-30 Thread Ozkan Sezer
On Sat, Oct 1, 2011 at 9:03 AM, JonY  wrote:
> Hi,
>
> I followed Paolo's suggestion with the os_defines.h trick. I duplicated
> os/mingw32/ to os/mingw32-w64/ for this to work, since there aren't any
> built-in defines to tell the 2 apart unless you include some headers
> like _mingw.h.
>
> Patch attached, comments?

Why is _GLIBCXX_FULLY_DYNAMIC_STRING being defined or not defined
not enough?

--
O.S.


[patch] --enable-dynamic-string default for mingw-w64 v2

2011-09-30 Thread JonY
Hi,

I followed Paolo's suggestion with the os_defines.h trick. I duplicated
os/mingw32/ to os/mingw32-w64/ for this to work, since there aren't any
built-in defines to tell the 2 apart unless you include some headers
like _mingw.h.

Patch attached, comments?
Index: configure.host
===
--- configure.host  (revision 179411)
+++ configure.host  (working copy)
@@ -260,8 +260,15 @@
 atomic_word_dir=os/irix
 ;;
   mingw32*)
-os_include_dir="os/mingw32"
-error_constants_dir="os/mingw32"
+case "$host" in
+  *-w64-*)
+os_include_dir="os/mingw32-w64"
+error_constants_dir="os/mingw32-w64"
+;;
+  *)
+os_include_dir="os/mingw32"
+error_constants_dir="os/mingw32"
+;;
 OPT_LDFLAGS="${OPT_LDFLAGS} \$(lt_host_flags)"
 ;;
   netbsd*)
Index: include/bits/basic_string.h
===
--- include/bits/basic_string.h (revision 179411)
+++ include/bits/basic_string.h (working copy)
@@ -201,7 +201,7 @@
void
_M_set_length_and_sharable(size_type __n)
{
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
  if (__builtin_expect(this != &_S_empty_rep(), false))
 #endif
{
@@ -231,7 +231,7 @@
void
_M_dispose(const _Alloc& __a)
{
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
  if (__builtin_expect(this != &_S_empty_rep(), false))
 #endif
{
@@ -252,7 +252,7 @@
_CharT*
_M_refcopy() throw()
{
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
  if (__builtin_expect(this != &_S_empty_rep(), false))
 #endif
 __gnu_cxx::__atomic_add_dispatch(&this->_M_refcount, 1);
@@ -430,7 +430,7 @@
*  @brief  Default constructor creates an empty string.
*/
   basic_string()
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
   : _M_dataplus(_S_empty_rep()._M_refdata(), _Alloc()) { }
 #else
   : _M_dataplus(_S_construct(size_type(), _CharT(), _Alloc()), _Alloc()){ }
@@ -502,7 +502,7 @@
   basic_string(basic_string&& __str) noexcept
   : _M_dataplus(__str._M_dataplus)
   {
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING  
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
__str._M_data(_S_empty_rep()._M_refdata());
 #else
__str._M_data(_S_construct(size_type(), _CharT(), get_allocator()));
Index: include/bits/basic_string.tcc
===
--- include/bits/basic_string.tcc   (revision 179411)
+++ include/bits/basic_string.tcc   (working copy)
@@ -80,7 +80,7 @@
   _S_construct(_InIterator __beg, _InIterator __end, const _Alloc& __a,
   input_iterator_tag)
   {
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
if (__beg == __end && __a == _Alloc())
  return _S_empty_rep()._M_refdata();
 #endif
@@ -126,7 +126,7 @@
   _S_construct(_InIterator __beg, _InIterator __end, const _Alloc& __a,
   forward_iterator_tag)
   {
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
if (__beg == __end && __a == _Alloc())
  return _S_empty_rep()._M_refdata();
 #endif
@@ -154,7 +154,7 @@
 basic_string<_CharT, _Traits, _Alloc>::
 _S_construct(size_type __n, _CharT __c, const _Alloc& __a)
 {
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
   if (__n == 0 && __a == _Alloc())
return _S_empty_rep()._M_refdata();
 #endif
@@ -456,7 +456,7 @@
 basic_string<_CharT, _Traits, _Alloc>::
 _M_leak_hard()
 {
-#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#if !defined (_GLIBCXX_FULLY_DYNAMIC_STRING) || (_GLIBCXX_FULLY_DYNAMIC_STRING 
== 0)
   if (_M_rep() == &_S_empty_rep())
return;
 #endif
Index: config/os/mingw32-w64/os_defines.h
===
--- config/os/mingw32-w64/os_defines.h  (revision 179411)
+++ config/os/mingw32-w64/os_defines.h  (working copy)
@@ -65,4 +65,9 @@
 // ioctlsocket function doesn't work for normal file-descriptors.
 #define _GLIBCXX_NO_IOCTL 1
 
+// mingw-w64 should use fully-dynamic-string by default
+#ifndef _GLIBCXX_FULLY_DYNAMIC_STRING
+#define _GLIBCXX_FULLY_DYNAMIC_STRING 1
 #endif
+
+#endif
Index: acinclude.m4
===
--- acinclude.m4 

Re: Disable early inlining while compiling for coverage (issue5173042)

2011-09-30 Thread शरद सिंघई
Here is an example. In the attached file, foo.c contains only two
functions, 'sum' and 'main'. The function 'sum' gets inlined into
'main' (with -O2).

gcc --coverage -O2 foo.c
./a.out
gcov -b foo.c

Now the coverage data for the 'if' condition in 'sum' looks like this:
(in attached file foo.c.gcov)

8:8:if (v[i]) total += 1;
branch  0 never executed
branch  1 never executed
branch  2 taken 75% (fallthrough)
branch  3 taken 25%

Thus a simple conditional looks like a four-way branch. It is due to
early inlining where a couple of basic blocks get eliminated but the
branch coverage still gets attributed to the conditional. Similarly
the coverage data for the loop in 'sum' looks like this

9:7:  for (i = 0; i < N; ++i) {
branch  0 never executed
branch  1 never executed
branch  2 taken 89%
branch  3 taken 11% (fallthrough)

After disabling early inlining, the coverage data looks saner.

Of course, in general, the coverage data cannot be accurate in the
presence of optimizations. However, this improves the situation
somewhat and improves usability when compiling without optimization is
not feasible.

Sharad

On Fri, Sep 30, 2011 at 9:15 PM, Xinliang David Li  wrote:
>
> Yes, this will improve test coverage option's usability, but please
> provide the example to explain the issues.
>
> David
>
> On Fri, Sep 30, 2011 at 6:12 PM, Sharad Singhai  wrote:
> > This patch disables early inlining when --coverage option is
> > specified. This improves coverage data in presence of other
> > optimizations, specially with -O2 where early inlining changes the
> > control flow graph sufficiently enough to generate seemingly very odd
> > source coverage.
> >
> > Bootstrapped okay and regression tests passed.
> >
> > Okay for google/gcc-4_6?
> >
> > 2011-09-30   Sharad Singhai  
> >
> >        * gcc.c (cc1_options): Added -fno-early-inlining for coverage.
> >
> > Index: gcc.c
> > ===
> > --- gcc.c       (revision 179402)
> > +++ gcc.c       (working copy)
> > @@ -776,7 +776,7 @@
> >  %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}}\
> >  %{fsyntax-only:-o %j} %{-param*}\
> >  %{fmudflap|fmudflapth:-fno-builtin -fno-merge-constants}\
> > - %{coverage:-fprofile-arcs -ftest-coverage}";
> > + %{coverage:-fprofile-arcs -ftest-coverage -fno-early-inlining}";
> >
> >  /* If an assembler wrapper is used to invoke post-assembly tools
> >    like MAO, --save-temps need to be passed to save the output of
> >
> > --
> > This patch is available for review at http://codereview.appspot.com/5173042
> >


Re: Disable early inlining while compiling for coverage (issue5173042)

2011-09-30 Thread Xinliang David Li
Yes, this will improve test coverage option's usability, but please
provide the example to explain the issues.

David

On Fri, Sep 30, 2011 at 6:12 PM, Sharad Singhai  wrote:
> This patch disables early inlining when --coverage option is
> specified. This improves coverage data in presence of other
> optimizations, specially with -O2 where early inlining changes the
> control flow graph sufficiently enough to generate seemingly very odd
> source coverage.
>
> Bootstrapped okay and regression tests passed.
>
> Okay for google/gcc-4_6?
>
> 2011-09-30   Sharad Singhai  
>
>        * gcc.c (cc1_options): Added -fno-early-inlining for coverage.
>
> Index: gcc.c
> ===
> --- gcc.c       (revision 179402)
> +++ gcc.c       (working copy)
> @@ -776,7 +776,7 @@
>  %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}}\
>  %{fsyntax-only:-o %j} %{-param*}\
>  %{fmudflap|fmudflapth:-fno-builtin -fno-merge-constants}\
> - %{coverage:-fprofile-arcs -ftest-coverage}";
> + %{coverage:-fprofile-arcs -ftest-coverage -fno-early-inlining}";
>
>  /* If an assembler wrapper is used to invoke post-assembly tools
>    like MAO, --save-temps need to be passed to save the output of
>
> --
> This patch is available for review at http://codereview.appspot.com/5173042
>


Re: [google] Fix bugs in sampled profile collection

2011-09-30 Thread Xinliang David Li
ok.

David

On Fri, Sep 30, 2011 at 6:54 PM, Easwaran Raman  wrote:
> This fixes two issues with sampled profile collection. It delays
> cleanup of instrumentation_to_be_sampled after all callgraph nodes
> have been instrumented and prevents  gcov_sample_counter_decl and
> gcov_sampling_rate_decl from being garbage collected.
>
>  Ok for google/gcc-4_6 and google/main branches?
>
> -Easwaran
>
> 2011-09-30  Easwaran Raman  
>
>        * tree-profile.c (gcov_sample_counter_decl): Add GTY marker.
>        (gcov_sampling_rate_decl): Likewise.
>        (add_sampling_to_edge_counters): Do not free
>        instrumentation_to_be_sampled.
>        (cleanup_instrumentation_sampling): New function.
>        (tree_profiling): Call cleanup_instrumentation_sampling at the end.
>
> testsuite/ChangeLog.google-4_6:
>
> 2011-09-30  Easwaran Raman  
>
>        * gcc.dg/sample-profile-generate-1.c: New test.
>
> Index: gcc/testsuite/gcc.dg/sample-profile-generate-1.c
> ===
> --- gcc/testsuite/gcc.dg/sample-profile-generate-1.c    (revision 0)
> +++ gcc/testsuite/gcc.dg/sample-profile-generate-1.c    (revision 0)
> @@ -0,0 +1,26 @@
> +/* { dg-do compile} */
> +/* { dg-options "-O2 -fprofile-generate -fprofile-generate-sampling" } */
> +
> +void foobar(int);
> +
> +void
> +foo (void)
> +{
> +  int i;
> +  for (i = 0; i < 100; i++)
> +    {
> +      foobar(i);
> +    }
> +}
> +
> +void
> +bar (void)
> +{
> +  int i;
> +  for (i = 0; i < 100; i++)
> +    {
> +      foobar(i);
> +    }
> +}
> +
> +/* { dg-final { cleanup-coverage-files } } */
>
> Index: tree-profile.c
> ===
> --- tree-profile.c      (revision 178897)
> +++ tree-profile.c      (working copy)
> @@ -163,10 +163,10 @@ init_ic_make_global_vars (void)
>  static struct pointer_set_t *instrumentation_to_be_sampled = NULL;
>
>  /* extern __thread gcov_unsigned_t __gcov_sample_counter  */
> -static tree gcov_sample_counter_decl = NULL_TREE;
> +static GTY(()) tree gcov_sample_counter_decl = NULL_TREE;
>
>  /* extern gcov_unsigned_t __gcov_sampling_rate  */
> -static tree gcov_sampling_rate_decl = NULL_TREE;
> +static GTY(()) tree gcov_sampling_rate_decl = NULL_TREE;
>
>  /* forward declaration.  */
>  void gimple_init_instrumentation_sampling (void);
> @@ -281,9 +281,13 @@ add_sampling_to_edge_counters (void)
>             break;
>           }
>       }
> +}
>
> +static void
> +cleanup_instrumentation_sampling (void)
> +{
>   /* Free the bitmap.  */
> -  if (instrumentation_to_be_sampled)
> +  if (flag_profile_generate_sampling && instrumentation_to_be_sampled)
>     {
>       pointer_set_destroy (instrumentation_to_be_sampled);
>       instrumentation_to_be_sampled = NULL;
> @@ -1452,6 +1456,7 @@ tree_profiling (void)
>     }
>
>   del_node_map();
> +  cleanup_instrumentation_sampling();
>   return 0;
>  }
>


[google] Fix bugs in sampled profile collection

2011-09-30 Thread Easwaran Raman
This fixes two issues with sampled profile collection. It delays
cleanup of instrumentation_to_be_sampled after all callgraph nodes
have been instrumented and prevents  gcov_sample_counter_decl and
gcov_sampling_rate_decl from being garbage collected.

 Ok for google/gcc-4_6 and google/main branches?

-Easwaran

2011-09-30  Easwaran Raman  

* tree-profile.c (gcov_sample_counter_decl): Add GTY marker.
(gcov_sampling_rate_decl): Likewise.
(add_sampling_to_edge_counters): Do not free
instrumentation_to_be_sampled.
(cleanup_instrumentation_sampling): New function.
(tree_profiling): Call cleanup_instrumentation_sampling at the end.

testsuite/ChangeLog.google-4_6:

2011-09-30  Easwaran Raman  

* gcc.dg/sample-profile-generate-1.c: New test.

Index: gcc/testsuite/gcc.dg/sample-profile-generate-1.c
===
--- gcc/testsuite/gcc.dg/sample-profile-generate-1.c(revision 0)
+++ gcc/testsuite/gcc.dg/sample-profile-generate-1.c(revision 0)
@@ -0,0 +1,26 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -fprofile-generate -fprofile-generate-sampling" } */
+
+void foobar(int);
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+{
+  foobar(i);
+}
+}
+
+void
+bar (void)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+{
+  foobar(i);
+}
+}
+
+/* { dg-final { cleanup-coverage-files } } */

Index: tree-profile.c
===
--- tree-profile.c  (revision 178897)
+++ tree-profile.c  (working copy)
@@ -163,10 +163,10 @@ init_ic_make_global_vars (void)
 static struct pointer_set_t *instrumentation_to_be_sampled = NULL;

 /* extern __thread gcov_unsigned_t __gcov_sample_counter  */
-static tree gcov_sample_counter_decl = NULL_TREE;
+static GTY(()) tree gcov_sample_counter_decl = NULL_TREE;

 /* extern gcov_unsigned_t __gcov_sampling_rate  */
-static tree gcov_sampling_rate_decl = NULL_TREE;
+static GTY(()) tree gcov_sampling_rate_decl = NULL_TREE;

 /* forward declaration.  */
 void gimple_init_instrumentation_sampling (void);
@@ -281,9 +281,13 @@ add_sampling_to_edge_counters (void)
 break;
   }
   }
+}

+static void
+cleanup_instrumentation_sampling (void)
+{
   /* Free the bitmap.  */
-  if (instrumentation_to_be_sampled)
+  if (flag_profile_generate_sampling && instrumentation_to_be_sampled)
 {
   pointer_set_destroy (instrumentation_to_be_sampled);
   instrumentation_to_be_sampled = NULL;
@@ -1452,6 +1456,7 @@ tree_profiling (void)
 }

   del_node_map();
+  cleanup_instrumentation_sampling();
   return 0;
 }


Disable early inlining while compiling for coverage (issue5173042)

2011-09-30 Thread Sharad Singhai
This patch disables early inlining when the --coverage option is
specified. This improves coverage data in the presence of other
optimizations, especially with -O2, where early inlining changes the
control flow graph enough to generate seemingly very odd
source coverage.

Bootstrapped okay and regression tests passed.

Okay for google/gcc-4_6?

2011-09-30   Sharad Singhai  

* gcc.c (cc1_options): Added -fno-early-inlining for coverage.

Index: gcc.c
===
--- gcc.c   (revision 179402)
+++ gcc.c   (working copy)
@@ -776,7 +776,7 @@
  %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}}\
  %{fsyntax-only:-o %j} %{-param*}\
  %{fmudflap|fmudflapth:-fno-builtin -fno-merge-constants}\
- %{coverage:-fprofile-arcs -ftest-coverage}";
+ %{coverage:-fprofile-arcs -ftest-coverage -fno-early-inlining}";
 
 /* If an assembler wrapper is used to invoke post-assembly tools
like MAO, --save-temps need to be passed to save the output of

--
This patch is available for review at http://codereview.appspot.com/5173042


[PATCH] Don't assume that constants can clobber vtbl

2011-09-30 Thread Maxim Kuvyrkov
This patch makes detect_type_change analysis assume that only ADDR_EXPRs can be 
assigned to vtable entries.

Initially, the patch made a less strict assumption that constants are not 
assigned to vtables.  I then bumped the assumption to "only ADDR_EXPRs can be 
assigned to vtables".  I have had this patch since GCC 4.6 and have not come 
across a testcase that would invalidate either of the assumptions.

Martin, you are the author of stmt_may_be_vtbl_ptr_store; is there any reason 
to assume that something other than ADDR_EXPR can be assigned to a vtable?

Bootstrapped and regtested on x86_64-linux-gnu {-m64/-m32} with no regressions.

OK for trunk?

Thank you,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics




fsf-gcc-vtbl-assign.ChangeLog
Description: Binary data


fsf-gcc-vtbl-assign.patch
Description: Binary data


Re: Commit: RX: Add support for MIN and MAX instructions in QI and HI modes

2011-09-30 Thread Richard Henderson
On 09/28/2011 07:34 AM, Nick Clifton wrote:
> -(define_insn "smaxsi3"
> -  [(set (match_operand:SI  0 "register_operand" "=r,r,r,r,r,r")
> - (smax:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
> -  (match_operand:SI 2 "rx_source_operand"
> -"r,Sint08,Sint16,Sint24,i,Q")))]
> +(define_insn "smax3"
> +  [(set (match_operand:int_modes 0 "register_operand" 
> "=r,r,r,r,r,r")
> + (smax:int_modes (match_operand:int_modes 1 "register_operand" 
> "%0,0,0,0,0,0")
> + (match_operand:int_modes 2 "rx_source_operand"
> +  
> "r,Sint08,Sint16,Sint24,i,Q")))]
>""
>"max\t%Q2, %0"

Huh?  This doesn't work for HI or QImode.  You need to sign-extend them first
to get rid of (potential) garbage in the high bits of the register.

At least, there are no .W or .B specifiers in the document I have; only .L.


r~


Re: [PATCH] Add sparc VIS 2.0 builtins, intrinsics, and option to control them.

2011-09-30 Thread David Miller
From: Richard Henderson 
Date: Fri, 30 Sep 2011 14:03:52 -0700

>   (3) Use optimize-mode-switching to minimize the number of changes
>   to the global state.  This includes the use of SIAM vs %fsr,
>   especially when a subroutine call could have changed the
>   global rounding mode.

Indeed, and I incidentally took a look at the mode switching
optimization framework and it appears that I could use it for
providing insn patterns for 'rint' and friends like i386 does.

> All of which is a lot of work.
> 
>> +(define_insn "bmask_vis"
>> +  [(set (match_operand:P 0 "register_operand" "=r")
>> +(plus:P (match_operand:P 1 "register_operand" "rJ")
>> +(match_operand:P 2 "register_operand" "rJ")))
>> +   (clobber (reg:SI GSR_REG))]
>> +  "TARGET_VIS2"
>> +  "bmask\t%r1, %r2, %0"
>> +  [(set_attr "type" "array")])
> 
> I think this is wrong.  I think you want to model this as
 ...
>> +(define_insn "bshuffle_vis"
>> +  [(set (match_operand:V64I 0 "register_operand" "=e")
>> +(unspec:V64I [(match_operand:V64I 1 "register_operand" "e")
>> +  (match_operand:V64I 2 "register_operand" "e")]
>> + UNSPEC_BSHUFFLE))
>> +   (use (reg:SI GSR_REG))]
> 
> Better to push the use of the GSR_REG into the unspec, and not leave
> it separate in the parallel.

Thanks Richard, I'll fix these up.  In general, the GSR tracking needs
a bit more work.


Re: [PATCH] Add sparc VIS 2.0 builtins, intrinsics, and option to control them.

2011-09-30 Thread Joseph S. Myers
On Fri, 30 Sep 2011, Richard Henderson wrote:

> Specifically, in-compiler support for #pragma STDC FENV_ACCESS and the
> various <fenv.h> routines.  We ought to be able to track the rounding
> mode (and other relevant parameters) on a per-expression basis, tagging
> each floating-point operation with the parameters in effect.

For C99 and C1X it's just dynamic rounding direction (changed by 
fesetround, possibly changed by calls to any non-pure function unless you 
can prove that function doesn't call fesetround, but the default mode can 
be presumed unless -frounding-math or the FENV_ACCESS pragma is in 
effect).  (asms accessing the relevant registers also need to be 
considered.)

N1582 (status report on the C bindings for IEEE 754-2008) mentions static 
rounding direction support but doesn't go into details.  (Practically, 
static rounding directions are more useful for various floating-point 
algorithms.)

Floating-point operations implicitly read the rounding mode.  They 
implicitly write the exception flags (as, again, do most function calls) - 
except that generally they only set rather than clearing flags (but 
function calls may also call functions that clear them).

The present defaults are -fno-rounding-math -ftrapping-math.  I'm not sure 
that with a proper implementation this would really allow much more 
optimization than -frounding-math -ftrapping-math.  Simply enabling 
exceptions should disable most constant folding where the result isn't 
exactly representable, because the "inexact" exception is required, for 
example; just knowing the rounding mode and so the value of the result 
isn't enough to fold.  And if there aren't any function calls intervening, 
all combinations of these options will allow common subexpression 
elimination (since that doesn't change the set of exceptions raised, and 
no support is required for counting the number of times a particular 
exception was raised).  So the right defaults once -ftrapping-math really 
does what it says aren't clear.

I've thought a bit about implementation approaches, but mainly at the 
level of how to decouple the front-end and back-end parts from the full 
complexity of tracking pragma state for each expression (for example, by 
setting variables on a whole-function basis and restricting inlining).  
I've also thought about how to implement testcases providing reasonably 
thorough coverage of the exceptions and rounding modes issues.  But I 
haven't had time to work on implementation of any of these pieces.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [google] Specify target as ix86 or x86_64 for test builtin_target.c (issue5174041)

2011-09-30 Thread Sriraman Tallam
Moving the test to gcc.target/i386.

* testsuite/gcc.dg/builtin_target.c: Remove.
* testsuite/gcc.target/i386/builtin_target.c: New file.

On Fri, Sep 30, 2011 at 2:33 PM, Richard Henderson  wrote:
> On 09/30/2011 02:09 PM, Sriraman Tallam wrote:
>>       * testsuite/gcc.dg/builtin_target.c: Specify target as ix86 or
>>       x86_64.
>>
>> Index: gcc.dg/builtin_target.c
>> ===
>> --- gcc.dg/builtin_target.c   (revision 179400)
>> +++ gcc.dg/builtin_target.c   (working copy)
>> @@ -1,6 +1,6 @@
>>  /* This test checks if the __builtin_cpu_* calls are recognized. */
>>
>> -/* { dg-do run } */
>> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
>
> This is an indication that the test is in the wrong place.
> It should be in gcc.target/i386/

Sorry, did not realize.


>
>
> r~
>
Index: gcc.target/i386/builtin_target.c
===
--- gcc.target/i386/builtin_target.c(revision 0)
+++ gcc.target/i386/builtin_target.c(revision 0)
@@ -0,0 +1,53 @@
+/* This test checks if the __builtin_cpu_* calls are recognized. */
+
+/* { dg-do run } */
+
+int
+fn1 ()
+{
+  if (__builtin_cpu_supports_cmov () < 0)
+return -1;
+  if (__builtin_cpu_supports_mmx () < 0)
+return -1;
+  if (__builtin_cpu_supports_popcount () < 0)
+return -1;
+  if (__builtin_cpu_supports_sse () < 0)
+return -1;
+  if (__builtin_cpu_supports_sse2 () < 0)
+return -1;
+  if (__builtin_cpu_supports_sse3 () < 0)
+return -1;
+  if (__builtin_cpu_supports_ssse3 () < 0)
+return -1;
+  if (__builtin_cpu_supports_sse4_1 () < 0)
+return -1;
+  if (__builtin_cpu_supports_sse4_2 () < 0)
+return -1;
+  if (__builtin_cpu_is_amd () < 0)
+return -1;
+  if (__builtin_cpu_is_intel () < 0)
+return -1;
+  if (__builtin_cpu_is_intel_atom () < 0)
+return -1;
+  if (__builtin_cpu_is_intel_core2 () < 0)
+return -1;
+  if (__builtin_cpu_is_intel_corei7_nehalem () < 0)
+return -1;
+  if (__builtin_cpu_is_intel_corei7_westmere () < 0)
+return -1;
+  if (__builtin_cpu_is_intel_corei7_sandybridge () < 0)
+return -1;
+  if (__builtin_cpu_is_amdfam10_barcelona () < 0)
+return -1;
+  if (__builtin_cpu_is_amdfam10_shanghai () < 0)
+return -1;
+  if (__builtin_cpu_is_amdfam10_istanbul () < 0)
+return -1;
+
+  return 0;
+}
+
+int main ()
+{
+  return fn1 ();
+}
Index: gcc.dg/builtin_target.c
===
--- gcc.dg/builtin_target.c (revision 179400)
+++ gcc.dg/builtin_target.c (working copy)
@@ -1,53 +0,0 @@
-/* This test checks if the __builtin_cpu_* calls are recognized. */
-
-/* { dg-do run { target i?86-*-* x86_64-*-* } } */
-
-int
-fn1 ()
-{
-  if (__builtin_cpu_supports_cmov () < 0)
-return -1;
-  if (__builtin_cpu_supports_mmx () < 0)
-return -1;
-  if (__builtin_cpu_supports_popcount () < 0)
-return -1;
-  if (__builtin_cpu_supports_sse () < 0)
-return -1;
-  if (__builtin_cpu_supports_sse2 () < 0)
-return -1;
-  if (__builtin_cpu_supports_sse3 () < 0)
-return -1;
-  if (__builtin_cpu_supports_ssse3 () < 0)
-return -1;
-  if (__builtin_cpu_supports_sse4_1 () < 0)
-return -1;
-  if (__builtin_cpu_supports_sse4_2 () < 0)
-return -1;
-  if (__builtin_cpu_is_amd () < 0)
-return -1;
-  if (__builtin_cpu_is_intel () < 0)
-return -1;
-  if (__builtin_cpu_is_intel_atom () < 0)
-return -1;
-  if (__builtin_cpu_is_intel_core2 () < 0)
-return -1;
-  if (__builtin_cpu_is_intel_corei7_nehalem () < 0)
-return -1;
-  if (__builtin_cpu_is_intel_corei7_westmere () < 0)
-return -1;
-  if (__builtin_cpu_is_intel_corei7_sandybridge () < 0)
-return -1;
-  if (__builtin_cpu_is_amdfam10_barcelona () < 0)
-return -1;
-  if (__builtin_cpu_is_amdfam10_shanghai () < 0)
-return -1;
-  if (__builtin_cpu_is_amdfam10_istanbul () < 0)
-return -1;
-
-  return 0;
-}
-
-int main ()
-{
-  return fn1 ();
-}


Re: [google] Specify target as ix86 or x86_64 for test builtin_target.c (issue5174041)

2011-09-30 Thread Richard Henderson
On 09/30/2011 02:09 PM, Sriraman Tallam wrote:
>   * testsuite/gcc.dg/builtin_target.c: Specify target as ix86 or
>   x86_64.
> 
> Index: gcc.dg/builtin_target.c
> ===
> --- gcc.dg/builtin_target.c   (revision 179400)
> +++ gcc.dg/builtin_target.c   (working copy)
> @@ -1,6 +1,6 @@
>  /* This test checks if the __builtin_cpu_* calls are recognized. */
>  
> -/* { dg-do run } */
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */

This is an indication that the test is in the wrong place.
It should be in gcc.target/i386/


r~


Re: [google] Specify target as ix86 or x86_64 for test builtin_target.c (issue5174041)

2011-09-30 Thread Sriraman Tallam
This is now submitted.

Thanks,
-Sri.

On Fri, Sep 30, 2011 at 2:09 PM, Sriraman Tallam  wrote:
>        * testsuite/gcc.dg/builtin_target.c: Specify target as ix86 or
>        x86_64.
>
> Index: gcc.dg/builtin_target.c
> ===
> --- gcc.dg/builtin_target.c     (revision 179400)
> +++ gcc.dg/builtin_target.c     (working copy)
> @@ -1,6 +1,6 @@
>  /* This test checks if the __builtin_cpu_* calls are recognized. */
>
> -/* { dg-do run } */
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
>
>  int
>  fn1 ()
>
> --
> This patch is available for review at http://codereview.appspot.com/5174041
>


Re: Vector shuffling

2011-09-30 Thread Richard Henderson
On 09/30/2011 12:14 PM, Artem Shinkarov wrote:
> Ok, in the attachment there is a patch which fixes mentioned errors.

The changes are ok.  I would have committed it for you, only the patch
isn't against mainline.  There are 4 rejects.


r~


Re: [PR 47382] We cannot simply fold OBJ_TYPE_REF at all in 4.6

2011-09-30 Thread Maxim Kuvyrkov
On 30/09/2011, at 6:56 PM, Maxim Kuvyrkov wrote:

> On 30/09/2011, at 4:02 PM, Maxim Kuvyrkov wrote:
> 
>> On 24/09/2011, at 2:19 AM, Martin Jambor wrote:
>> 
>>> However, both of these are really 4.8 material and since the patches
>>> probably need only minor updates, it might be worthwhile to do that so
>>> that gcc can handle the "embarrassing" simple cases.  So I will do
>>> that (though it might need to wait for about a week), re-try them on
>>> Firefox and probably propose them for submission.
>> 
>> Great!  Thank you.
> 
> Here is one of your patches updated to the latest mainline.  Just add water^W 
> changelog.
> 
> It bootstraps, and I'm regtesting it on x86_64-linux-gnu {-m64,-m32} now.

Attached is a follow up patch to your 2 devirtualization patches.  Now that 
compute_known_type_jump_func may be called with SSA_NAME argument (from 
ipa_try_devirtualize_immediately) we need to make it handle SSA_NAMEs.

One way to do so is to defer the job to detect_type_change_ssa; the patch below 
implements this approach.  Note that the patch relies on detect_type_change to 
initialize JFUNC, as is done in your patch at 
http://gcc.gnu.org/ml/gcc-patches/2011-01/msg01033.html.  Without that patch we 
need to initialize JFUNC inside compute_known_type_jump_func, which may be a 
cleaner solution.

I encourage you to include this small fix in the main body of one of your 
patches and claim credit for it.  You will spare me a test-submission cycle 
that way :-).

Thank you,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



fsf-gcc-devirt-pointers.ChangeLog
Description: Binary data


fsf-gcc-devirt-pointers.patch
Description: Binary data


[google] Specify target as ix86 or x86_64 for test builtin_target.c (issue5174041)

2011-09-30 Thread Sriraman Tallam
* testsuite/gcc.dg/builtin_target.c: Specify target as ix86 or
x86_64.

Index: gcc.dg/builtin_target.c
===
--- gcc.dg/builtin_target.c (revision 179400)
+++ gcc.dg/builtin_target.c (working copy)
@@ -1,6 +1,6 @@
 /* This test checks if the __builtin_cpu_* calls are recognized. */
 
-/* { dg-do run } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
 
 int
 fn1 ()

--
This patch is available for review at http://codereview.appspot.com/5174041


Re: [PATCH] Add sparc VIS 2.0 builtins, intrinsics, and option to control them.

2011-09-30 Thread Richard Henderson
On 09/30/2011 12:59 AM, David Miller wrote:
> 
> I tried to add the 'siam' instruction too but that one is really
> difficult because it influences the behavior of every float operation
> and I couldn't find an easy way to express those dependencies.  I
> tried a few easy approaches but I couldn't reliably keep the compiler
> from moving 'siam' across float operations.
> 
> The 'siam' (Set Interval Arithmetic Mode) instruction is a mechanism
> to override the float rounding mode on a cycle-to-cycle basis, ie.
> without the cost of doing a write to the %fsr.

I don't think I'd ever expose this via a builtin.  This seems like a feature
we've talked about for a long time, but have never done anything about.

Specifically, in-compiler support for #pragma STDC FENV_ACCESS and the
various <fenv.h> routines.  We ought to be able to track the rounding
mode (and other relevant parameters) on a per-expression basis, tagging
each floating-point operation with the parameters in effect.

At some point, at or after rtl generation time, we transform these saved
parameters into manipulations of the fpu state.  We have several options:

  (1) Alpha-like where e.g. the rounding mode is directly encoded in
  the instruction.  No further optimization necessary, unless we
  are manipulating non-rounding parameters.

  (2) IA64-like where we have multiple fpu environments, and can
  encode which to use inside the instruction.  However, in this
  case we also need to set up these alternate environments and
  merge back the exception state when the user reads it.

  (3) Use optimize-mode-switching to minimize the number of changes
  to the global state.  This includes the use of SIAM vs %fsr,
  especially when a subroutine call could have changed the
  global rounding mode.

All of which is a lot of work.

> +(define_insn "bmask_vis"
> +  [(set (match_operand:P 0 "register_operand" "=r")
> +(plus:P (match_operand:P 1 "register_operand" "rJ")
> +(match_operand:P 2 "register_operand" "rJ")))
> +   (clobber (reg:SI GSR_REG))]
> +  "TARGET_VIS2"
> +  "bmask\t%r1, %r2, %0"
> +  [(set_attr "type" "array")])

I think this is wrong.  I think you want to model this as

  [(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "register_or_zero_operand" "rJ")
 (match_operand:DI 2 "register_or_zero_operand" "rJ")))
   (set (zero_extract:DI
  (reg:DI GSR_REG)
  (const_int 32)
  (const_int 32))
(plus:DI (match_dup 1) (match_dup 2)))]

(1) %gsr is really set to something, not just modified in
uninteresting ways; we're going to use this value later.

(2) Only the top 32 bits of %gsr are changed; the low 32 bits are
still valid.  You don't want insns that set the low 32 bits to be
deleted as dead code, which is what would happen with a plain clobber.

(3) I realize this version makes things difficult for 32-bit mode.
There, I think you may have to settle for an unspec.  And perhaps
the possible benefit of Properly representing the GSR change isn't
that helpful.  In which case:

(set (reg:DI GSR_REG)
 (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI GSR_REG)]
UNSPEC_BMASK))

> +(define_insn "bshuffle_vis"
> +  [(set (match_operand:V64I 0 "register_operand" "=e")
> +(unspec:V64I [(match_operand:V64I 1 "register_operand" "e")
> +   (match_operand:V64I 2 "register_operand" "e")]
> + UNSPEC_BSHUFFLE))
> +   (use (reg:SI GSR_REG))]

Better to push the use of the GSR_REG into the unspec, and not leave
it separate in the parallel.



r~


Re: [google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Sriraman Tallam
On Fri, Sep 30, 2011 at 1:51 PM, Diego Novillo  wrote:
> On 11-09-30 16:50 , Sriraman Tallam wrote:
>>
>> ChangeLog entry:
>>
>>
>>        * testsuite/g++.dg/tree-prof/callgraph-profiles/C:      Add
>> dg-require-section-exclude.     * testsuite/lib/target-supports.exp
>> (check_ifunc_available): New proc.      *
>> testsuite/lib/target-supports-dg.exp (dg-require-section-exclude):
>>  New
>> proc.
>
> Bad cut and paste?

Yes, sorry. Patch submitted. Thanks.

* testsuite/g++.dg/tree-prof/callgraph-profiles.C:
Add dg-require-section-exclude.
* testsuite/lib/target-supports.exp (check_section_exclude_available):
New proc.
* testsuite/lib/target-supports-dg.exp (dg-require-section-exclude):
New proc.

>


Re: [google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Diego Novillo

On 11-09-30 16:50 , Sriraman Tallam wrote:

ChangeLog entry:


* testsuite/g++.dg/tree-prof/callgraph-profiles/C:  Add
dg-require-section-exclude. * testsuite/lib/target-supports.exp
(check_ifunc_available): New proc.  *
testsuite/lib/target-supports-dg.exp (dg-require-section-exclude):  New
proc.


Bad cut and paste?


Re: [google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Sriraman Tallam
On Fri, Sep 30, 2011 at 1:48 PM, Diego Novillo  wrote:
> On 11-09-30 16:44 , Sriraman Tallam wrote:
>>
>> Disable running of callgraph-profiles.C if section attribute "e" is not
>> supported.
>
> OK with a ChangeLog entry.
>
> Was the original patch sent to trunk?  I don't recall.

This was only sent to google/gcc-4_6 branch.
Thanks.


>
>
> Diego.
>


Re: [google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Sriraman Tallam
ChangeLog entry:


* testsuite/g++.dg/tree-prof/callgraph-profiles/C:  Add
dg-require-section-exclude. * testsuite/lib/target-supports.exp
(check_ifunc_available): New proc.  *
testsuite/lib/target-supports-dg.exp (dg-require-section-exclude):  New
proc.


On Fri, Sep 30, 2011 at 1:44 PM, Sriraman Tallam  wrote:
> Disable running of callgraph-profiles.C if section attribute "e" is not 
> supported.
>
> Index: lib/target-supports-dg.exp
> ===
> --- lib/target-supports-dg.exp  (revision 179400)
> +++ lib/target-supports-dg.exp  (working copy)
> @@ -100,6 +100,16 @@
>     }
>  }
>
> +# If this target does not support the section exclude "e" attribute,
> +# skip this test.
> +
> +proc dg-require-section-exclude { args } {
> +    if { ![ check_section_exclude_available ] } {
> +       upvar dg-do-what dg-do-what
> +       set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
> +    }
> +}
> +
>  # If this target's linker does not support the --gc-sections flag,
>  # skip this test.
>
> Index: lib/target-supports.exp
> ===
> --- lib/target-supports.exp     (revision 179400)
> +++ lib/target-supports.exp     (working copy)
> @@ -382,6 +382,18 @@
>     }]
>  }
>
> +# Returns true if tool chain supports "e" section attribute.
> +
> +proc check_section_exclude_available { } {
> +    return [check_runtime_nocache section_exclude_available {
> +               asm(".section \".gnu.callgraph.text.main\", \"e\"");
> +               int main()
> +               {
> +                 return 0;
> +               }
> +  }]
> +}
> +
>  # Returns true if --gc-sections is supported on the target.
>
>  proc check_gc_sections_available { } {
> Index: g++.dg/tree-prof/callgraph-profiles.C
> ===
> --- g++.dg/tree-prof/callgraph-profiles.C       (revision 179400)
> +++ g++.dg/tree-prof/callgraph-profiles.C       (working copy)
> @@ -1,6 +1,7 @@
>  /* Verify if call-graph profile sections are created
>    with -fcallgraph-profiles-sections. */
>  /* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections 
> --save-temps" } */
> +/* { dg-require-section-exclude "" } */
>
>  int __attribute__ ((noinline))
>  foo ()
>
> --
> This patch is available for review at http://codereview.appspot.com/5167045
>


Re: [google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Diego Novillo

On 11-09-30 16:44 , Sriraman Tallam wrote:

Disable running of callgraph-profiles.C if section attribute "e" is not 
supported.


OK with a ChangeLog entry.

Was the original patch sent to trunk?  I don't recall.


Diego.


[google]Make test callgraph-profiles.C run only when section attribute e is supported (issue5167045)

2011-09-30 Thread Sriraman Tallam
Disable running of callgraph-profiles.C if section attribute "e" is not 
supported.

Index: lib/target-supports-dg.exp
===
--- lib/target-supports-dg.exp  (revision 179400)
+++ lib/target-supports-dg.exp  (working copy)
@@ -100,6 +100,16 @@
 }
 }
 
+# If this target does not support the section exclude "e" attribute,
+# skip this test.
+
+proc dg-require-section-exclude { args } {
+if { ![ check_section_exclude_available ] } {
+   upvar dg-do-what dg-do-what
+   set dg-do-what [list [lindex ${dg-do-what} 0] "N" "P"]
+}
+}
+
 # If this target's linker does not support the --gc-sections flag,
 # skip this test.
 
Index: lib/target-supports.exp
===
--- lib/target-supports.exp (revision 179400)
+++ lib/target-supports.exp (working copy)
@@ -382,6 +382,18 @@
 }]
 }
 
+# Returns true if tool chain supports "e" section attribute.
+
+proc check_section_exclude_available { } {
+return [check_runtime_nocache section_exclude_available {
+   asm(".section \".gnu.callgraph.text.main\", \"e\"");
+   int main()
+   {
+ return 0;
+   }
+  }]
+}
+
 # Returns true if --gc-sections is supported on the target.
 
 proc check_gc_sections_available { } {
Index: g++.dg/tree-prof/callgraph-profiles.C
===
--- g++.dg/tree-prof/callgraph-profiles.C   (revision 179400)
+++ g++.dg/tree-prof/callgraph-profiles.C   (working copy)
@@ -1,6 +1,7 @@
 /* Verify if call-graph profile sections are created
with -fcallgraph-profiles-sections. */
 /* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections 
--save-temps" } */
+/* { dg-require-section-exclude "" } */
 
 int __attribute__ ((noinline))
 foo ()

--
This patch is available for review at http://codereview.appspot.com/5167045


Re: [PATCH 3/7] Emit macro expansion related diagnostics

2011-09-30 Thread Jason Merrill

On 09/30/2011 11:28 AM, Jason Merrill wrote:

+linemap_location_before_p (struct line_maps *set,
+  source_location  pre,
+  source_location post)
+{
+  bool pre_from_macro_p, post_from_macro_p;
+
+  if (pre == post)
+return false;
+
+  pre_from_macro_p =
+linemap_location_from_macro_expansion_p (set, pre);
+  post_from_macro_p =
+linemap_location_from_macro_expansion_p (set, post);
+
+  if (pre_from_macro_p != post_from_macro_p)
+{
+  if (pre_from_macro_p)
+   pre = linemap_macro_loc_to_exp_point (set, pre, NULL);
+  else
+   post = linemap_macro_loc_to_exp_point (set, post, NULL);
+}


What if the two locations are from different macros?

Jason


Re: [PR 47382] We cannot simply fold OBJ_TYPE_REF at all in 4.6

2011-09-30 Thread Maxim Kuvyrkov
On 30/09/2011, at 6:56 PM, Maxim Kuvyrkov wrote:

> On 30/09/2011, at 4:02 PM, Maxim Kuvyrkov wrote:
> 
>> On 24/09/2011, at 2:19 AM, Martin Jambor wrote:
>> 
>>> However, both of these are really 4.8 material and since the patches
>>> probably need only minor updates, it might be worthwhile to do that so
>>> that gcc can handle the "embarrassing" simple cases.  So I will do
>>> that (though it might need to wait for about a week), re-try them on
>>> Firefox and probably propose them for submission.
>> 
>> Great!  Thank you.
> 
> Here is one of your patches updated to the latest mainline.  Just add water^W 
> changelog.
> 
> It bootstraps, and I'm regtesting it on x86_64-linux-gnu {-m64,-m32} now.

The regtest passed fine (C and C++ languages only).  The only failure is the 
testcase for PR43411, which this patch removes from XFAILs.  It seems the 
testcase requires your other patch to PASS.

HTH,

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics




Re: [Patch 2/4] ARM 64 bit sync atomic operations [V2]

2011-09-30 Thread Joseph S. Myers
On Fri, 30 Sep 2011, Ramana Radhakrishnan wrote:

> On 26 July 2011 10:01, Dr. David Alan Gilbert  
> wrote:
> >
> > +
> > +extern unsigned int __write(int fd, const void *buf, unsigned int count);
> 
> Why are we using __write instead of write?

Because plain write is in the user's namespace in ISO C.  See what I said 
in  - the 
alternative is hardcoding the syscall number and using the syscall 
directly.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-30 Thread Richard Henderson
On 09/29/2011 06:13 PM, Jiangning Liu wrote:
> 
> 
>> -Original Message-
>> From: Jakub Jelinek [mailto:ja...@redhat.com]
>> Sent: Thursday, September 29, 2011 6:14 PM
>> To: Jiangning Liu
>> Cc: 'Richard Guenther'; Andrew Pinski; gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
>>
>> On Thu, Sep 29, 2011 at 06:08:50PM +0800, Jiangning Liu wrote:
>>> As far as I know different back-ends are implementing different
>>> prologue/epilogue in GCC. If one day this part can be refined and
>> abstracted
>>> as well, I would say solving this stack-red-zone problem in shared
>>> prologue/epilogue code would be a perfect solution, and barrier can
>> be
>>> inserted there.
>>>
>>> I'm not saying you are wrong on keeping scheduler using a pure
>> barrier
>>> interface. From engineering point of view, I only feel my proposal is
>> so far
>>> so good, because this patch at least solve the problem for all
>> targets in a
>>> quite simple way. Maybe it can be improved in future based on this.
>>
>> But you don't want to listen about any other alternative, other
>> backends are
>> happy with being able to put the best kind of barrier at the best spot
>> in the epilogue and don't need a "generic" solution which won't model
>> very
>> well the target diversity anyway.
> 
> Jakub,
> 
> Appreciate for your attention on this issue,
> 
> 1) Can you clarify who are the "others back-ends"? Does it cover most of the
> back-ends being supported by GCC right now?

Your red-stack barrier issue is *exactly* the same as the frame pointer
barrier issue, which affects many backends.

That is, if the frame pointer is initialized before the local stack frame
is allocated, then one has to add a barrier such that memory references
based on the frame pointer are not scheduled before the local stack frame
allocation.

One example of this is in the i386 port, where the prologue looks like

push %ebp
mov %esp, %ebp
sub $frame, %esp

The rtl we emit for that subtraction looks like

(define_insn "pro_epilogue_adjust_stack__add"
  [(set (match_operand:P 0 "register_operand" "=r,r")
(plus:P (match_operand:P 1 "register_operand" "0,r")
(match_operand:P 2 "" "r,l")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]

Note the final clobber, which is a memory scheduling barrier.

Other targets use similar tricks.  For instance arm "stack_tie".

Honestly, I've found nothing convincing throughout this thread that
suggests to me that this problem should be handled generically.


r~


Re: [1/2] Make regrename callable from target reorg code

2011-09-30 Thread Richard Henderson
On 09/30/2011 09:08 AM, Bernd Schmidt wrote:
>   gcc/
>   * regrename.h: New file.
>   * regrename.c: Include it.  Also include "emit-rtl.h".
>   (struct du_head, struct du_chain, du_head_p DEF_VEC and
>   DEF_VEC_ALLOC_P): Move to regrename.h.
>   (do_replace): Remove declaration.
>   (insn_rr): New variable.
>   (cur_operand): New static variable.
>   (regrename_chain_from_id): Renamed from chain_from_id and no longer
>   static.  All callers changed.
>   (record_operand_use): New static function.
>   (scan_rtx_reg): Use it.
>   (find_best_rename_reg): New function, broken out of rename_chains.
>   (rename_chains): Use it.  Don't update chain regno and nregs here, ...
>   (regrename_do_replace): ... do it here instead.  Renamed from
>   do_replace, and no longer static.  All callers changed.
>   (regrename_analyze): No longer static.  New arg bb_mask.
>   All callers changed.  If bb_mask is nonzero, use it to limit the
>   number of basic blocks we analyze.  If we failed to analyze a block,
>   clear insn operand data.
>   (record_out_operands): New arg insn_info.  Update cur_operand if it is
>   nonnull.
>   (build_def_use): If insn_rr is nonnull, pass an insn_info to
>   record_out_operands, and update cur_operand here as well.
>   (regrename_init, regrename_finish): New functions.
>   (regrename_optimize): Use them.
>   * Makefile.in (regrename.o): Adjust dependencies.

Ok.


r~


Re: Vector shuffling

2011-09-30 Thread Richard Henderson
> I hope that the new version looks a little bit better.

Nearly ok.  Some trivial fixes, and then please commit.

> +  rtx_v0 = expand_normal (v0);
> +  rtx_mask = expand_normal (mask);
> +
> +  create_output_operand (&ops[0], target, mode);
> +  create_input_operand (&ops[3], rtx_mask, mode);
> +
> +  if (operand_equal_p (v0, v1, 0))
> +{
> +  rtx_v0 = expand_normal (v0);
> +  if (!insn_operand_matches(icode, 1, rtx_v0))
> +rtx_v0 = force_reg (mode, rtx_v0);
> +
> +  gcc_checking_assert(insn_operand_matches(icode, 2, rtx_v0));
> +
> +  create_fixed_operand (&ops[1], rtx_v0);
> +  create_fixed_operand (&ops[2], rtx_v0);
> +}
> +  else
> +{
> +  create_input_operand (&ops[1], expand_normal (v0), mode);
> +  create_input_operand (&ops[2], expand_normal (v1), mode);
> +}

The first line should be removed.  Otherwise you're expanding v0 twice.

> +(define_expand "vshuffle"
> +  [(match_operand:V_128 0 "register_operand" "")
> +   (match_operand:V_128 1 "general_operand" "")
> +   (match_operand:V_128 2 "general_operand" "")
> +   (match_operand: 3 "general_operand" "")]
> +  "TARGET_SSSE3 || TARGET_AVX"
> +{
> +  bool ok = ix86_expand_vshuffle (operands);
> +  gcc_assert (ok);
> +  DONE;
> +})

Operands 1, 2, and 3 should use register_operand.  That will avoid...

> +  op0 = force_reg (mode, op0);
> +  op1 = force_reg (mode, op0);
> +  mask = force_reg (maskmode, mask);

... these lines in ix86_expand_vshuffle and the obvious typo for op1.

> +  fprintf (stderr, "-- here in %s \n", __func__);

Remove the debugging lines.


r~


[4.6 PATCH] Fix strcat/strncat handling in PTA

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 05:17:00PM +0200, Jakub Jelinek wrote:
> Here is the updated patch, bootstrapped/regtested on x86_64-linux and
> i686-linux, committed to trunk.  Will work on 4.6 backport of the str{,n}cat
> part.

And here is the backport I've just committed.  4.6 doesn't handle
BUILT_IN_*_CHK in tree-ssa-alias.c at all, so the patch is shorter.

2011-09-30  Jakub Jelinek  
Richard Guenther  

* tree-ssa-alias.c (call_may_clobber_ref_p_1): Fix
handling of BUILT_IN_STRNCAT.
(ref_maybe_used_by_call_p_1): Fix handling of BUILT_IN_STRCAT,
and BUILT_IN_STRNCAT.

--- gcc/tree-ssa-alias.c.jj 2011-09-29 15:27:17.0 +0200
+++ gcc/tree-ssa-alias.c2011-09-30 12:16:20.0 +0200
@@ -1208,8 +1208,20 @@ ref_maybe_used_by_call_p_1 (gimple call,
   && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL)
 switch (DECL_FUNCTION_CODE (callee))
   {
-   /* All the following functions clobber memory pointed to by
-  their first argument.  */
+   /* All the following functions read memory pointed to by
+  their second argument.  strcat/strncat additionally
+  reads memory pointed to by the first argument.  */
+   case BUILT_IN_STRCAT:
+   case BUILT_IN_STRNCAT:
+ {
+   ao_ref dref;
+   ao_ref_init_from_ptr_and_size (&dref,
+  gimple_call_arg (call, 0),
+  NULL_TREE);
+   if (refs_may_alias_p_1 (&dref, ref, false))
+ return true;
+ }
+ /* FALLTHRU */
case BUILT_IN_STRCPY:
case BUILT_IN_STRNCPY:
case BUILT_IN_MEMCPY:
@@ -1217,8 +1229,6 @@ ref_maybe_used_by_call_p_1 (gimple call,
case BUILT_IN_MEMPCPY:
case BUILT_IN_STPCPY:
case BUILT_IN_STPNCPY:
-   case BUILT_IN_STRCAT:
-   case BUILT_IN_STRNCAT:
  {
ao_ref dref;
tree size = NULL_TREE;
@@ -1449,7 +1459,12 @@ call_may_clobber_ref_p_1 (gimple call, a
  {
ao_ref dref;
tree size = NULL_TREE;
-   if (gimple_call_num_args (call) == 3)
+   /* Don't pass in size for strncat, as the maximum size
+  is strlen (dest) + n + 1 instead of n, resp.
+  n + 1 at dest + strlen (dest), but strlen (dest) isn't
+  known.  */
+   if (gimple_call_num_args (call) == 3
+   && DECL_FUNCTION_CODE (callee) != BUILT_IN_STRNCAT)
  size = gimple_call_arg (call, 2);
ao_ref_init_from_ptr_and_size (&dref,
   gimple_call_arg (call, 0),

Jakub


C++ PATCH to remove TREE_NEGATED_INT

2011-09-30 Thread Jason Merrill
Diego asked me what this flag was for; I looked, and found it to be a 
write-only flag.  So out it goes.


Tested x86_64-pc-linux-gnu, applied to trunk.
commit 59817b6e5fa3fafbce5fa2b1a111b2aa37eea091
Author: Jason Merrill 
Date:   Tue Sep 27 10:45:07 2011 -0400

	* cp-tree.h (TREE_NEGATED_INT): Remove.
	* semantics.c (finish_unary_op_expr): Don't set it.

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 7241d66..a1f7ebe 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -43,8 +43,7 @@ never after.
 #include "diagnostic-core.h"
 
 /* Usage of TREE_LANG_FLAG_?:
-   0: TREE_NEGATED_INT (in INTEGER_CST).
-  IDENTIFIER_MARKED (used by search routines).
+   0: IDENTIFIER_MARKED (used by search routines).
   DECL_PRETTY_FUNCTION_P (in VAR_DECL)
   C_MAYBE_CONST_EXPR_INT_OPERANDS (in C_MAYBE_CONST_EXPR, for C)
1: C_DECLARED_LABEL_FLAG (in LABEL_DECL)
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 0f7deb6..7e5aac7 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2989,10 +2989,6 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter)
 #define TYPENAME_IS_RESOLVING_P(NODE) \
   (TREE_LANG_FLAG_2 (TYPENAME_TYPE_CHECK (NODE)))
 
-/* Nonzero in INTEGER_CST means that this int is negative by dint of
-   using a twos-complement negated operand.  */
-#define TREE_NEGATED_INT(NODE) TREE_LANG_FLAG_0 (INTEGER_CST_CHECK (NODE))
-
 /* [class.virtual]
 
A class that declares or inherits a virtual function is called a
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 89c76d5..7ad1e8d 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -2312,19 +2312,6 @@ tree
 finish_unary_op_expr (enum tree_code code, tree expr)
 {
   tree result = build_x_unary_op (code, expr, tf_warning_or_error);
-  /* Inside a template, build_x_unary_op does not fold the
- expression. So check whether the result is folded before
- setting TREE_NEGATED_INT.  */
-  if (code == NEGATE_EXPR && TREE_CODE (expr) == INTEGER_CST
-  && TREE_CODE (result) == INTEGER_CST
-  && !TYPE_UNSIGNED (TREE_TYPE (result))
-  && INT_CST_LT (result, integer_zero_node))
-{
-  /* RESULT may be a cached INTEGER_CST, so we must copy it before
-	 setting TREE_NEGATED_INT.  */
-  result = copy_node (result);
-  TREE_NEGATED_INT (result) = 1;
-}
   if (TREE_OVERFLOW_P (result) && !TREE_OVERFLOW_P (expr))
 overflow_warning (input_location, result);
 


fix for c++/44473, mangling of decimal types, checked in

2011-09-30 Thread Janis Johnson
Patch http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00625.html was
approved by Jason last December but I never got around to checking
it in.  Paolo Carlini said in PR44473 that it was already approved
and doesn't need a new approval, so I checked it in after a
bootstrap and regtest of c,c++ for i686-pc-linux-gnu.

Janis


Re: Vector Comparison patch

2011-09-30 Thread Georg-Johann Lay

Artem Shinkarov schrieb:

On Fri, Sep 30, 2011 at 4:54 PM, Jakub Jelinek  wrote:


On Fri, Sep 30, 2011 at 04:48:41PM +0100, Artem Shinkarov wrote:


Most likely we can. The question is what do we really want to check
with this test. My intention was to check that a programmer can
statically get correspondence of the types, in a sense that sizeof
(float) == sizeof (int) and sizeof (double) == sizeof (long long). As
it seems my original assumption does not hold. Before using __typeof,
I would try to make sure that there is no other way to determine these
correspondences.


You can use preprocessor too, either just surround the whole test
with #if __SIZEOF_INT__ == __SIZEOF_FLOAT__ and similar,
or select the right type through preprocessor
#if __SIZEOF_INT__ == __SIZEOF_FLOAT__
#define FLOATCMPTYPE int
#elif __SIZEOF_LONG__ == __SIZEOF_FLOAT__
#define FLOATCMPTYPE long
#else
...
or __typeof, etc.

  Jakub


Ok, here is a patch which uses __typeof. Passes on x86_64.


Thanks, I will test on avr next week.

Johann



Artem.



Re: [Patch 2/4] ARM 64 bit sync atomic operations [V2]

2011-09-30 Thread H.J. Lu
On Fri, Sep 30, 2011 at 9:45 AM, Ramana Radhakrishnan
 wrote:
> On 26 July 2011 10:01, Dr. David Alan Gilbert  
> wrote:
>>
>> +
>> +extern unsigned int __write(int fd, const void *buf, unsigned int count);
>
> Why are we using __write instead of write?
>
> A comment elaborating that this file should only be in the static
> libgcc and never in the dynamic libgcc would be useful, given that the
> constructor is only pulled in only if a 64 bit sync primitive is
> referred to.
>

You may want to take a look at:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50583

ARM may have the same problem.

-- 
H.J.


Re: Initial shrink-wrapping patch

2011-09-30 Thread Richard Henderson
On 09/27/2011 02:02 PM, Bernd Schmidt wrote:
> Here's a new version of the entire shrink-wrapping patch with the
> trap_if test replaced by the outgoing_edges_match change, the condjump_p
> bug fixed, and the dump output and testcase adjusted a bit. Bootstrapped
> and tested on i686-linux and mips-elf. Ok? (I guess I could also leave
> out the RETURN_ADDR_REGNUM thing, since it was needed for ARM only and I
> can't quite remember why at the moment).

Please do leave out RETURN_ADDR_REGNUM for now.  If you remember why,
then you could bring it back alongside the patch for the ARM backend.

As for the i386 backend changes, not an objection per se, but I'm
trying to understand why we need so many copies of patterns.

For instance, while I understand the continued existence of the
"return" expander, why doesn't that expand to a simple_return?
After all the ix86_can_use_return_insn_p asserts that there *is*
no epilogue.  And if "return" can expand to simple_return, can't
all of the return_internal_* patterns also only use simple_return
and avoid the macro-ization?

I don't see anything glaringly wrong in the middle end.  Although
the thread_prologue_and_epilogue_insns function is now gigantic.
If there were an easy way to break that up and reduce the amount
of conditional compilation at the same time... that'd be great,
but not a requirement.



r~


Re: [Patch, fortran] [00/21] Remove coarray support in the scalarizer

2011-09-30 Thread Steve Kargl
On Fri, Sep 16, 2011 at 01:08:13AM +0200, Mikael Morin wrote:
> 
> OK for trunk?
> 
> Mikael
> 
> patchset layout:
>  - patches 1..4: 
>   Preliminary cleanups.
>   Those are quite independent of the rest.
>   Patch 4 is optional.
> 
>  - patches 5..13: 
>   Step by step move from scalarizer-provided cobounds setup code
>   to explicit specific code in gfc_conv_expr_descriptor.
>   Patch 6 is a request for explanation and is not intended for check-in.
> 
>  - patch 14:
>   Fixes a regression.
> 
>  - patches 15..21:
>   This is the point of all the rest: remove coarray-specific code in the
>   scalarizer.

Mikael,

I've finally made it through the set of patches,
and did not find anything that raised a red flag.
I'll note that I did not study the issue/question
you raised with patch 6.  Tobias is probably the
best person to offer an opinion.  After pinging
patch 6 off of Tobias, I think the code can be
committed.

-- 
Steve


Re: [Patch 2/4] ARM 64 bit sync atomic operations [V2]

2011-09-30 Thread Ramana Radhakrishnan
On 26 July 2011 10:01, Dr. David Alan Gilbert  wrote:
>
> +
> +extern unsigned int __write(int fd, const void *buf, unsigned int count);

Why are we using __write instead of write?

A comment elaborating that this file should only be in the static
libgcc and never in the dynamic libgcc would be useful, given that the
constructor is only pulled in only if a 64 bit sync primitive is
referred to.

cheers
Ramana


PATCH: Add missing ','

2011-09-30 Thread H.J. Lu
Hi,

I checked in this patch as an obvious fix.

H.J.
---
Index: doc/extend.texi
===
--- doc/extend.texi (revision 179396)
+++ doc/extend.texi (working copy)
@@ -6722,8 +6722,8 @@ return the new value.  That is,
 builtin as @code{*ptr = ~(*ptr & value)} instead of
 @code{*ptr = ~*ptr & value}.
 
-@item bool __sync_bool_compare_and_swap (@var{type} *ptr, @var{type} oldval 
@var{type} newval, ...)
-@itemx @var{type} __sync_val_compare_and_swap (@var{type} *ptr, @var{type} 
oldval @var{type} newval, ...)
+@item bool __sync_bool_compare_and_swap (@var{type} *ptr, @var{type} oldval, 
@var{type} newval, ...)
+@itemx @var{type} __sync_val_compare_and_swap (@var{type} *ptr, @var{type} 
oldval, @var{type} newval, ...)
 @findex __sync_bool_compare_and_swap
 @findex __sync_val_compare_and_swap
 These builtins perform an atomic compare and swap.  That is, if the current
Index: ChangeLog
===
--- ChangeLog   (revision 179396)
+++ ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2011-09-30  H.J. Lu  
+
+   * doc/extend.texi: Add missing ','.
+
 2011-09-30  Bernd Schmidt  
 
* common/config/c6x/c6x-common.c (c6x_option_optimization_table):


[2/2] Fix register imbalances on c6x when modulo scheduling

2011-09-30 Thread Bernd Schmidt
This is the final piece which makes use of all the previous regrename
patches, and transforms this loop (which occurs in a popular embedded
benchmark):
sploop  2
.L4:
ldh .d1t1   *A6++[1], A7
ldh .d1t1   *++A5[1], A8
nop 4
mpy .m1 A8, A7, A19
nop 1
shr .s1 A19, A9, A20
spkernel4, 0

into this:
sploop  1
.L4:
ldh .d2t2   *B5++[1], B6
||  ldh .d1t1   *++A5[1], A8
nop 4
mpy .m1xA8, B6, A19
nop 1
shr .s1 A19, A9, A20
spkernel8, 0

In the original loop, D1 and T1 are reserved twice, while there is no
reservation for D2 and T2: there is an imbalance which limits the
initiation interval.  By shifting some of the registers to the other
side of the machine, the balance is restored, the two loads can issue in
the same cycle, and the initiation interval reaches the optimum value of 1.

This code is really quite limited and only a first step - there are more
transformations we could do, but given that, a 100% speedup on some
loops doesn't seem so bad.

Will commit once the 1/2 regrename patch is approved.


Bernd

* config/c6x/c6x.md (attr "op_pattern"): New.
(load_sdata_pic, mov_insn for QIHIM and SISFVM): Set it.
* config/c6x/c6x-mult.md.in (mulhi3_VARIANT_, mulhisi3_insn_VARIANT_):
Likewise.
* config/c6x/c6x-mult.md: Regenerate.
* config/c6x/c6x.c: Include "regrename.h".
(unit_req_table): New typedef.
(unit_reqs): Use it for the declaration.
(req_imbalance, get_unit_operand_masks, try_rename_operands,
reshuffle_units): New static functions.
(count_unit_reqs): New arg reqs.  All callers changed.  Use
get_unit_reqs, and don't merge here.
(res_mii): New arg reqs.  All callers changed.  Rewrite to use a loop
using unit_req_factor.
(hwloop_optimize): Call reshuffle_units.  Call merge_unit_reqs after
count_unit_reqs.
(c6x_reorg): Add reg notes problem, and call df_analyze.
* Makefile.in ($(out_object_file)): Depend on regrename.h.

Index: gcc/Makefile.in
===
--- gcc/Makefile.in (revision 179379)
+++ gcc/Makefile.in (working copy)
@@ -3535,7 +3536,8 @@ $(out_object_file): $(out_file) $(CONFIG
output.h $(INSN_ATTR_H) $(SYSTEM_H) toplev.h $(DIAGNOSTIC_CORE_H) \
$(TARGET_H) $(LIBFUNCS_H) $(TARGET_DEF_H) $(FUNCTION_H) $(SCHED_INT_H) \
$(TM_P_H) $(EXPR_H) langhooks.h $(GGC_H) $(OPTABS_H) $(REAL_H) \
-   tm-constrs.h $(GIMPLE_H) $(DF_H) cselib.h $(COMMON_TARGET_H) hw-doloop.h
+   tm-constrs.h $(GIMPLE_H) $(DF_H) cselib.h $(COMMON_TARGET_H) hw-doloop.h \
+   regrename.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \
$(out_file) $(OUTPUT_OPTION)
 
Index: gcc/config/c6x/c6x.md
===
--- gcc/config/c6x/c6x.md   (revision 179393)
+++ gcc/config/c6x/c6x.md   (working copy)
@@ -166,6 +166,17 @@ (define_attr "cross"
   "n,y"
   (const_string "n"))
 
+;; This describes the relationship between operands and register files.
+;; For example, "sxs" means that operands 0 and 2 determine the side of
+;; the machine, and operand 1 can optionally use the cross path.  "dt" and
+;; "td" are used to describe loads and stores.
+;; Used for register renaming in loops for improving modulo scheduling.
+(define_attr "op_pattern"
+  "unknown,dt,td,sx,sxs,ssx"
+  (cond [(eq_attr "type" "load") (const_string "td")
+(eq_attr "type" "store") (const_string "dt")]
+   (const_string "unknown")))
+
 (define_attr "has_shadow"
   "n,y"
   (const_string "n"))
@@ -567,6 +578,7 @@ (define_insn "load_sdata_pic"
%|%.\\tadda%D2\\t%$\\t%1, %2, %0"
   [(set_attr "units" "d")
(set_attr "cross" "y,n")
+   (set_attr "op_pattern" "unknown")
(set_attr "predicable" "no")])
 
 ;; Move instruction patterns
@@ -599,6 +611,7 @@ (define_insn "mov_insn"
   [(set_attr "type" "*,*,*,*,*,*,load,load,load,load,store,store,store,store")
(set_attr "units62" 
"dls,dls,ls,ls,s,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr")
(set_attr "units64" 
"dls,dls,ls,ls,dl,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr")
+   (set_attr "op_pattern" "sx,sx,sx,sx,*,*,*,*,*,*,*,*,*,*")
(set_attr "addr_regfile" "*,*,*,*,*,*,a,b,b,a,a,b,b,a")
(set_attr "dest_regfile" "*,*,*,*,*,*,a,a,b,b,a,a,b,b")
(set_attr "cross" "n,n,y,y,n,n,n,y,n,y,n,y,n,y")])
@@ -631,6 +644,7 @@ (define_insn "mov_insn"
   [(set_attr "type" 
"*,*,*,*,*,*,*,*,*,load,load,load,load,store,store,store,store")
(set_attr "units62" 
"dls,dls,ls,ls,s,s,d,d,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr")
(set_at

Re: Vector Comparison patch

2011-09-30 Thread Artem Shinkarov
On Fri, Sep 30, 2011 at 4:54 PM, Jakub Jelinek  wrote:
> On Fri, Sep 30, 2011 at 04:48:41PM +0100, Artem Shinkarov wrote:
>> Most likely we can. The question is what do we really want to check
>> with this test. My intention was to check that a programmer can
>> statically get correspondence of the types, in a sense that sizeof
>> (float) == sizeof (int) and sizeof (double) == sizeof (long long). As
>> it seems my original assumption does not hold. Before using __typeof,
>> I would try to make sure that there is no other way to determine these
>> correspondences.
>
> You can use preprocessor too, either just surround the whole test
> with #if __SIZEOF_INT__ == __SIZEOF_FLOAT__ and similar,
> or select the right type through preprocessor
> #if __SIZEOF_INT__ == __SIZEOF_FLOAT__
> #define FLOATCMPTYPE int
> #elif __SIZEOF_LONG__ == __SIZEOF_FLOAT__
> #define FLOATCMPTYPE long
> #else
> ...
> or __typeof, etc.
>
>        Jakub
>

Ok, here is a patch which uses __typeof. Passes on x86_64.

Artem.
Index: gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c
===
--- gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c  (revision 179378)
+++ gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c  (working copy)
@@ -39,17 +39,17 @@ int main (int argc, char *argv[]) {
 int i;
 
 i0 = (vector (4, INT)){argc, 1,  2,  10};
-i1 = (vector (4, INT)){0, 3, 2, (INT)-23};
+i1 = (vector (4, INT)){0, 3, 2, (INT)-23};
 test (4, i0, i1, ires, "%i");
 #undef INT
 
-#define INT unsigned int 
+#define INT unsigned int
 vector (4, int) ures;
 vector (4, INT) u0;
 vector (4, INT) u1;
 
 u0 = (vector (4, INT)){argc, 1,  2,  10};
-u1 = (vector (4, INT)){0, 3, 2, (INT)-23};
+u1 = (vector (4, INT)){0, 3, 2, (INT)-23};
 test (4, u0, u1, ures, "%u");
 #undef INT
 
@@ -60,7 +60,7 @@ int main (int argc, char *argv[]) {
 vector (8, short) sres;
 
 s0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
-s1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
+s1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
 test (8, s0, s1, sres, "%i");
 #undef SHORT
 
@@ -70,7 +70,7 @@ int main (int argc, char *argv[]) {
 vector (8, short) usres;
 
 us0 = (vector (8, SHORT)){argc, 1,  2,  10,  6, 87, (SHORT)-5, 2};
-us1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
+us1 = (vector (8, SHORT)){0, 3, 2, (SHORT)-23, 12, 10, (SHORT)-2, 0};
 test (8, us0, us1, usres, "%u");
 #undef SHORT
 
@@ -102,19 +102,19 @@ int main (int argc, char *argv[]) {
 /* Float comparison.  */
 vector (4, float) f0;
 vector (4, float) f1;
-vector (4, int) ifres;
+__typeof (f0 == f1) ifres;
 
 f0 = (vector (4, float)){(float)argc, 1.,  2.,  10.};
-f1 = (vector (4, float)){0., 3., 2., (float)-23};
+f1 = (vector (4, float)){0., 3., 2., (float)-23};
 test (4, f0, f1, ifres, "%f");
-
+
 /* Double comparison.  */
 vector (2, double) d0;
 vector (2, double) d1;
-vector (2, long long) idres;
+__typeof (d0 == d1) idres;
 
 d0 = (vector (2, double)){(double)argc,  10.};
-d1 = (vector (2, double)){0., (double)-23};
+d1 = (vector (2, double)){0., (double)-23};
 test (2, d0, d1, idres, "%f");
 
 


[1/2] Make regrename callable from target reorg code

2011-09-30 Thread Bernd Schmidt
The main purpose of this patch is to move some declarations to a new
file, regrename.h, and make regrename_analyze and do_replace callable
from elsewhere. The second patch will add code to c6x.c to make use of
this from the modulo-scheduling code.

This also adds a small amount of new functionality: regrename can now
record data for each insn about which chains occur in the operands.

Now testing on c6x-elf; an i686-linux bootstrap with regrenaming enabled
at -O1 completed. The patch has also been in our 4.5 tree for a while
now. Ok?


Bernd
gcc/
* regrename.h: New file.
* regrename.c: Include it.  Also include "emit-rtl.h".
(struct du_head, struct du_chain, du_head_p DEF_VEC and
DEF_VEC_ALLOC_P): Move to regrename.h.
(do_replace): Remove declaration.
(insn_rr): New variable.
(cur_operand): New static variable.
(regrename_chain_from_id): Renamed from chain_from_id and no longer
static.  All callers changed.
(record_operand_use): New static function.
(scan_rtx_reg): Use it.
(find_best_rename_reg): New function, broken out of rename_chains.
(rename_chains): Use it.  Don't update chain regno and nregs here, ...
(regrename_do_replace): ... do it here instead.  Renamed from
do_replace, and no longer static.  All callers changed.
(regrename_analyze): No longer static.  New arg bb_mask.
All callers changed.  If bb_mask is nonzero, use it to limit the
number of basic blocks we analyze.  If we failed to analyze a block,
clear insn operand data.
(record_out_operands): New arg insn_info.  Update cur_operand if it is
nonnull.
(build_def_use): If insn_rr is nonnull, pass an insn_info to
record_out_operands, and update cur_operand here as well.
(regrename_init, regrename_finish): New functions.
(regrename_optimize): Use them.
* Makefile.in (regrename.o): Adjust dependencies.

Index: gcc/regrename.c
===
--- gcc/regrename.c (revision 179379)
+++ gcc/regrename.c (working copy)
@@ -39,6 +39,8 @@
 #include "tree-pass.h"
 #include "df.h"
 #include "target.h"
+#include "emit-rtl.h"
+#include "regrename.h"
 
 /* This file implements the RTL register renaming pass of the compiler.  It is
a semi-local pass whose goal is to maximize the usage of the register file
@@ -73,47 +75,6 @@
 #error "Use a different bitmap implementation for untracked_operands."
 #endif
 
-/* We keep linked lists of DU_HEAD structures, each of which describes
-   a chain of occurrences of a reg.  */
-struct du_head
-{
-  /* The next chain.  */
-  struct du_head *next_chain;
-  /* The first and last elements of this chain.  */
-  struct du_chain *first, *last;
-  /* Describe the register being tracked, register number and count.  */
-  unsigned regno;
-  int nregs;
-
-  /* A unique id to be used as an index into the conflicts bitmaps.  */
-  unsigned id;
-  /* A bitmap to record conflicts with other chains.  */
-  bitmap_head conflicts;
-  /* Conflicts with untracked hard registers.  */
-  HARD_REG_SET hard_conflicts;
-
-  /* Nonzero if the chain crosses a call.  */
-  unsigned int need_caller_save_reg:1;
-  /* Nonzero if the register is used in a way that prevents renaming,
- such as the SET_DEST of a CALL_INSN or an asm operand that used
- to be a hard register.  */
-  unsigned int cannot_rename:1;
-};
-
-/* This struct describes a single occurrence of a register.  */
-struct du_chain
-{
-  /* Links to the next occurrence of the register.  */
-  struct du_chain *next_use;
-
-  /* The insn where the register appears.  */
-  rtx insn;
-  /* The location inside the insn.  */
-  rtx *loc;
-  /* The register class required by the insn at this location.  */
-  ENUM_BITFIELD(reg_class) cl : 16;
-};
-
 enum scan_actions
 {
   terminate_write,
@@ -144,15 +105,14 @@ static int this_tick = 0;
 
 static struct obstack rename_obstack;
 
-static void do_replace (struct du_head *, int);
+/* If nonnull, the code calling into the register renamer requested
+   information about insn operands, and we store it here.  */
+VEC(insn_rr_info, heap) *insn_rr;
+
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
  enum op_type);
 static bool build_def_use (basic_block);
 
-typedef struct du_head *du_head_p;
-DEF_VEC_P (du_head_p);
-DEF_VEC_ALLOC_P (du_head_p, heap);
-
 /* The id to be given to the next opened chain.  */
 static unsigned current_id;
 
@@ -173,10 +133,15 @@ static HARD_REG_SET live_in_chains;
between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
+/* Set while scanning RTL if INSN_RR is nonnull, i.e. if the current analysis
+   is for a caller that requires operand data.  Used in
+   record_operand_use.  */
+static operand_rr_info *cur_operand;
+
 /* Return the chain corresponding to id number ID.  T

Re: [wwwdocs] IA-32/x86-64 Changes for upcoming 4.7.0 series

2011-09-30 Thread H.J. Lu
On Fri, Sep 30, 2011 at 4:11 AM, Kirill Yukhin  wrote:
> Okay, seems maintainers have no objections
>
> Could anybody please commit that to wwwdocs?
>

Your patch can't be applied. Please provide a proper patch.

Thanks.

-- 
H.J.


Re: Vector Comparison patch

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 04:48:41PM +0100, Artem Shinkarov wrote:
> Most likely we can. The question is what do we really want to check
> with this test. My intention was to check that a programmer can
> statically get correspondence of the types, in a sense that sizeof
> (float) == sizeof (int) and sizeof (double) == sizeof (long long). As
> it seems my original assumption does not hold. Before using __typeof,
> I would try to make sure that there is no other way to determine these
> correspondences.

You can use preprocessor too, either just surround the whole test
with #if __SIZEOF_INT__ == __SIZEOF_FLOAT__ and similar,
or select the right type through preprocessor
#if __SIZEOF_INT__ == __SIZEOF_FLOAT__
#define FLOATCMPTYPE int
#elif __SIZEOF_LONG__ == __SIZEOF_FLOAT__
#define FLOATCMPTYPE long
#else
...
or __typeof, etc.

Jakub


Re: Vector Comparison patch

2011-09-30 Thread Artem Shinkarov
On Fri, Sep 30, 2011 at 4:43 PM, Jakub Jelinek  wrote:
> On Fri, Sep 30, 2011 at 05:36:47PM +0200, Georg-Johann Lay wrote:
>> >> The target has
>> >>
>> >> 2 = sizeof (short)
>> >> 2 = sizeof (int)
>> >> 4 = sizeof (long int)
>> >> 8 = sizeof (long long int)
>> >>
>> >> Could you fix that? I.e. parametrize sizeof(int) out or skip the test by 
>> >> means of
>> >>
>> >> /* { dg-require-effective-target int32plus } */
>> >>
>> >> or similar.
>> >>
>> >> Thanks, Johann
>> >>
>> >> [...]
>> >>
>> > The problem actually happens when we compare float vector with float
>> > vector, it is assumed that we should get int vector as a result, but
>> > it turns out that we are getting long int.
>> >
>> > The same with double, we assume that sizeof (double) == sizeof (long
>> > long). But as it seems double has the same size as float.
>>
>> Yes.
>>
>> sizeof(double) = sizeof(float) = 4
>>
>> > Hm, I can put in a conditional of this sort:
>> > if (sizeof (double) == sizeof (long long)) and others. Or maybe there
>> > is a more elegant way of solving this?
>>
>> That's too late because this won't prevent the compiler from error.
>> The error already happens at compile time, not at run time.
>
> Isn't it possible to do something like:
>     vector (4, float) f0;
>     vector (4, float) f1;
> -    vector (4, int) ifres;
> +    vector (4, __typeof (f0 > f1)) ifres;
>
>     f0 = (vector (4, float)){(float)argc, 1.,  2.,  10.};
>     f1 = (vector (4, float)){0., 3., 2., (float)-23};
>     test (4, f0, f1, ifres, "%f");
>
>  /* Double comparison.  */
>     vector (2, double) d0;
>     vector (2, double) d1;
> -    vector (2, long long) idres;
> +    vector (2, __typeof (d0 > d1)) idres;
>
>     d0 = (vector (2, double)){(double)argc,  10.};
>     d1 = (vector (2, double)){0., (double)-23};
>     test (2, d0, d1, idres, "%f");
>
>        Jakub
>

Most likely we can. The question is what do we really want to check
with this test. My intention was to check that a programmer can
statically get correspondence of the types, in a sense that sizeof
(float) == sizeof (int) and sizeof (double) == sizeof (long long). As
it seems my original assumption does not hold. Before using __typeof,
I would try to make sure that there is no other way to determine these
correspondences.

Artem.


Re: Vector Comparison patch

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 05:36:47PM +0200, Georg-Johann Lay wrote:
> >> The target has
> >>
> >> 2 = sizeof (short)
> >> 2 = sizeof (int)
> >> 4 = sizeof (long int)
> >> 8 = sizeof (long long int)
> >>
> >> Could you fix that? I.e. parametrize sizeof(int) out or skip the test by 
> >> means of
> >>
> >> /* { dg-require-effective-target int32plus } */
> >>
> >> or similar.
> >>
> >> Thanks, Johann
> >>
> >> [...]
> >>
> > The problem actually happens when we compare float vector with float
> > vector, it is assumed that we should get int vector as a result, but
> > it turns out that we are getting long int.
> > 
> > The same with double, we assume that sizeof (double) == sizeof (long
> > long). But as it seems double has the same size as float.
> 
> Yes.
> 
> sizeof(double) = sizeof(float) = 4
> 
> > Hm, I can put in a conditional of this sort:
> > if (sizeof (double) == sizeof (long long)) and others. Or maybe there
> > is a more elegant way of solving this?
> 
> That's too late because this won't prevent the compiler from error.
> The error already happens at compile time, not at run time.

Isn't it possible to do something like:
 vector (4, float) f0;
 vector (4, float) f1;
-vector (4, int) ifres;
+vector (4, __typeof (f0 > f1)) ifres;

 f0 = (vector (4, float)){(float)argc, 1.,  2.,  10.};
 f1 = (vector (4, float)){0., 3., 2., (float)-23};
 test (4, f0, f1, ifres, "%f");

 /* Double comparison.  */
 vector (2, double) d0;
 vector (2, double) d1;
-vector (2, long long) idres;
+vector (2, __typeof (d0 > d1)) idres;

 d0 = (vector (2, double)){(double)argc,  10.};
 d1 = (vector (2, double)){0., (double)-23};
 test (2, d0, d1, idres, "%f");

Jakub


Re: Vector Comparison patch

2011-09-30 Thread Georg-Johann Lay
Artem Shinkarov schrieb:
> On Fri, Sep 30, 2011 at 4:01 PM, Georg-Johann Lay  wrote:
>> Artem Shinkarov schrieb:
>>> Here is a new version of the patch which considers the changes from
>>> 2011-09-02  Richard Guenther
>>>
>>>
>>> ChangeLog
>>>
>>> 2011-09-06 Artjoms Sinkarovs 
>>>
>>>gcc/
>>>* fold-const.c (constant_boolean_node): Adjust the meaning
>>>of boolean for vector types: true = {-1,..}, false = {0,..}.
>>>(fold_unary_loc): Avoid conversion of vector comparison to
>>>boolean type.
>>>* expr.c (expand_expr_real_2): Expand vector comparison by
>>>building an appropriate VEC_COND_EXPR.
>>>* c-typeck.c (build_binary_op): Typecheck vector comparisons.
>>>(c_objc_common_truthvalue_conversion): Adjust.
>>>* tree-vect-generic.c (do_compare): Helper function.
>>>(expand_vector_comparison): Check if hardware supports
>>>vector comparison of the given type or expand vector
>>>piecewise.
>>>(expand_vector_operation): Treat comparison as binary
>>>operation of vector type.
>>>(expand_vector_operations_1): Adjust.
>>>* tree-cfg.c (verify_gimple_comparison): Adjust.
>>>
>>>gcc/config/i386
>>>* i386.c (ix86_expand_sse_movcc): Consider a case when
>>>vcond operators are {-1,..} and {0,..}.
>>>
>>>gcc/doc
>>>* extend.texi: Adjust.
>>>
>>>gcc/testsuite
>>>* gcc.c-torture/execute/vector-compare-1.c: New test.
>>>* gcc.c-torture/execute/vector-compare-2.c: New test.
>>>* gcc.dg/vector-compare-1.c: New test.
>>>* gcc.dg/vector-compare-2.c: New test.
>>>
>>> bootstrapped and tested on x86_64-unknown-linux-gnu.
>>>
>>>
>>> Thanks,
>>> Artem.
>> Hi Artem,
>>
>> the new test case gcc.c-torture/execute/vector-compare-1.c causes a bunch of
>> FAILS in regression tests for avr-unknown-none (see attachment).
>>
>> The target has
>>
>> 2 = sizeof (short)
>> 2 = sizeof (int)
>> 4 = sizeof (long int)
>> 8 = sizeof (long long int)
>>
>> Could you fix that? I.e. parametrize sizeof(int) out or skip the test by 
>> means of
>>
>> /* { dg-require-effective-target int32plus } */
>>
>> or similar.
>>
>> Thanks, Johann
>>
>> [...]
>>
> Hi
> 
> The problem actually happens when we compare float vector with float
> vector, it is assumed that we should get int vector as a result, but
> it turns out that we are getting long int.
> 
> The same with double, we assume that sizeof (double) == sizeof (long
> long). But as it seems double has the same size as float.

Yes.

sizeof(double) = sizeof(float) = 4

> Hm, I can put in a conditional of this sort:
> if (sizeof (double) == sizeof (long long)) and others. Or maybe there
> is a more elegant way of solving this?

That's too late because this won't prevent the compiler from error.
The error already happens at compile time, not at run time.

> I can fix it, but keep in mind that I don't have a permission to
> commit to the trunk.

You could browse ./testsuite/lib/target-supports.exp and try to find some gate
functions that fit the test case's requirement like
check_effective_target_large_double, check_effective_target_double64,
check_effective_target_x32 or a combination of them.

Johann

> Artem.



Re: ifcvt cond_exec support rewrite

2011-09-30 Thread Bernd Schmidt
Hi Nick,

>> Experiments show that the
>> existing multi-if-block support isn't terribly effective on FRV;
>> before-after comparisons show that by turning it off, there are three
>> spots in gcc that are meaningfully changed, and below 20 in the C
>> benchmarks of SPEC2k.
>>
>> FRV also doesn't build in mainline, and it looks as if it likely hasn't
>> in a while, so I haven't tried testing this patch on it. If no public
>> manual can be found, is it still my responsibility to keep this support
>> working, or can we put the burden on the FRV maintainers?
> 
> You can put it on the FRV maintainers.

Thanks. That sounds like the manual you have was under NDA after all :(

> Although now that the FRV port
> does build it would be very much appreciated if you could provide a
> patch for this backend as well.

Ok. I'll wait for general review and if it looks like this will go in
I'll make an effort to at least make sure it generates correct code on
FRV (although it won't do multiple-block conversion).


Bernd


Re: [PATCH 3/7] Emit macro expansion related diagnostics

2011-09-30 Thread Jason Merrill

On 09/29/2011 05:21 PM, Dodji Seketeli wrote:

+ When the token is /not/ an argument for a macro, xI is the same
+ location as yI.  Otherwise, xI is either the virtual location of
+ that argument token if it comes from a macro expansion itself, or
+ its spelling location.


I think this could be a little clearer:

 Otherwise, xI is the location of the token outside this macro 
expansion.  If this macro was expanded from another macro expansion, xI 
is a virtual location representing the token in that macro expansion; 
otherwise, it is the spelling location of the token.



+  /* This is the location of the expansion point of the current macro
+ map.  That expansion point location is held by the map that was
+ current right before the current one. It could have been either
+ a macro or an ordinary map, depending on if we are in a
+ nested expansion context or not.  */


This should clarify what location the expansion point is: is it the 
location of the macro name?  the closing paren?



+source_location linemap_macro_map_loc_to_def_point (const struct line_map*,
+   source_location);
+source_location linemap_macro_map_loc_unwind_toward_spelling (const struct line_map*,
+ source_location);
+source_location linemap_macro_map_loc_to_exp_point (const struct line_map*,
+   source_location);


These should be static.


+const struct line_map *linemap_enter_macro (struct line_maps *,
+   struct cpp_hashnode*,
+   source_location,
+   unsigned int);
+source_location linemap_add_macro_token (const struct line_map *,
+unsigned int,
+source_location,
+source_location);
+int linemap_get_expansion_line (struct line_maps *,
+   source_location);
+const char* linemap_get_expansion_filename (struct line_maps *,
+   source_location);


And these should be declared in an internal header, not the public 
include/line-map.h.



+/* Expand source code location LOC and return a user readable source
+   code location.  */
+expanded_location linemap_expand_location (const struct line_map *,
+  source_location loc);
+
+/* Expand source code location LOC and return a user readable source
+   code location.  The LRK parameter is the same as for
+   linemap_resolve_location.  */
+
+expanded_location linemap_expand_location_full (struct line_maps *,


These comments should clarify that the first function only takes 
spelling locations, while the second also takes virtual locations.



-finish_declspecs (struct c_declspecs *specs)
+finish_declspecs (struct c_declspecs *specs,
+ location_t where)


I'm not sure the beginning of the declspecs is a better place for these 
diagnostics than the beginning of the declarator.  Why make this change?


Jason


Re: ifcvt cond_exec support rewrite

2011-09-30 Thread Nick Clifton

Hi Bernd,


Experiments show that the
existing multi-if-block support isn't terribly effective on FRV;
before-after comparisons show that by turning it off, there are three
spots in gcc that are meaningfully changed, and below 20 in the C
benchmarks of SPEC2k.

FRV also doesn't build in mainline, and it looks as if it likely hasn't
in a while, so I haven't tried testing this patch on it. If no public
manual can be found, is it still my responsibility to keep this support
working, or can we put the burden on the FRV maintainers?


You can put it on the FRV maintainers.  Although now that the FRV port 
does build it would be very much appreciated if you could provide a 
patch for this backend as well.


Cheers
  Nick


Re: [PATCH 2/9] [doloop] Correct extracting loop exit condition

2011-09-30 Thread Roman Zhuykov
2011/7/22 Richard Sandiford :
> zhr...@ispras.ru writes:
>> This patch fixes the compiler segfault found while regtesting trunk with SMS 
>> on
>> IA64 platform.  Segfault happens on test gcc.dg/pr45259.c with -fmodulo-sched
>> enabled.  The following jump instruction is given as argument for
>> doloop_condition_get function:
>> (jump_insn 86 85 88 7 (set (pc)
>>         (reg/f:DI 403)) 339 {indirect_jump}
>>      (expr_list:REG_DEAD (reg/f:DI 403)
>>         (nil)))
>> The patch adds checking for the form of comparison instruction before
>> extracting loop exit condition.
>>
>> 2011-07-20  Roman Zhuykov  
>>       * loop-doloop.c (doloop_condition_get): Correctly check
>>       the form of comparison instruction.
>> ---
>>  gcc/loop-doloop.c |    2 ++
>>  1 files changed, 2 insertions(+), 0 deletions(-)
>>
>> diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c
>> index f8429c4..dfc4a16 100644
>> --- a/gcc/loop-doloop.c
>> +++ b/gcc/loop-doloop.c
>> @@ -153,6 +153,8 @@ doloop_condition_get (rtx doloop_pat)
>>        else
>>          inc = PATTERN (prev_insn);
>>        /* We expect the condition to be of the form (reg != 0)  */
>> +      if (GET_CODE (cmp) != SET || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE)
>> +     return 0;
>>        cond = XEXP (SET_SRC (cmp), 0);
>>        if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
>>          return 0;
>
> I think it'd be better to integrate:
>
>      /* We expect the condition to be of the form (reg != 0)  */
>      cond = XEXP (SET_SRC (cmp), 0);
>      if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
>        return 0;
>
> into:
>
>  /* We expect a GE or NE comparison with 0 or 1.  */
>  if ((GET_CODE (condition) != GE
>       && GET_CODE (condition) != NE)
>      || (XEXP (condition, 1) != const0_rtx
>          && XEXP (condition, 1) != const1_rtx))
>    return 0;
>
> The next "if" already uses "GET_CODE (pattern) != PARALLEL" as a check
> for the second and third cases.  E.g. something like:
>
>  if (GET_CODE (pattern) == PARALLEL)
>    {
>      /* We expect a GE or NE comparison with 0 or 1.  */
>      if ((GET_CODE (condition) != GE
>           && GET_CODE (condition) != NE)
>          || (XEXP (condition, 1) != const0_rtx
>              && XEXP (condition, 1) != const1_rtx))
>        return 0;
>    }
>  else
>    {
>      /* In the second and third cases, we expect the condition to
>         be of the form (reg != 0)  */
>      if (GET_CODE (condition) != NE || XEXP (condition, 1) != const0_rtx)
>        return 0;
>    }
>
> That's pre-approved (independently of the other patches) if it works.
Changed like the following. Will commit if no objections after a couple of days.

--
Roman Zhuykov
zhr...@ispras.ru
2011-09-30  Roman Zhuykov  
	* loop-doloop.c (doloop_condition_get): Correctly check
	the form of comparison instruction.
---
 gcc/loop-doloop.c |   28 +---
 1 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c
index a7e264f..4e83649 100644
--- a/gcc/loop-doloop.c
+++ b/gcc/loop-doloop.c
@@ -113,7 +113,6 @@ doloop_condition_get (rtx doloop_pat)
 
   if (GET_CODE (pattern) != PARALLEL)
 {
-  rtx cond;
   rtx prev_insn = prev_nondebug_insn (doloop_pat);
   rtx cmp_arg1, cmp_arg2;
   rtx cmp_orig;
@@ -152,10 +151,6 @@ doloop_condition_get (rtx doloop_pat)
 	}
   else
 inc = PATTERN (prev_insn);
-  /* We expect the condition to be of the form (reg != 0)  */
-  cond = XEXP (SET_SRC (cmp), 0);
-  if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
-return 0;
 }
   else
 {
@@ -193,12 +188,23 @@ doloop_condition_get (rtx doloop_pat)
   /* Extract loop termination condition.  */
   condition = XEXP (SET_SRC (cmp), 0);
 
-  /* We expect a GE or NE comparison with 0 or 1.  */
-  if ((GET_CODE (condition) != GE
-   && GET_CODE (condition) != NE)
-  || (XEXP (condition, 1) != const0_rtx
-  && XEXP (condition, 1) != const1_rtx))
-return 0;
+  if (GET_CODE (pattern) == PARALLEL)
+{
+  /* We expect a GE or NE comparison with 0 or 1.  */
+  if ((GET_CODE (condition) != GE
+	   && GET_CODE (condition) != NE)
+	   || (XEXP (condition, 1) != const0_rtx
+	   && XEXP (condition, 1) != const1_rtx))
+return 0;
+}
+  else
+{
+  /* In the second and third cases, we expect the condition
+ to be of the form (reg != 0)  */
+  if (GET_CODE (condition) != NE
+	  || XEXP (condition, 1) != const0_rtx)
+return 0;
+}
 
   if ((XEXP (condition, 0) == reg)
   /* For the third case:  */  


Re: [PATCH 0/9] [RFC] Expand SMS functionality

2011-09-30 Thread Roman Zhuykov
Ping.
The following RTL patches need reviews:
[PATCH 4/9] Move the SMS pass earlier
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01811.html
[PATCH 7/9] New assertion in rtl_lv_add_condition_to_bb
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01808.html
[PATCH 8/9] Extend simple_rhs_p
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01810.html


2011/7/21  :
> All the work described in next emails was done while trying to improve SMS
> functionality.  The main idea is to remove the requirement of the doloop_end 
> instruction
> pattern.  This allows SMS to work on more platforms, for example x86-64 and
> ARM.
> --
> Roman Zhuykov
> zhr...@ispras.ru
>


Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Joseph S. Myers
On Fri, 30 Sep 2011, Tristan Gingold wrote:

> If you prefer a target hook, I'm fine with that.  I will write such a patch.
> 
> I don't think it must be restricted to system headers, as it is possible 
> that the user 'imports' such a function (and define it in one of VMS 
> favorite languages such as macro-32 or bliss).

If it's not restricted to system headers, then probably the option is 
better than the target hook.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Teach PTA and aliasing about strdup/strndup (take 2)

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 12:35:49PM +0200, Richard Guenther wrote:
> On Fri, Sep 30, 2011 at 12:23 PM, Jakub Jelinek  wrote:
> > On Fri, Sep 30, 2011 at 10:25:35AM +0200, Richard Guenther wrote:
> >> > This patch teaches PTA/aliasing about strdup/strndup (that the passed in
> >> > string is just read and doesn't escape in any way, and that otherwise it
> >> > acts as malloc or other allocation calls).
> >> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > Ok, here is the promised updated patch.
> 
> Ok if it passes bootstrap & test.  The str[n]cat changes are probably
> worth backporting.

Here is the updated patch, bootstrapped/regtested on x86_64-linux and
i686-linux, committed to trunk.  Will work on 4.6 backport of the str{,n}cat
part.

2011-09-30  Jakub Jelinek  
Richard Guenther  

* tree-ssa-structalias.c (find_func_aliases_for_builtin_call): Handle
BUILT_IN_STRDUP and BUILT_IN_STRNDUP.
* tree-ssa-alias.c (call_may_clobber_ref_p_1): Likewise.  Fix
handling of BUILT_IN_STRNCAT and BUILT_IN_STRNCAT_CHK.
(ref_maybe_used_by_call_p_1): Fix handling of BUILT_IN_STRCAT,
BUILT_IN_STRNCAT, BUILT_IN_STRCAT_CHK and BUILT_IN_STRNCAT_CHK.

* gcc.dg/strlenopt-21.c: New test.

--- gcc/tree-ssa-structalias.c.jj   2011-09-29 15:27:17.0 +0200
+++ gcc/tree-ssa-structalias.c  2011-09-30 11:27:06.0 +0200
@@ -4130,6 +4130,24 @@ find_func_aliases_for_builtin_call (gimp
   case BUILT_IN_REMQUOL:
   case BUILT_IN_FREE:
return true;
+  case BUILT_IN_STRDUP:
+  case BUILT_IN_STRNDUP:
+   if (gimple_call_lhs (t))
+ {
+   handle_lhs_call (t, gimple_call_lhs (t), gimple_call_flags (t),
+NULL, fndecl);
+   get_constraint_for_ptr_offset (gimple_call_lhs (t),
+  NULL_TREE, &lhsc);
+   get_constraint_for_ptr_offset (gimple_call_arg (t, 0),
+  NULL_TREE, &rhsc);
+   do_deref (&lhsc);
+   do_deref (&rhsc);
+   process_all_all_constraints (lhsc, rhsc);
+   VEC_free (ce_s, heap, lhsc);
+   VEC_free (ce_s, heap, rhsc);
+   return true;
+ }
+   break;
   /* Trampolines are special - they set up passing the static
 frame.  */
   case BUILT_IN_INIT_TRAMPOLINE:
--- gcc/tree-ssa-alias.c.jj 2011-09-29 15:27:17.0 +0200
+++ gcc/tree-ssa-alias.c2011-09-30 12:16:20.0 +0200
@@ -1178,8 +1177,20 @@ ref_maybe_used_by_call_p_1 (gimple call,
   && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL)
 switch (DECL_FUNCTION_CODE (callee))
   {
-   /* All the following functions clobber memory pointed to by
-  their first argument.  */
+   /* All the following functions read memory pointed to by
+  their second argument.  strcat/strncat additionally
+  reads memory pointed to by the first argument.  */
+   case BUILT_IN_STRCAT:
+   case BUILT_IN_STRNCAT:
+ {
+   ao_ref dref;
+   ao_ref_init_from_ptr_and_size (&dref,
+  gimple_call_arg (call, 0),
+  NULL_TREE);
+   if (refs_may_alias_p_1 (&dref, ref, false))
+ return true;
+ }
+ /* FALLTHRU */
case BUILT_IN_STRCPY:
case BUILT_IN_STRNCPY:
case BUILT_IN_MEMCPY:
@@ -1187,8 +1198,6 @@ ref_maybe_used_by_call_p_1 (gimple call,
case BUILT_IN_MEMPCPY:
case BUILT_IN_STPCPY:
case BUILT_IN_STPNCPY:
-   case BUILT_IN_STRCAT:
-   case BUILT_IN_STRNCAT:
  {
ao_ref dref;
tree size = NULL_TREE;
@@ -1199,14 +1208,23 @@ ref_maybe_used_by_call_p_1 (gimple call,
   size);
return refs_may_alias_p_1 (&dref, ref, false);
  }
+   case BUILT_IN_STRCAT_CHK:
+   case BUILT_IN_STRNCAT_CHK:
+ {
+   ao_ref dref;
+   ao_ref_init_from_ptr_and_size (&dref,
+  gimple_call_arg (call, 0),
+  NULL_TREE);
+   if (refs_may_alias_p_1 (&dref, ref, false))
+ return true;
+ }
+ /* FALLTHRU */
case BUILT_IN_STRCPY_CHK:
case BUILT_IN_STRNCPY_CHK:
case BUILT_IN_MEMCPY_CHK:
case BUILT_IN_MEMMOVE_CHK:
case BUILT_IN_MEMPCPY_CHK:
case BUILT_IN_STPCPY_CHK:
-   case BUILT_IN_STRCAT_CHK:
-   case BUILT_IN_STRNCAT_CHK:
  {
ao_ref dref;
tree size = NULL_TREE;
@@ -1226,6 +1244,19 @@ ref_maybe_used_by_call_p_1 (gimple call,
   size);
return refs_may_alias_p_1 (&dref, ref, false);
  }
+   /* These read memory pointed to by the first argument.  */
+   case 

Re: Vector Comparison patch

2011-09-30 Thread Artem Shinkarov
On Fri, Sep 30, 2011 at 4:01 PM, Georg-Johann Lay  wrote:
> Artem Shinkarov schrieb:
>> Here is a new version of the patch which considers the changes from
>> 2011-09-02  Richard Guenther
>>
>>
>> ChangeLog
>>
>> 2011-09-06 Artjoms Sinkarovs 
>>
>>        gcc/
>>        * fold-const.c (constant_boolean_node): Adjust the meaning
>>        of boolean for vector types: true = {-1,..}, false = {0,..}.
>>        (fold_unary_loc): Avoid conversion of vector comparison to
>>        boolean type.
>>        * expr.c (expand_expr_real_2): Expand vector comparison by
>>        building an appropriate VEC_COND_EXPR.
>>        * c-typeck.c (build_binary_op): Typecheck vector comparisons.
>>        (c_objc_common_truthvalue_conversion): Adjust.
>>        * tree-vect-generic.c (do_compare): Helper function.
>>        (expand_vector_comparison): Check if hardware supports
>>        vector comparison of the given type or expand vector
>>        piecewise.
>>        (expand_vector_operation): Treat comparison as binary
>>        operation of vector type.
>>        (expand_vector_operations_1): Adjust.
>>        * tree-cfg.c (verify_gimple_comparison): Adjust.
>>
>>        gcc/config/i386
>>        * i386.c (ix86_expand_sse_movcc): Consider a case when
>>        vcond operators are {-1,..} and {0,..}.
>>
>>        gcc/doc
>>        * extend.texi: Adjust.
>>
>>        gcc/testsuite
>>        * gcc.c-torture/execute/vector-compare-1.c: New test.
>>        * gcc.c-torture/execute/vector-compare-2.c: New test.
>>        * gcc.dg/vector-compare-1.c: New test.
>>        * gcc.dg/vector-compare-2.c: New test.
>>
>> bootstrapped and tested on x86_64-unknown-linux-gnu.
>>
>>
>> Thanks,
>> Artem.
>
> Hi Artem,
>
> the new test case gcc.c-torture/execute/vector-compare-1.c causes a bunch of
> FAILs in regression tests for avr-unknown-none (see attachment).
>
> The target has
>
> 2 = sizeof (short)
> 2 = sizeof (int)
> 4 = sizeof (long int)
> 8 = sizeof (long long int)
>
> Could you fix that? I.e. parametrize sizeof(int) out or skip the test by 
> means of
>
> /* { dg-require-effective-target int32plus } */
>
> or similar.
>
> Thanks, Johann
>
>
>
>
>
>
>
>
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c: In function 'main':
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
> incompatible types when assigning to type '__vector(2) long long int' from 
> type '__vector(2) long int'
> compiler exited with status 1
> output is:
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c: In function 'main':
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) long int'
> ./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
> incompatible types when assigning to type '__vector(4) int' from type 
> '__vector(4) 

[PATCH] Restrict fixes (take 2)

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 10:57:25AM +0200, Richard Guenther wrote:
> Definitely.  Seeing a decl will enable better offset-based
> disambiguation.

Ok, here is an updated patch.  Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

2011-09-30  Jakub Jelinek  

* fold-const.c (fold_unary_loc): Don't optimize
POINTER_PLUS_EXPR casted to TYPE_RESTRICT pointer by
casting the inner pointer if it isn't TYPE_RESTRICT.
* tree-ssa-forwprop.c (forward_propagate_addr_expr_1): Don't propagate
through casts from non-TYPE_RESTRICT pointer to TYPE_RESTRICT pointer.

* gcc.dg/tree-ssa/restrict-4.c: New test.

--- gcc/fold-const.c.jj 2011-09-29 14:25:46.0 +0200
+++ gcc/fold-const.c2011-09-29 18:20:04.0 +0200
@@ -7929,6 +7929,7 @@ fold_unary_loc (location_t loc, enum tre
 that this happens when X or Y is NOP_EXPR or Y is INTEGER_CST. */
   if (POINTER_TYPE_P (type)
  && TREE_CODE (arg0) == POINTER_PLUS_EXPR
+ && (!TYPE_RESTRICT (type) || TYPE_RESTRICT (TREE_TYPE (arg0)))
  && (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
  || TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR
  || TREE_CODE (TREE_OPERAND (arg0, 1)) == NOP_EXPR))
--- gcc/tree-ssa-forwprop.c.jj  2011-09-15 12:18:54.0 +0200
+++ gcc/tree-ssa-forwprop.c 2011-09-29 19:08:03.0 +0200
@@ -804,6 +804,11 @@ forward_propagate_addr_expr_1 (tree name
   && ((rhs_code == SSA_NAME && rhs == name)
  || CONVERT_EXPR_CODE_P (rhs_code)))
 {
+  /* Don't propagate restrict pointer's RHS.  */
+  if (TYPE_RESTRICT (TREE_TYPE (lhs))
+ && !TYPE_RESTRICT (TREE_TYPE (name))
+ && !is_gimple_min_invariant (def_rhs))
+   return false;
   /* Only recurse if we don't deal with a single use or we cannot
 do the propagation to the current statement.  In particular
 we can end up with a conversion needed for a non-invariant
--- gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c.jj   2011-09-29 
20:21:00.0 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c  2011-09-29 20:21:57.0 
+0200
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+foo (int *x, int y)
+{
+  int *__restrict p1 = x;
+  int *__restrict p2 = x + 32;
+  p1[y] = 1;
+  p2[4] = 2;
+  return p1[y];
+}
+
+int
+bar (int *x, int y)
+{
+  int *__restrict p1 = x;
+  int *p3 = x + 32;
+  int *__restrict p2 = p3;
+  p1[y] = 1;
+  p2[4] = 2;
+  return p1[y];
+}
+
+/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Jakub


[PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309, take 2)

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 03:14:12PM +0200, Richard Guenther wrote:
> Ah, indeed.  I'll have a look at the updated patch.

Here is what I've committed after bootstrapping/regtesting it on
x86_64-linux and i686-linux and Richard's approval on IRC.

2011-09-30  Jakub Jelinek  

PR tree-optimization/46309
* fold-const.c (make_range, merge_ranges): Remove prototypes.
(make_range_step): New function.
(make_range): Use it.
* tree.h (make_range_step): New prototypes.
* Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
* tree-ssa-reassoc.c: Include diagnostic-core.h.
(struct range_entry): New type.
(init_range_entry, range_entry_cmp, update_range_test,
optimize_range_tests): New functions.
(reassociate_bb): Call optimize_range_tests.

* gcc.dg/pr46309.c: New test.

--- gcc/fold-const.c.jj 2011-09-30 13:17:22.0 +0200
+++ gcc/fold-const.c2011-09-30 14:08:30.0 +0200
@@ -115,9 +115,6 @@ static int simple_operand_p (const_tree)
 static tree range_binop (enum tree_code, tree, tree, int, tree, int);
 static tree range_predecessor (tree);
 static tree range_successor (tree);
-extern tree make_range (tree, int *, tree *, tree *, bool *);
-extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
- tree, tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, 
tree);
 static tree unextend (tree, int, int, tree);
@@ -3790,6 +3787,255 @@ range_binop (enum tree_code code, tree t
   return constant_boolean_node (result, type);
 }
 
+/* Helper routine for make_range.  Perform one step for it, return
+   new expression if the loop should continue or NULL_TREE if it should
+   stop.  */
+
+tree
+make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
+tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
+bool *strict_overflow_p)
+{
+  tree arg0_type = TREE_TYPE (arg0);
+  tree n_low, n_high, low = *p_low, high = *p_high;
+  int in_p = *p_in_p, n_in_p;
+
+  switch (code)
+{
+case TRUTH_NOT_EXPR:
+  *p_in_p = ! in_p;
+  return arg0;
+
+case EQ_EXPR: case NE_EXPR:
+case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
+  /* We can only do something if the range is testing for zero
+and if the second operand is an integer constant.  Note that
+saying something is "in" the range we make is done by
+complementing IN_P since it will set in the initial case of
+being not equal to zero; "out" is leaving it alone.  */
+  if (low == NULL_TREE || high == NULL_TREE
+ || ! integer_zerop (low) || ! integer_zerop (high)
+ || TREE_CODE (arg1) != INTEGER_CST)
+   return NULL_TREE;
+
+  switch (code)
+   {
+   case NE_EXPR:  /* - [c, c]  */
+ low = high = arg1;
+ break;
+   case EQ_EXPR:  /* + [c, c]  */
+ in_p = ! in_p, low = high = arg1;
+ break;
+   case GT_EXPR:  /* - [-, c] */
+ low = 0, high = arg1;
+ break;
+   case GE_EXPR:  /* + [c, -] */
+ in_p = ! in_p, low = arg1, high = 0;
+ break;
+   case LT_EXPR:  /* - [c, -] */
+ low = arg1, high = 0;
+ break;
+   case LE_EXPR:  /* + [-, c] */
+ in_p = ! in_p, low = 0, high = arg1;
+ break;
+   default:
+ gcc_unreachable ();
+   }
+
+  /* If this is an unsigned comparison, we also know that EXP is
+greater than or equal to zero.  We base the range tests we make
+on that fact, so we record it here so we can parse existing
+range tests.  We test arg0_type since often the return type
+of, e.g. EQ_EXPR, is boolean.  */
+  if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
+   {
+ if (! merge_ranges (&n_in_p, &n_low, &n_high,
+ in_p, low, high, 1,
+ build_int_cst (arg0_type, 0),
+ NULL_TREE))
+   return NULL_TREE;
+
+ in_p = n_in_p, low = n_low, high = n_high;
+
+ /* If the high bound is missing, but we have a nonzero low
+bound, reverse the range so it goes from zero to the low bound
+minus 1.  */
+ if (high == 0 && low && ! integer_zerop (low))
+   {
+ in_p = ! in_p;
+ high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
+ integer_one_node, 0);
+ low = build_int_cst (arg0_type, 0);
+   }
+   }
+
+  *p_low = low;
+  *p_high = high;
+  *p_in_p = in_p;
+  return arg0;
+
+case NEGATE_EXPR:
+  /* (-x) IN [a,b] -> x in [-b, -a]  */
+  n_low = range_binop (MINUS_EXPR, exp_type,
+  build_int_cst (exp_type, 0),
+

Re: Vector Comparison patch

2011-09-30 Thread Georg-Johann Lay
Artem Shinkarov schrieb:
> Here is a new version of the patch which considers the changes from
> 2011-09-02  Richard Guenther
> 
> 
> ChangeLog
> 
> 2011-09-06 Artjoms Sinkarovs 
> 
>gcc/
>* fold-const.c (constant_boolean_node): Adjust the meaning
>of boolean for vector types: true = {-1,..}, false = {0,..}.
>(fold_unary_loc): Avoid conversion of vector comparison to
>boolean type.
>* expr.c (expand_expr_real_2): Expand vector comparison by
>building an appropriate VEC_COND_EXPR.
>* c-typeck.c (build_binary_op): Typecheck vector comparisons.
>(c_objc_common_truthvalue_conversion): Adjust.
>* tree-vect-generic.c (do_compare): Helper function.
>(expand_vector_comparison): Check if hardware supports
>vector comparison of the given type or expand vector
>piecewise.
>(expand_vector_operation): Treat comparison as binary
>operation of vector type.
>(expand_vector_operations_1): Adjust.
>* tree-cfg.c (verify_gimple_comparison): Adjust.
> 
>gcc/config/i386
>* i386.c (ix86_expand_sse_movcc): Consider a case when
>vcond operators are {-1,..} and {0,..}.
> 
>gcc/doc
>* extend.texi: Adjust.
> 
>gcc/testsuite
>* gcc.c-torture/execute/vector-compare-1.c: New test.
>* gcc.c-torture/execute/vector-compare-2.c: New test.
>* gcc.dg/vector-compare-1.c: New test.
>* gcc.dg/vector-compare-2.c: New test.
> 
> bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> 
> Thanks,
> Artem.

Hi Artem,

the new test case gcc.c-torture/execute/vector-compare-1.c causes a bunch of
FAILs in regression tests for avr-unknown-none (see attachment).

The target has

2 = sizeof (short)
2 = sizeof (int)
4 = sizeof (long int)
8 = sizeof (long long int)

Could you fix that? I.e. parametrize sizeof(int) out or skip the test by means 
of

/* { dg-require-effective-target int32plus } */

or similar.

Thanks, Johann







./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c: In function 'main':
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5: error: 
incompatible types when assigning to type '__vector(2) long long int' from type 
'__vector(2) long int'
compiler exited with status 1
output is:
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c: In function 'main':
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:109:5: error: 
incompatible types when assigning to type '__vector(4) int' from type 
'__vector(4) long int'
./gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:10

Re: [PATCH 1/2] LTO: split out writing of top level asm nodes

2011-09-30 Thread Jan Beulich
>>> On 30.09.11 at 14:34, Richard Guenther  wrote:
> On Fri, Sep 30, 2011 at 1:43 PM, Jan Beulich  wrote:
>> Split out LTO's writing of top level asm nodes in preparation for extending
>> what needs to be written out when top level asm-s get enhanced to accept a
>> limited set of input operands.
> 
> Ok with ...

For reference, this is the patch I committed (also slightly extended the
change description for the file where you asked for a change):

gcc/
2011-09-30  Jan Beulich  

* lto-cgraph.c (output_cgraph): Remove processing of
'cgraph_asm_nodes', call lto_output_toplevel_asms() instead.
(input_cgraph_1): Remove loop calling cgraph_add_asm_node(), call
lto_input_toplevel_asms() instead.
* lto-section-in.c (lto_section_name): Add "asm" entry.
* lto-streamer-in.c (lto_input_toplevel_asms): New.
* lto-streamer-out.c (lto_output_toplevel_asms): New.
* lto-streamer.h (LTO_minor_version): Bump.
(enum lto_section_type): Add LTO_section_asm.
(struct lto_asm_header): New.
(lto_input_toplevel_asms, lto_output_toplevel_asms): Declare.
* tree-streamer.h (streamer_write_string_cst): Declare.
* tree-streamer-out.c (write_string_cst): Rename to
streamer_write_string_cst and make global. Handle incoming string
being NULL.
(streamer_write_tree_header): Adjust call to renamed function.

--- 2011-09-29.orig/gcc/lto-cgraph.c2011-09-29 15:07:41.0 +0200
+++ 2011-09-29/gcc/lto-cgraph.c 2011-09-29 15:07:50.0 +0200
@@ -817,7 +817,6 @@ output_cgraph (cgraph_node_set set, varp
   int i, n_nodes;
   lto_cgraph_encoder_t encoder;
   lto_varpool_encoder_t varpool_encoder;
-  struct cgraph_asm_node *can;
   static bool asm_nodes_output = false;
 
   if (flag_wpa)
@@ -854,6 +853,8 @@ output_cgraph (cgraph_node_set set, varp
 
   streamer_write_uhwi_stream (ob->main_stream, 0);
 
+  lto_destroy_simple_output_block (ob);
+
   /* Emit toplevel asms.
  When doing WPA we must output every asm just once.  Since we do not 
partition asm
  nodes at all, output them to first output.  This is kind of hack, but 
should work
@@ -861,19 +862,9 @@ output_cgraph (cgraph_node_set set, varp
   if (!asm_nodes_output)
 {
   asm_nodes_output = true;
-  for (can = cgraph_asm_nodes; can; can = can->next)
-   {
- int len = TREE_STRING_LENGTH (can->asm_str);
- streamer_write_uhwi_stream (ob->main_stream, len);
- for (i = 0; i < len; ++i)
-   streamer_write_char_stream (ob->main_stream,
-   TREE_STRING_POINTER (can->asm_str)[i]);
-   }
+  lto_output_toplevel_asms ();
 }
 
-  streamer_write_uhwi_stream (ob->main_stream, 0);
-
-  lto_destroy_simple_output_block (ob);
   output_varpool (set, vset);
   output_refs (set, vset, encoder, varpool_encoder);
 }
@@ -1185,7 +1176,6 @@ input_cgraph_1 (struct lto_file_decl_dat
   VEC(cgraph_node_ptr, heap) *nodes = NULL;
   struct cgraph_node *node;
   unsigned i;
-  unsigned HOST_WIDE_INT len;
 
   tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
   while (tag)
@@ -1206,18 +1196,8 @@ input_cgraph_1 (struct lto_file_decl_dat
   tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
 }
 
-  /* Input toplevel asms.  */
-  len = streamer_read_uhwi (ib);
-  while (len)
-{
-  char *str = (char *)xmalloc (len + 1);
-  for (i = 0; i < len; ++i)
-   str[i] = streamer_read_uchar (ib);
-  cgraph_add_asm_node (build_string (len, str));
-  free (str);
+  lto_input_toplevel_asms (file_data);
 
-  len = streamer_read_uhwi (ib);
-}
   /* AUX pointers should be all non-zero for nodes read from the stream.  */
 #ifdef ENABLE_CHECKING
   FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node)
--- 2011-09-29.orig/gcc/lto-section-in.c2011-09-29 15:07:41.0 
+0200
+++ 2011-09-29/gcc/lto-section-in.c 2011-09-29 15:07:50.0 +0200
@@ -53,6 +53,7 @@ const char *lto_section_name[LTO_N_SECTI
   "cgraph",
   "vars",
   "refs",
+  "asm",
   "jmpfuncs",
   "pureconst",
   "reference",
--- 2011-09-29.orig/gcc/lto-streamer-in.c   2011-09-29 15:07:41.0 
+0200
+++ 2011-09-29/gcc/lto-streamer-in.c2011-09-30 15:06:56.0 +0200
@@ -1141,6 +1141,47 @@ lto_input_tree (struct lto_input_block *
 }
 
 
+/* Input toplevel asms.  */
+
+void
+lto_input_toplevel_asms (struct lto_file_decl_data *file_data)
+{
+  size_t len;
+  const char *data = lto_get_section_data (file_data, LTO_section_asm,
+  NULL, &len);
+  const struct lto_asm_header *header = (const struct lto_asm_header *) data;
+  int32_t string_offset;
+  struct data_in *data_in;
+  struct lto_input_block ib;
+  tree str;
+
+  if (! data)
+return;
+
+  string_offset = sizeof (*header) + header->main_size;
+
+  LTO_INIT_INPUT_BLOCK (ib,
+   data + sizeof (*header),
+   

Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Tristan Gingold

On Sep 30, 2011, at 4:43 PM, Joseph S. Myers wrote:

> On Thu, 29 Sep 2011, Tristan Gingold wrote:
> 
>> Hi,
>> 
>> DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least 
>> one extension that is difficult to work-around as it is used in the 
>> system headers: varargs without named argument.  It makes sense on VMS 
>> because of its ABI which passes the number of arguments used.
> 
> If it's about system headers, is there a reason you used a command-line 
> option rather than having a target hook that controls whether the front 
> end accepts this in system headers (only)?

If you prefer a target hook, I'm fine with that.  I will write such a patch.

I don't think it must be restricted to system headers, as it is possible that 
the user 'imports' such a function (and defines it in one of VMS's favorite 
languages such as macro-32 or bliss).

Tristan.



Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Joseph S. Myers
On Thu, 29 Sep 2011, Tristan Gingold wrote:

> Hi,
> 
> DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least 
> one extension that is difficult to work-around as it is used in the 
> system headers: varargs without named argument.  It makes sense on VMS 
> because of its ABI which passes the number of arguments used.

If it's about system headers, is there a reason you used a command-line 
option rather than having a target hook that controls whether the front 
end accepts this in system headers (only)?

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Basile Starynkevitch
On Fri, 30 Sep 2011 09:24:03 +0200
Tristan Gingold  wrote:

> 
> On Sep 29, 2011, at 5:54 PM, Basile Starynkevitch wrote:
> > I believe that such an extension is useful on other systems, even when 
> > their ABI doesn't
> > pass the number of arguments.
> > 
> > The use case I would have in mind is when the signature of the called 
> > function (that is
> > the number & types of arguments) is determined by something else, perhaps a 
> > global
> > variable or data. Think e.g. of a printf-like function, except that the 
> > format string is
> > conventionally assigned to some fixed global before calling it.
> 
> In fact you can't access the arguments with ANSI-C as va_start needs a 
> named argument. So you can't write such code.

I agree with that point, but that suggests that such a function is not
DEC-like (and IIRC, in the good days of <varargs.h> ten years ago, not
today's <stdarg.h>, such a function was possible).

I was only saying that there is no reason to call such an extension DEC-like.
And apparently, you tested it on an x86/Linux which is not a DEC system :-)


And I do think that such an extension can be useful.

Cheers.


-- 
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basilestarynkevitchnet mobile: +33 6 8501 2359
8, rue de la Faiencerie, 92340 Bourg La Reine, France
*** opinions {are only mine, sont seulement les miennes} ***


Re: [PATCH] Look at restrict disambiguation in tree-ssa-alias.c unconditionally (PR tree-optimization/50522)

2011-09-30 Thread Joseph S. Myers
On Mon, 26 Sep 2011, Jakub Jelinek wrote:

> Hi!
> 
> Adding Joseph and Jason to CC.
> 
> On Mon, Sep 26, 2011 at 04:56:20PM +0200, Richard Guenther wrote:
> > Let's see what kind of fallout we get ;)  For example, if the
> > following is valid C code I expect we will vectorize the second
> > loop (disambiguating p[i] and q[i]) bogously:
> > 
> > void foo (int *p)
> > {
> >   int * __restrict p1 = p;
> >   int * __restrict p2 = p + 32;
> >   int *q;
> >   int i;
> >   for (i = 0; i < 32; ++i)
> > p1[i] = p2[i];
> >   p = p1;
> >   q = p2 - 31;
> >   for (i = 0; i < 32; ++i)
> > p[i] = q[i];
> > }
> > 
> > because p and q base on different restrict qualified pointers
> > (p1 and p2 respective).  At the moment we are safe from this
> > because of the TYPE_RESTRICT checks.
> > 
> > Any opinion on the above?  Is it valid to base non-restrict
> > pointers on restrict ones?  It would be sort-of weird at least,
> > but at least I don't think the first loop use is bogus (even
> > though the pointed-to objects are the same).
> 
> If the last loop was
>   for (i = 0; i < 32; i++)
> q[i] = p[i];
> then I believe the above would be clearly invalid C99, because
> an object X (say incoming p[4]) would be modified in the same block
> using a pointer based on p1 and using a pointer not based on p1
> (q), which would violate the requirements that if the object is
> modified through lvalue whose address is based on p1, all modifications
> to X in that block should be done through lvalues whose address is
> based on p1.  In the above testcase all modifications are made through
> lvalues whose addresses are p1 based though, so it is less clear.
> Joseph?

If an object that is accessed by a restricted pointer is also modified, 
then all accesses (not just all modifications) must be through pointers 
based on the restricted pointer.  So in the original loop with p[i] = 
q[i], q[i] for i from 0 to 30 is an object that was previously modified 
through p1 and is now being accessed through p2.  So this code appears 
invalid to me.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [VTA, PR49310] O(n+m)-ish emit_notes

2011-09-30 Thread Jakub Jelinek
On Tue, Sep 27, 2011 at 04:26:47PM -0300, Alexandre Oliva wrote:
> On Sep 20, 2011, Jakub Jelinek  wrote:
> 
> > For NOTE_INSN_CALL_ARG_LOCATION, the locations aren't location lists, but
> > a single location at the point of the call.  They are independent of
> > all other locations, so any kind of caching only decreases the chance
> > that a suitable location
> 
> With the proposed patch, we cache full expressions, and we should have
> an expression if there's a location for the expression (save for
> expression depth limits).
> 
> > is found (and as the numbers show, it decreases it a lot).
> 
> It decreased because of a bug: many equivalence expressions that used to
> be only in the cselib equivalence lists were no longer used with the
> proposed patch.  I have a fix to bring them back in, but see below.

Ok.

> >> > can't it be postponed to start of vt_emit* phase
> 
> >> If my hunch is correct, no.  My concern is precisely that the equivalent
> >> may cease to hold (say once we cross a val_reset within a loop), but
> >> we'll keep on relying on it.
> 
> > After vt_initialize, all cselib locations should hold are just some
> > expressions containing VALUEs, constants, ENTRY_VALUEs and nothing else,
> > all REGs and MEMs are supposed to be flushed.  Those are equivalences that
> > are always constant, they don't need any kind of resetting and they never
> > cease to hold.  Say VALUE3 is always VALUE1 + VALUE2.
> 
> I realize that.  The problem is, I had observed nonsensical equivalences
> in dataflow_set loc lists such as (plus (value) (const_int)) in the
> location list of the value itself (and a nonzero constant).  After much
> pondering, I've concluded that this was a symptom of the first round of
> dataflow analysis in the initial implementation of VTA, in which we used
> union rather than intersection semantics.  This is no longer the case,
> and I've now convinced myself this can't happen any more, so we can rely
> on cselib equivalences, not just for expanding location expressions as
> we did before my patch (and will do again in a subsequent version of
> it), but also for dataflow set merging (the next algorithm I'm going to
> try to optimize).

Ok.

> >> > Can't you use ENUM_BITFIELD (onepart_enum) onepart : 8; instead?
> 
> >> I don't think it will then be packed in the same word as n_var_parts.
> 
> > enum A { B, C, D };
> > struct S { char a; enum A b : 8; char c; char d; };
> > int i = sizeof (struct S);
> 
> > results in i = 4 for all the cc1 and cc1plus cross compilers on my box I've
> > tried, with various ABI options.
> 
> Yeah, I'm pretty sure GCC will behave like that on nearly all ABIs, but
> that's not mandated by standards.  Indeed, IIRC even enum bitfields
> aren't mandated by standards.  That's why we have ENUM_BITFIELD, after
> all!  Anyway, since ENUM_BITFIELD was supposed to address this very
> issue, I guess it just makes sense for me to go with it ;-)

While C doesn't have enum bitfields in the standard, C++ does, so it isn't
something very strange.  Furthermore, nothing relies on it being packed up
in the struct, it is just an optimization, so if some compiler doesn't
support it or doesn't pack it well, nothing bad happens.

> > If it is just performance optimization, I'd say there should be
> >   gcc_checking_assert (DECL_RTL_SET_P (dtemp));
> > before it to verify and make it obvious that you aren't expecting it to be
> > NULL.
> 
> I'm adding DECL_RTL_KNOWN_SET, with a DECL_RTL_SET_P checking_assert,
> and using that.  How's that?

Fine with me.

Jakub


[testsuite] Don't XFAIL gcc.dg/graphite/interchange-14.c (PR tree-optimization/49662)

2011-09-30 Thread Rainer Orth
It seems that the following three tests don't fail anymore anywhere for
some time, so the following patch removes the three xfail's to avoid the
noise from XPASSes.

Tested with the appropriate runtest invocation on i386-pc-solaris2.11.

Ok for mainline?

Rainer


2011-09-30  Rainer Orth  

PR tree-optimization/49662
* gcc.dg/graphite/interchange-14.c: Remove xfail *-*-*.
* gcc.dg/graphite/interchange-15.c: Likewise.
* gcc.dg/graphite/interchange-mvt.c: Likewise.

# HG changeset patch
# Parent 05bbade90d8c63fc0779cf1118fa9c161c417df7
Don't XFAIL gcc.dg/graphite/interchange-14.c (PR tree-optimization/49662)

diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-14.c b/gcc/testsuite/gcc.dg/graphite/interchange-14.c
--- a/gcc/testsuite/gcc.dg/graphite/interchange-14.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-14.c
@@ -55,5 +55,5 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-15.c b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
--- a/gcc/testsuite/gcc.dg/graphite/interchange-15.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-15.c
@@ -49,6 +49,6 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
 
diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
--- a/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
+++ b/gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
@@ -59,6 +59,6 @@ main (void)
 }
 
 /* PRE destroys the perfect nest and we can't cope with that yet.  */
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
 

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog= [4/n]

2011-09-30 Thread Georg-Johann Lay
This is the patch to add log output to LEGITIMIZE_RELOAD_ADDRESS.

The code is moved from a macro in avr.h to a new function in avr.c.

Functionality is the same, but IMO it's more convenient to have it as a
function than as a quite long macro.

Ok for trunk?

PR target/50566
* config/avr/avr-protos.h (avr_legitimize_reload_address): New
prototype.
* config/avr/avr.h (LEGITIMIZE_RELOAD_ADDRESS): Copy worker code
from here...
* config/avr/avr.c (avr_legitimize_reload_address) ...to this new
function.  Log if avr_log.legitimize_reload_address.
Index: config/avr/avr-protos.h
===
--- config/avr/avr-protos.h	(revision 179378)
+++ config/avr/avr-protos.h	(working copy)
@@ -106,6 +106,7 @@ extern RTX_CODE avr_normalize_condition
 extern void out_shift_with_cnt (const char *templ, rtx insn,
 rtx operands[], int *len, int t_len);
 extern rtx avr_incoming_return_addr_rtx (void);
+extern rtx avr_legitimize_reload_address (rtx, enum machine_mode, int, int, int, int, rtx (*)(rtx,int));
 #endif /* RTX_CODE */
 
 #ifdef REAL_VALUE_TYPE
Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 179378)
+++ config/avr/avr.c	(working copy)
@@ -1290,6 +1290,87 @@ avr_legitimize_address (rtx x, rtx oldx,
 }
 
 
+/* Implement `LEGITIMIZE_RELOAD_ADDRESS'.  */
+/* This will allow register R26/27 to be used where it is no worse than normal
+   base pointers R28/29 or R30/31.  For example, if base offset is greater
+   than 63 bytes or for R++ or --R addressing.  */
+
+rtx
+avr_legitimize_reload_address (rtx x, enum machine_mode mode,
+   int opnum, int type, int addr_type,
+   int ind_levels ATTRIBUTE_UNUSED,
+   rtx (*mk_memloc)(rtx,int))
+{
+  if (avr_log.legitimize_reload_address)
+avr_edump ("\n%?:%m %r\n", mode, x);
+  
+  if (1 && (GET_CODE (x) == POST_INC
+|| GET_CODE (x) == PRE_DEC))
+{
+  push_reload (XEXP (x, 0), XEXP (x, 0), &XEXP (x, 0), &XEXP (x, 0),
+   POINTER_REGS, GET_MODE (x), GET_MODE (x), 0, 0,
+   opnum, RELOAD_OTHER);
+  
+  if (avr_log.legitimize_reload_address)
+avr_edump (" RCLASS = %R\n IN = %r\n OUT = %r\n",
+   POINTER_REGS, XEXP (x, 0), XEXP (x, 0));
+  
+  return x;
+}
+  
+  if (GET_CODE (x) == PLUS
+  && REG_P (XEXP (x, 0))
+  && 0 == reg_equiv_constant (REGNO (XEXP (x, 0)))
+  && CONST_INT_P (XEXP (x, 1))
+  && INTVAL (XEXP (x, 1)) >= 1)
+{
+  bool fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode);
+  
+  if (fit)
+{
+  if (reg_equiv_address (REGNO (XEXP (x, 0))) != 0)
+{
+  int regno = REGNO (XEXP (x, 0));
+  rtx mem = mk_memloc (x, regno);
+  
+  push_reload (XEXP (mem, 0), NULL_RTX, &XEXP (mem, 0), NULL,
+   POINTER_REGS, Pmode, VOIDmode, 0, 0,
+   1, addr_type);
+  
+  if (avr_log.legitimize_reload_address)
+avr_edump (" RCLASS = %R\n IN = %r\n OUT = %r\n",
+   POINTER_REGS, XEXP (mem, 0), NULL_RTX);
+  
+  push_reload (mem, NULL_RTX, &XEXP (x, 0), NULL,
+   BASE_POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
+   opnum, type);
+  
+  if (avr_log.legitimize_reload_address)
+avr_edump (" RCLASS = %R\n IN = %r\n OUT = %r\n",
+   BASE_POINTER_REGS, mem, NULL_RTX);
+  
+  return x;
+}
+}
+  else if (! (frame_pointer_needed
+  && XEXP (x, 0) == frame_pointer_rtx))
+{
+  push_reload (x, NULL_RTX, &x, NULL,
+   POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
+   opnum, type);
+  
+  if (avr_log.legitimize_reload_address)
+avr_edump (" RCLASS = %R\n IN = %r\n OUT = %r\n",
+   POINTER_REGS, x, NULL_RTX);
+  
+  return x;
+}
+}
+  
+  return NULL_RTX;
+}
+
+
 /* Helper function to print assembler resp. track instruction
sequence lengths.

Index: config/avr/avr.h
===
--- config/avr/avr.h	(revision 179378)
+++ config/avr/avr.h	(working copy)
@@ -385,51 +385,17 @@ typedef struct avr_args {
   (REGNO (X) >= FIRST_PSEUDO_REGISTER || REG_OK_FOR_BASE_STRICT_P(X))
 
 #define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
-
-/* LEGITIMIZE_RELOAD_ADDRESS will allow register R26/27 to be used, where it
-   is no worse than normal base pointers R28/29 and R30/31. For example:
-   If base offset is greater than 63 bytes or for R++ or --R

Re: Update testsuite to run with slim LTO

2011-09-30 Thread Diego Novillo

On 11-09-27 13:23 , Jan Hubicka wrote:


The sync and pr34850 tests don't pass with slim LTO. The reason is that they
expect diagnostics that are output too late in compilation (usually at
expansion time).  These should probably be fixed as a QOI issue, but they are
not a real bug - the diagnostics will be output at link time.  I will open a PR
tracking this.  We should probably output pretty much everything by the end of
early opts, except for stuff that really looks for optimization results,
especially now that we handle always inline in early inlining.


Could you add a link to the PR in these tests?


* lib/lto.exp: When linker plugin is available test both
plugin/non-plugin LTO paths as well as fat and slim LTO.
* lib/c-torture.exp: Likewise.
* lib/gcc-dg.exp: Likewise.
* gcc.c-torture/compile/sync-1.c: Do not run with slim LTO.
* gcc.c-torture/compile/sync-2.c: Do not run with slim LTO.
* gcc.c-torture/compile/sync-3.c: Do not run with slim LTO.
* gcc.dg/noncompile/920507-1.c: Do not run with slim LTO.
* g++.dg/torture/pr34850.C: Do not run with slim LTO


OK with the change above.


Diego.


Re: [Patch Ada/Darwin] factor LIBGNAT_TARGET_PAIRS for darwin sub-targets.

2011-09-30 Thread Iain Sandoe


On 29 Sep 2011, at 15:37, Arnaud Charlet wrote:


No functional change, just factor out the common LIBGNAT_TARGET_PAIRS
across the port.
OK for trunk?


OK


regrettably, I'd allowed my ppc and x86 trees to get out of sync, and  
the applied patch was not correct on powerpc.

corrected by a partial reversion of r179358 as below,
apologies for the noise
Iain

ada:

* gcc-interface/Makefile.in (Darwin): Partial reversion of previous
change to powerpc section.

Index: gcc/ada/gcc-interface/Makefile.in
===
--- gcc/ada/gcc-interface/Makefile.in   (revision 179378)
+++ gcc/ada/gcc-interface/Makefile.in   (working copy)
@@ -2187,7 +2187,9 @@ ifeq ($(strip $(filter-out darwin%,$(osys))),)
   ifeq ($(strip $(filter-out powerpc%,$(arch))),)
 LIBGNAT_TARGET_PAIRS += \
   s-intman.adb

RE: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog= [3/n]

2011-09-30 Thread Weddington, Eric


> -Original Message-
> From: Georg-Johann Lay [mailto:a...@gjlay.de]
> Sent: Friday, September 30, 2011 6:18 AM
> To: gcc-patches@gcc.gnu.org
> Cc: Denis Chertykov; Weddington, Eric
> Subject: Re: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog=
> [3/n]
> 
> This adds log output to avr_address_cost.
> 
> Ok for trunk?
> 
> Johann
> 
>   PR target/50566
>   * config/avr/avr-protos.h (avr_log_t): New field address_cost.
>   * config/avr/avr.c (avr_address_cost): Use it.
>   * config/avr/avr-log.c (avr_log_set_avr_log): Initialize it.
>   (avr_log_vadump): Unknown %-codes finish printing.

Please commit.

Eric



[PATCH] Fix PR50574

2011-09-30 Thread Richard Guenther

This fixes PR50574: we shouldn't compare vector mode sizes because
vectors can have BLKmode in gimple.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2011-09-30  Richard Guenther  

PR middle-end/50574
* tree-cfg.c (verify_gimple_comparison): Compare component
mode sizes for vector comparisons.

Index: gcc/tree-cfg.c
===
--- gcc/tree-cfg.c  (revision 179378)
+++ gcc/tree-cfg.c  (working copy)
@@ -3229,8 +3302,8 @@ verify_gimple_comparison (tree type, tre
 }
 
   if (TYPE_VECTOR_SUBPARTS (type) != TYPE_VECTOR_SUBPARTS (op0_type)
- || (GET_MODE_SIZE (TYPE_MODE (type))
- != GET_MODE_SIZE (TYPE_MODE (op0_type
+ || (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)))
+ != GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op0_type)
 {
   error ("invalid vector comparison resulting type");
   debug_generic_expr (type);



Re: [PATCH, PR50527] Don't assume alignment of vla-related allocas.

2011-09-30 Thread Richard Guenther
On Thu, Sep 29, 2011 at 3:15 PM, Tom de Vries  wrote:
> On 09/28/2011 11:53 AM, Richard Guenther wrote:
>> On Wed, Sep 28, 2011 at 11:34 AM, Tom de Vries  
>> wrote:
>>> Richard,
>>>
>>> I got a patch for PR50527.
>>>
>>> The patch prevents the alignment of vla-related allocas to be set to
>>> BIGGEST_ALIGNMENT in ccp. The alignment may turn out smaller after folding
>>> the alloca.
>>>
>>> Bootstrapped and regtested on x86_64.
>>>
>>> OK for trunk?
>>
>> Hmm.  As gfortran with -fstack-arrays uses VLAs it's probably bad that
>> the vectorizer then will no longer see that the arrays are properly aligned.
>>
>> I'm not sure what the best thing to do is here, other than trying to record
>> the alignment requirement of the VLA somewhere.
>>
>> Forcing the alignment of the alloca replacement decl to BIGGEST_ALIGNMENT
>> has the issue that it will force stack-realignment which isn't free (and the
>> point was to make the decl cheaper than the alloca).  But that might
>> possibly be the better choice.
>>
>> Any other thoughts?
>
> How about the approach in this (untested) patch? Using the DECL_ALIGN of the 
> vla
> for the new array prevents stack realignment for folded vla-allocas, also for
> large vlas.
>
> This will not help in vectorizing large folded vla-allocas, but I think it's 
> not
> reasonable to expect BIGGEST_ALIGNMENT when writing a vla (although that has
> been the case up until we started to fold). If you want to trigger 
> vectorization
> for a vla, you can still use the aligned attribute on the declaration.
>
> Still, the unfolded vla-allocas will have BIGGEST_ALIGNMENT, also without 
> using
> an attribute on the decl. This patch exploits this by setting it at the end of
> the 3rd pass_ccp, renamed to pass_ccp_last. This is not very effective in
> propagation though, because although the ptr_info of the lhs is propagated via
> copy_prop afterwards, it's not propagated anymore via ccp.
>
> Another way to do this would be to set BIGGEST_ALIGNMENT at the end of ccp2 
> and
> not fold during ccp3.

Ugh, somehow I like this the least ;)

How about lowering VLAs to

  p = __builtin_alloca (...);
  p = __builtin_assume_aligned (p, DECL_ALIGN (vla));

and not assume anything for alloca itself if it feeds a
__builtin_assume_aligned?

Or rather introduce a __builtin_alloca_with_align () and for VLAs do

 p = __builtin_alloca_with_align (..., DECL_ALIGN (vla));

that's less awkward to use?

Sorry for not having a clear plan here ;)

Richard.

> Thanks,
> - Tom
>
>


Re: [Patch 1/4] ARM 64 bit sync atomic operations [V2]

2011-09-30 Thread Ramana Radhakrishnan
Hi Dave,


The nit-picky bit - There are still a number of formatting issues with
your patch.  Could you run your patch through
contrib/check_GNU_style.sh and correct these?  They are typically
problems with the number of spaces between a full stop and the
end of a comment, lines with trailing whitespace, and a few lines with
more than 80 characters.  Thanks.

>@@ -23590,82 +23637,142 @@ arm_output_sync_loop (emit_f emit,
>
>+  else
>+  {
>+/* Silence false potentially unused warning */
>+required_value_lo = NULL;
>+required_value_hi = NULL;
>+  }
>

s/NULL/NULL_RTX in a number of places in arm.c

>@@ -23516,14 +23530,41 @@ arm_output_strex (emit_f emit,
> rtx value,
> rtx memory)
> {
>-  const char *suffix = arm_ldrex_suffix (mode);
>-  rtx operands[3];
>+  rtx operands[4];
>
>   operands[0] = result;
>   operands[1] = value;
>-  operands[2] = memory;
>-  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
>- cc);
>+  if (mode != DImode)
>+{
>+  const char *suffix = arm_ldrex_suffix (mode);
>+  operands[2] = memory;
>+  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
>+suffix, cc);
>+}
>+  else
>+{
>+  /* The restrictions on target registers in ARM mode are that the two
>+   registers are consecutive and the first one is even; Thumb is
>+   actually more flexible, but DI should give us this anyway.
>+   Note that the 1st register always gets the lowest word in memory.  */
>+  gcc_assert ((REGNO (value) & 1) == 0);
>+  operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
>+  operands[3] = memory;
>+  arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
>+ cc);
>+}
>

The restriction is actually mandatory for ARM state only and thus I'm fine
with this assertion being true only in ARM state.


I don't like duplicating the tests from gcc.dg into gcc.target/arm.
If you wanted to check for assembler output specific to a target you could
add your own conditions to the test in gcc.dg and conditionalize that on
target arm_eabi

Something like :

{ dg-final { scan-assembler "ldrexd\t" { target arm_eabi } } }

I would like a testsuite maintainer to comment on the testsuite infrastructure
bits as well, but I have a few comments below.



>> +# Return 1 if the target supports atomic operations on "long long" and can 
>> actually
>+# execute them
>+# So far only put checks in for ARM, others may want to add their own
>+proc check_effective_target_sync_longlong { } {
>+return [check_runtime sync_longlong_runtime {
>+  #include 
>+  int main()
>+  {
>+  long long l1;
>+
>+  if (sizeof(long long)!=8)

Space between ')' and ! as well as '=' and 8

>+exit(1);
>+
>+  #ifdef __arm__

Why is this checking only for ARM state?  We could have ldrexd in T2 as
well?

Otherwise the functionality looks good to me. Can you confirm that
this has survived a test run for v7-a Thumb-2 and v7-a ARM state?

cheers
Ramana


Re: [PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 01:54:16PM +0100, Jan Beulich wrote:
> >>> On 30.09.11 at 14:47, Jakub Jelinek  wrote:
> > On Fri, Sep 30, 2011 at 12:43:54PM +0100, Jan Beulich wrote:
> >> This is so that use of symbols referenced in these asm()-s can be
> >> properly tracked by the compiler, just like is the case for all other
> >> asm()-s. I'm particularly looking forward to use this in the Linux
> >> kernel. It is certainly not very useful in PIC code, at least not without
> >> some extra care.
> > 
> > Even in PIC code it can be useful to have toplevel asm like
> > asm ("..." : : "i" (offsetof (struct S, field)), "i" (some_enum_value), "i" 
> > (sizeof (struct S)));
> > etc.
> 
> But wasn't it you who pointed out that this has limited use in PIC
> mode when I first submitted this?

Sure, some input operands may be problematic in PIC, but the above ones
are just compile time integer constants and those are fine always.

Jakub


Re: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 2:44 PM, Jakub Jelinek  wrote:
> On Fri, Sep 30, 2011 at 02:26:40PM +0200, Richard Guenther wrote:
>> > It is boolean only in some testcases, the is_bool stuff discussed at the
>> > beginning above was originally just an early return
>> >  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
>> >    return;
>> > before the loop, but it turned out that often the type of the | operands
>> > is integer, with either bool cast to integer, or with the type of EQ_EXPR
>> > etc. being integer instead of bool.
>>
>> Really?  The type of EQ_EXPR should be always either BOOLEAN_TYPE
>> or INTEGRAL_TYPE_P with TYPE_PRECISION == 1.  That's what
>> the gimple verifier checks.  Or do you mean that fold introduces these
>> kind of types during range-test simplification?
>
> Consider:
>
> int
> f1 (int a, int b)
> {
>  int v1 = (a <= 64);
>  int v2 = (a == 66);
>  int v3 = (a == 67);
>  int v4 = (a == 65);
>  return b || v1 || v2 || v3 || v4;
> }
>
> int
> f2 (int a, int b)
> {
>  int v1 = (a <= 64);
>  int v2 = (a == 66);
>  int v3 = (a == 67);
>  int v4 = (a == 65);
>  return b | v1 | v2 | v3 | v4;
> }
>
> in *.dse1 f1 is:
>  D.2744_2 = a_1(D) <= 64;
>  v1_3 = (int) D.2744_2;
>  D.2745_4 = a_1(D) == 66;
>  v2_5 = (int) D.2745_4;
>  D.2746_6 = a_1(D) == 67;
>  v3_7 = (int) D.2746_6;
>  D.2747_8 = a_1(D) == 65;
>  v4_9 = (int) D.2747_8;
>  D.2749_11 = b_10(D) | v1_3;
>  D.2750_12 = D.2749_11 | v2_5;
>  D.2751_13 = D.2750_12 | v3_7;
>  D.2752_14 = D.2751_13 | v4_9;
>  D.2753_15 = D.2752_14 != 0;
>  D.2748_16 = (int) D.2753_15;
>  return D.2748_16;
> and f2 is:
>  D.2735_2 = a_1(D) <= 64;
>  v1_3 = (int) D.2735_2;
>  D.2736_4 = a_1(D) == 66;
>  v2_5 = (int) D.2736_4;
>  D.2737_6 = a_1(D) == 67;
>  v3_7 = (int) D.2737_6;
>  D.2738_8 = a_1(D) == 65;
>  v4_9 = (int) D.2738_8;
>  D.2740_11 = b_10(D) | v1_3;
>  D.2741_12 = D.2740_11 | v2_5;
>  D.2742_13 = D.2741_12 | v3_7;
>  D.2739_14 = D.2742_13 | v4_9;
>  return D.2739_14;
> In both cases, the arguments of BIT_IOR_EXPR are ints
> and init_range_entry needs to go through the casts to reach the
> comparison (on which it figures out that the value is really 0/1,
> well, in this case already on the rhs of the cast, as it is _Bool).

Ah, indeed.  I'll have a look at the updated patch.

Richard.

>        Jakub
>


Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-30 Thread Richard Sandiford
Richard Sandiford  writes:
> In contrast, after the tree optimisers have handed off the initial IL,

um, I meant frontend :-)

> the tree optimisers are more or less in full control.

Richard


Re: [PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Jan Beulich
>>> On 30.09.11 at 14:42, Richard Guenther  wrote:
> On Fri, Sep 30, 2011 at 1:43 PM, Jan Beulich  wrote:
>> This is so that use of symbols referenced in these asm()-s can be
>> properly tracked by the compiler, just like is the case for all other
>> asm()-s. I'm particularly looking forward to use this in the Linux
>> kernel. It is certainly not very useful in PIC code, at least not without
>> some extra care.
> 
> I miss documentation for this.

Just like on the first submission - if top level asm-s as they are without
this change were documented somewhere, I would be happy to extend
that documentation. But honestly, I don't think it's appropriate to ask
me to start writing documentation for this from the ground up - that should
be done by someone more familiar with the base feature, so that
any caveats can be pointed out, and it can be put at a proper
(rather than guessed) location in the documentation.

> This does not address the other issue
> we have, like specifying the set of symbols _defined_ by a toplevel
> asm, right?  I might misremember but sth like
> 
> extern void foo (void);
> asm(""  "foo");
> 
> was supposed to do the trick.  Or should we treat those as outputs
> (given you use inputs for symbol uses)?

I don't recall any discussion of how to deal with symbols defined by a
top level asm - I was just asked to follow the "normal" asm syntax in
having two colons in the middle instead of one as I had originally (not
expecting any use for outputs here).

Jan

> Honza, do you remember if we decided on anything here?
> 
> Thanks,
> Richard.




Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 2:46 PM, Richard Sandiford
 wrote:
> "Jiangning Liu"  writes:
>>> You seem to feel strongly about this because it's a wrong-code bug that
>>> is very easy to introduce and often very hard to detect.  And I
>>> definitely
>>> sympathise with that.  If we were going to do it in a target-
>>> independent
>>> way, though, I think it would be better to scan patterns like epilogue
>>> and
>>> automatically introduce barriers before assignments to
>>> stack_pointer_rtx
>>> (subject to the kind of hook in your patch).  But I still don't think
>>> that's better than leaving the onus on the backend.  The backend is
>>> still responsible for much more complicated things like determining
>>> the correct deallocation and register-restore sequence, and for
>>> determining the correct CFI sequence.
>>>
>>
>> I think middle-end in GCC is actually shared code rather than the part
>> exactly in the middle. A pass working on RTL can be a middle end just
>> because the code can be shared for all targets, and some passes can even
>> work for both GIMPLE and RTL.
>>
>> Actually some optimizations need to work through "shared part" (middle-end)
>> plus "target specific part" (back-end). You are thinking the interface
>> between this "shared part" and "target specific part" should be using
>> "barrier" as a proper model. To some extent I agree with this. However,
>> it doesn't mean the fix should be in back-end rather than middle end,
>> because obviously this problem is a common ABI issue for all targets. If we
>> can abstract this issue to be a shared part, why shouldn't we do it in
>> middle end to reduce the onus of back-end? Back-end should handle the target
>> specific things rather than only the complicated things.
>
> And for avoidance of doubt, the automatic barrier insertion that I
> described would be one way of doing it in target-independent code.
> But...
>
>> If a complicated problem can be implemented in a "shared code" manner, we
>> still want to put it into middle end rather than back-end. I believe those
>> optimizations based on SSA form are complicated enough, but they are all in
>> middle end. This is the logic I'm seeing in GCC.
>
> The situation here is different.  The target-independent rtl code is
> being given a blob of instructions that the backend has generated for
> the epilogue.  There's no fine-tuning beyond that.  E.g. we don't have
> separate patterns for "restore registers", "deallocate stack", "return":
> we just have one monolithic "epilogue" pattern.  The target-independent
> code has very little control.
>
> In contrast, after the tree optimisers have handed off the initial IL,
> the tree optimisers are more or less in full control.  There are very
> few cases where we generate further trees outside the middle-end.  The only
> case I know off-hand is the innards of va_start and va_arg, which can be
> generated by the backend.
>
> So let's suppose we had a similar situation there, where we wanted
> va_arg to do something special in a certain situation.  If we had the
> same three choices of:
>
>  1. use an on-the-side hook to represent the special something
>  2. scan the code generated by the backend and automatically
>     inject the special something at an appropriate place
>  3. require each backend to do it properly from the start
>
> (OK, slightly prejudiced wording :-)) I think we'd still choose 3.
>
>> For this particular issue, I don't think that hook interface I'm
>> proposing is more complicated than the barrier. Instead, it is easier
>> for back-end implementer to be aware of the potential issue before
>> really solving stack red zone problem, because it is very clearly
>> listed in target hook list.
>
> The point for "model it in the IL" supporters like myself is that we
> have both many backends and many rtl passes.  Putting it in a hook keeps
> things simple for the backends, but it means that every rtl pass must be
> aware of this on-the-side dependency.  Perhaps sched2 really is the only
> pass that needs to look at the hook at present.  But perhaps not.
> E.g. dbr_schedule (not a problem on ARM, I realise) also reorders
> instructions, so maybe it would need to be audited to see whether any
> calls to this hook are needed.  And perhaps we'd add more rtl passes
> later.
>
> The point behind using a barrier is that the rtl passes do not then need
> to treat the stack-deallocation dependency as a special case.  They can
> just use the normal analysis and get it right.
>
> In other words, we're both arguing for safety here.

Indeed.  It's certainly not only scheduling that can move instructions,
but RTL PRE, combine, ifcvt all can effectively cause instruction motion
(just to name a few).

Richard.

> Richard
>


Re: [PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Jan Beulich
>>> On 30.09.11 at 14:47, Jakub Jelinek  wrote:
> On Fri, Sep 30, 2011 at 12:43:54PM +0100, Jan Beulich wrote:
>> This is so that use of symbols referenced in these asm()-s can be
>> properly tracked by the compiler, just like is the case for all other
>> asm()-s. I'm particularly looking forward to use this in the Linux
>> kernel. It is certainly not very useful in PIC code, at least not without
>> some extra care.
> 
> Even in PIC code it can be useful to have toplevel asm like
> asm ("..." : : "i" (offsetof (struct S, field)), "i" (some_enum_value), "i" 
> (sizeof (struct S)));
> etc.

But wasn't it you who pointed out that this has limited use in PIC
mode when I first submitted this?

Jan



Re: [PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 12:43:54PM +0100, Jan Beulich wrote:
> This is so that use of symbols referenced in these asm()-s can be
> properly tracked by the compiler, just like is the case for all other
> asm()-s. I'm particularly looking forward to use this in the Linux
> kernel. It is certainly not very useful in PIC code, at least not without
> some extra care.

Even in PIC code it can be useful to have toplevel asm like
asm ("..." : : "i" (offsetof (struct S, field)), "i" (some_enum_value), "i" 
(sizeof (struct S)));
etc.

Jakub


Re: [PATCH] Fix stack red zone bug (PR38644)

2011-09-30 Thread Richard Sandiford
"Jiangning Liu"  writes:
>> You seem to feel strongly about this because it's a wrong-code bug that
>> is very easy to introduce and often very hard to detect.  And I
>> definitely
>> sympathise with that.  If we were going to do it in a target-
>> independent
>> way, though, I think it would be better to scan patterns like epilogue
>> and
>> automatically introduce barriers before assignments to
>> stack_pointer_rtx
>> (subject to the kind of hook in your patch).  But I still don't think
>> that's better than leaving the onus on the backend.  The backend is
>> still responsible for much more complicated things like determining
>> the correct deallocation and register-restore sequence, and for
>> determining the correct CFI sequence.
>> 
>
> I think middle-end in GCC is actually shared code rather than the part
> exactly in the middle. A pass working on RTL can be a middle end just
> because the code can be shared for all targets, and some passes can even
> work for both GIMPLE and RTL.
>
> Actually some optimizations need to work through "shared part" (middle-end)
> plus "target specific part" (back-end). You are thinking the interface
> between this "shared part" and "target specific part" should be using
> "barrier" as a proper model. To some extent I agree with this. However,
> it doesn't mean the fix should be in back-end rather than middle end,
> because obviously this problem is a common ABI issue for all targets. If we
> can abstract this issue to be a shared part, why shouldn't we do it in
> middle end to reduce the onus of back-end? Back-end should handle the target
> specific things rather than only the complicated things. 

And for avoidance of doubt, the automatic barrier insertion that I
described would be one way of doing it in target-independent code.
But...

> If a complicated problem can be implemented in a "shared code" manner, we
> still want to put it into middle end rather than back-end. I believe those
> optimizations based on SSA form are complicated enough, but they are all in
> middle end. This is the logic I'm seeing in GCC.

The situation here is different.  The target-independent rtl code is
being given a blob of instructions that the backend has generated for
the epilogue.  There's no fine-tuning beyond that.  E.g. we don't have
separate patterns for "restore registers", "deallocate stack", "return":
we just have one monolithic "epilogue" pattern.  The target-independent
code has very little control.

In contrast, after the tree optimisers have handed off the initial IL,
the tree optimisers are more or less in full control.  There are very
few cases where we generate further trees outside the middle-end.  The only
case I know off-hand is the innards of va_start and va_arg, which can be
generated by the backend.

So let's suppose we had a similar situation there, where we wanted
va_arg to do something special in a certain situation.  If we had the
same three choices of:

  1. use an on-the-side hook to represent the special something
  2. scan the code generated by the backend and automatically
 inject the special something at an appropriate place
  3. require each backend to do it properly from the start

(OK, slightly prejudiced wording :-)) I think we'd still choose 3.

> For this particular issue, I don't think that hook interface I'm
> proposing is more complicated than the barrier. Instead, it is easier
> for back-end implementer to be aware of the potential issue before
> really solving stack red zone problem, because it is very clearly
> listed in target hook list.

The point for "model it in the IL" supporters like myself is that we
have both many backends and many rtl passes.  Putting it in a hook keeps
things simple for the backends, but it means that every rtl pass must be
aware of this on-the-side dependency.  Perhaps sched2 really is the only
pass that needs to look at the hook at present.  But perhaps not.
E.g. dbr_schedule (not a problem on ARM, I realise) also reorders
instructions, so maybe it would need to be audited to see whether any
calls to this hook are needed.  And perhaps we'd add more rtl passes
later.

The point behind using a barrier is that the rtl passes do not then need
to treat the stack-deallocation dependency as a special case.  They can
just use the normal analysis and get it right.

In other words, we're both arguing for safety here.

Richard


Re: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 02:26:40PM +0200, Richard Guenther wrote:
> > It is boolean only in some testcases, the is_bool stuff discussed at the
> > beginning above was originally just an early return
> >  if (TREE_CODE (TREE_TYPE (exp)) != BOOLEAN_TYPE)
> >    return;
> > before the loop, but it turned out that often the type of the | operands
> > is integer, with either bool cast to integer, or with the type of EQ_EXPR
> > etc. being integer instead of bool.
> 
> Really?  The type of EQ_EXPR should be always either BOOLEAN_TYPE
> or INTEGRAL_TYPE_P with TYPE_PRECISION == 1.  That's what
> the gimple verifier checks.  Or do you mean that fold introduces these
> kind of types during range-test simplification?

Consider:

int
f1 (int a, int b)
{
  int v1 = (a <= 64);
  int v2 = (a == 66);
  int v3 = (a == 67);
  int v4 = (a == 65);
  return b || v1 || v2 || v3 || v4;
}

int
f2 (int a, int b)
{
  int v1 = (a <= 64);
  int v2 = (a == 66);
  int v3 = (a == 67);
  int v4 = (a == 65);
  return b | v1 | v2 | v3 | v4;
}

in *.dse1 f1 is:
  D.2744_2 = a_1(D) <= 64;
  v1_3 = (int) D.2744_2;
  D.2745_4 = a_1(D) == 66;
  v2_5 = (int) D.2745_4;
  D.2746_6 = a_1(D) == 67;
  v3_7 = (int) D.2746_6;
  D.2747_8 = a_1(D) == 65;
  v4_9 = (int) D.2747_8;
  D.2749_11 = b_10(D) | v1_3;
  D.2750_12 = D.2749_11 | v2_5;
  D.2751_13 = D.2750_12 | v3_7;
  D.2752_14 = D.2751_13 | v4_9;
  D.2753_15 = D.2752_14 != 0;
  D.2748_16 = (int) D.2753_15;
  return D.2748_16;
and f2 is:
  D.2735_2 = a_1(D) <= 64;
  v1_3 = (int) D.2735_2;
  D.2736_4 = a_1(D) == 66;
  v2_5 = (int) D.2736_4;
  D.2737_6 = a_1(D) == 67;
  v3_7 = (int) D.2737_6;
  D.2738_8 = a_1(D) == 65;
  v4_9 = (int) D.2738_8;
  D.2740_11 = b_10(D) | v1_3;
  D.2741_12 = D.2740_11 | v2_5;
  D.2742_13 = D.2741_12 | v3_7;
  D.2739_14 = D.2742_13 | v4_9;
  return D.2739_14;
In both cases, the arguments of BIT_IOR_EXPR are ints
and init_range_entry needs to go through the casts to reach the
comparison (on which it figures out that the value is really 0/1,
well, in this case already on the rhs of the cast, as it is _Bool).

Jakub


Re: [PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 1:43 PM, Jan Beulich  wrote:
> This is so that use of symbols referenced in these asm()-s can be
> properly tracked by the compiler, just like is the case for all other
> asm()-s. I'm particularly looking forward to use this in the Linux
> kernel. It is certainly not very useful in PIC code, at least not without
> some extra care.

I miss documentation for this.  This does not address the other issue
we have, like specifying the set of symbols _defined_ by a toplevel
asm, right?  I might misremember but sth like

extern void foo (void);
asm(""  "foo");

was supposed to do the trick.  Or should we treat those as outputs
(given you use inputs for symbol uses)?

Honza, do you remember if we decided on anything here?

Thanks,
Richard.

> gcc/
> 2011-09-30  Jan Beulich  
>
>        * c-parser.c (c_parser_simple_asm_expr): Add new second parameter
>        'inputsp'. Process inputs if caller indicates they are allowed. Adjust
>        calls to c_parser_asm_operands().
>        (c_parser_asm_operands): Change type of second parameter from 'bool'
>        to 'int'. Call c_parser_expression() only for non-negative 'mode', and
>        c_parser_expr_no_commas() otherwise.
>        (c_parser_declaration_or_fndef): Pass NULL as new second argument to
>        c_parser_simple_asm_expr().
>        (c_parser_asm_definition): New local variables 'loc' and 'inputs'.
>        Adjust calls to c_parser_simple_asm_expr() and cgraph_add_asm_node().
>        (c_parser_simple_asm_expr):
>        * cgraph.c (cgraph_add_asm_node): Call check_unique_operand_names() to
>        validate input operands. Store inputs and location.
>        * cgraph.h (struct cgraph_asm_node): Add 'inputs' and 'loc'.
>        (cgraph_add_asm_node): New second and third parameters.
>        * cgraphunit.c (cgraph_output_pending_asms): Pass new second and third
>        arguments to assemble_asm().
>        (process_function_and_variable_attributes): New local variable 'anode'.
>        Process list starting from 'cgraph_asm_nodes'.
>        (cgraph_output_in_order): Pass new second and third arguments to
>        assemble_asm().
>        * cp/parser.c (enum required_token): Add RT_COLON_NO_OUTPUT.
>        (cp_parser_asm_definition): New local variable 'loc'. Correct a
>        comment. Parse and process input operands if permitted.
>        (cp_parser_required_error): Handle new case RT_COLON_NO_OUTPUT.
>        * lto-streamer-in.c (lto_input_toplevel_asms): Pass new second and
>        third arguments to cgraph_add_asm_node().
>        * lto-streamer-out.c (lto_output_toplevel_asms): Also output inputs
>        and location.
>        * output.h (assemble_asm): New second and third parameters.
>        * stmt.c (check_unique_operand_names): Remove static declaration and
>        make global.
>        * tree.h (check_unique_operand_names): Declare.
>        * varasm.c: Include pretty-print.h.
>        (assemble_asm): New parameters 'inputs' and 'loc'. Process inputs if
>        provided.
>
> gcc/testsuite/
> 2011-09-30  Jan Beulich  
>
>        * g++.dg/ext/asm-static-1.C: New.
>        * gcc.dg/asm-static-1.c: New.
>        * gcc.dg/asm-static-2.c: New.
>        * gcc.dg/asm-static-3.c: New.
>        * gcc.dg/asm-static-4.c: New.
>
> --- 2011-09-29.orig/gcc/c-parser.c      2011-09-28 10:56:01.0 +0200
> +++ 2011-09-29/gcc/c-parser.c   2011-09-29 15:07:29.0 +0200
> @@ -1131,7 +1131,7 @@ static struct c_arg_info *c_parser_parms
>  static struct c_arg_info *c_parser_parms_list_declarator (c_parser *, tree,
>                                                          tree);
>  static struct c_parm *c_parser_parameter_declaration (c_parser *, tree);
> -static tree c_parser_simple_asm_expr (c_parser *);
> +static tree c_parser_simple_asm_expr (c_parser *, tree *);
>  static tree c_parser_attributes (c_parser *);
>  static struct c_type_name *c_parser_type_name (c_parser *);
>  static struct c_expr c_parser_initializer (c_parser *);
> @@ -1150,7 +1150,7 @@ static void c_parser_while_statement (c_
>  static void c_parser_do_statement (c_parser *);
>  static void c_parser_for_statement (c_parser *);
>  static tree c_parser_asm_statement (c_parser *);
> -static tree c_parser_asm_operands (c_parser *, bool);
> +static tree c_parser_asm_operands (c_parser *, int);
>  static tree c_parser_asm_goto_operands (c_parser *);
>  static tree c_parser_asm_clobbers (c_parser *);
>  static struct c_expr c_parser_expr_no_commas (c_parser *, struct c_expr *);
> @@ -1623,7 +1623,7 @@ c_parser_declaration_or_fndef (c_parser
>             function definition.  */
>          fndef_ok = false;
>          if (c_parser_next_token_is_keyword (parser, RID_ASM))
> -           asm_name = c_parser_simple_asm_expr (parser);
> +           asm_name = c_parser_simple_asm_expr (parser, NULL);
>          if (c_parser_next_token_is_keyword (parser, RID_ATTRIBUTE))
>            postfix_attrs = c_parser_attributes (parser);
>          if (c_parser_next_token_is

Re: [PATCH 1/2] LTO: split out writing of top level asm nodes

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 1:43 PM, Jan Beulich  wrote:
> Split out LTO's writing of top level asm nodes in preparation of extending
> what needs to be written out when top level asm-s get enhanced to accept a
> limited set of input operands.

Ok with ...

> gcc/
> 2011-09-30  Jan Beulich  
>
>        * lto-cgraph.c (output_cgraph): Remove processing of 
> 'cgraph_asm_nodes',
>        call lto_output_toplevel_asms() instead.
>        (input_cgraph_1): Remove loop calling cgraph_add_asm_node(), call
>        lto_input_toplevel_asms() instead.
>        * lto-section-in.c (lto_section_name): Add "asm" entry.
>        * lto-streamer-in.c (lto_input_toplevel_asms): New.
>        * lto-streamer-out.c (lto_output_toplevel_asms): New.
>        * lto-streamer.h (LTO_minor_version): Bump.
>        (enum lto_section_type): Add LTO_section_asm.
>        (struct lto_asm_header): New.
>        (lto_input_toplevel_asms, lto_output_toplevel_asms): Declare.
>        * tree-streamer.h (streamer_write_string_cst): Declare.
>        * tree-streamer-out.c (write_string_cst): Rename to
>        streamer_write_string_cst and make global.
>        (streamer_write_tree_header): Adjust call to renamed function.
>
> --- 2011-09-29.orig/gcc/lto-cgraph.c    2011-09-28 10:56:01.0 +0200
> +++ 2011-09-29/gcc/lto-cgraph.c 2011-09-29 15:07:23.0 +0200
> @@ -817,7 +817,6 @@ output_cgraph (cgraph_node_set set, varp
>   int i, n_nodes;
>   lto_cgraph_encoder_t encoder;
>   lto_varpool_encoder_t varpool_encoder;
> -  struct cgraph_asm_node *can;
>   static bool asm_nodes_output = false;
>
>   if (flag_wpa)
> @@ -854,6 +853,8 @@ output_cgraph (cgraph_node_set set, varp
>
>   streamer_write_uhwi_stream (ob->main_stream, 0);
>
> +  lto_destroy_simple_output_block (ob);
> +
>   /* Emit toplevel asms.
>      When doing WPA we must output every asm just once.  Since we do not 
> partition asm
>      nodes at all, output them to first output.  This is kind of hack, but 
> should work
> @@ -861,19 +862,9 @@ output_cgraph (cgraph_node_set set, varp
>   if (!asm_nodes_output)
>     {
>       asm_nodes_output = true;
> -      for (can = cgraph_asm_nodes; can; can = can->next)
> -       {
> -         int len = TREE_STRING_LENGTH (can->asm_str);
> -         streamer_write_uhwi_stream (ob->main_stream, len);
> -         for (i = 0; i < len; ++i)
> -           streamer_write_char_stream (ob->main_stream,
> -                                       TREE_STRING_POINTER 
> (can->asm_str)[i]);
> -       }
> +      lto_output_toplevel_asms ();
>     }
>
> -  streamer_write_uhwi_stream (ob->main_stream, 0);
> -
> -  lto_destroy_simple_output_block (ob);
>   output_varpool (set, vset);
>   output_refs (set, vset, encoder, varpool_encoder);
>  }
> @@ -1185,7 +1176,6 @@ input_cgraph_1 (struct lto_file_decl_dat
>   VEC(cgraph_node_ptr, heap) *nodes = NULL;
>   struct cgraph_node *node;
>   unsigned i;
> -  unsigned HOST_WIDE_INT len;
>
>   tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
>   while (tag)
> @@ -1206,18 +1196,8 @@ input_cgraph_1 (struct lto_file_decl_dat
>       tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
>     }
>
> -  /* Input toplevel asms.  */
> -  len = streamer_read_uhwi (ib);
> -  while (len)
> -    {
> -      char *str = (char *)xmalloc (len + 1);
> -      for (i = 0; i < len; ++i)
> -       str[i] = streamer_read_uchar (ib);
> -      cgraph_add_asm_node (build_string (len, str));
> -      free (str);
> +  lto_input_toplevel_asms (file_data);
>
> -      len = streamer_read_uhwi (ib);
> -    }
>   /* AUX pointers should be all non-zero for nodes read from the stream.  */
>  #ifdef ENABLE_CHECKING
>   FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node)
> --- 2011-09-29.orig/gcc/lto-section-in.c        2011-09-28 10:56:01.0 
> +0200
> +++ 2011-09-29/gcc/lto-section-in.c     2011-09-29 15:07:23.0 +0200
> @@ -53,6 +53,7 @@ const char *lto_section_name[LTO_N_SECTI
>   "cgraph",
>   "vars",
>   "refs",
> +  "asm",
>   "jmpfuncs",
>   "pureconst",
>   "reference",
> --- 2011-09-29.orig/gcc/lto-streamer-in.c       2011-09-28 10:56:01.0 
> +0200
> +++ 2011-09-29/gcc/lto-streamer-in.c    2011-09-29 15:07:23.0 +0200
> @@ -1141,6 +1141,47 @@ lto_input_tree (struct lto_input_block *
>  }
>
>
> +/* Input toplevel asms.  */
> +
> +void
> +lto_input_toplevel_asms (struct lto_file_decl_data *file_data)
> +{
> +  size_t len;
> +  const char *data = lto_get_section_data (file_data, LTO_section_asm,
> +                                          NULL, &len);
> +  const struct lto_asm_header *header = (const struct lto_asm_header *) data;
> +  int32_t string_offset;
> +  struct data_in *data_in;
> +  struct lto_input_block ib;
> +  tree str;
> +
> +  if (! data)
> +    return;
> +
> +  string_offset = sizeof (*header) + header->main_size;
> +
> +  LTO_INIT_INPUT_BLOCK (ib,
> +                       data + sizeof (*header),
> +                       0,
> +                       header->

Re: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 1:11 PM, Jakub Jelinek  wrote:
> On Fri, Sep 30, 2011 at 12:33:07PM +0200, Richard Guenther wrote:
>> > +  low = build_int_cst (TREE_TYPE (exp), 0);
>> > +  high = low;
>> > +  in_p = 0;
>> > +  strict_overflow_p = false;
>> > +  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
>>
>> Effective boolean are also TYPE_PRECISION () == 1 types.  Remember
>> we don't preserve conversions from BOOLEAN_TYPE to such types.
>
> I can replace these with TYPE_PRECISION (TREE_TYPE (exp)) == 1;
> checks if you prefer, though maybe it would need to also do
> && TYPE_UNSIGNED (TREE_TYPE (exp)), at least if different operands of
> the | have different inner 1-bit signedness we could not merge them.

The canonical test for boolean-kind types is now

  TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
  || TYPE_PRECISION (TREE_TYPE (exp)) == 1

Ada for example has non-1-precision BOOLEAN_TYPEs.  But, see
very below.

>> > +       CASE_CONVERT:
>> > +         is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
>>
>> Likewise.  Though I wonder why |=?  Does it matter whether the extension
>> sign- or zero-extends?
>
> I think the |= is needed, this loop follows through not just casts, but
> then also through the comparisons and again through casts.
> It doesn't matter if the types or casts inside of the comparison argument
> are bool or not (and likely they will not be), yet we don't want to stop
> iterating because of that.
> It wants to reconstruct ranges from what e.g. fold in fold_range_test
> already created before, so stuff like
> (int) (((unsigned) x - 64U) <= 31U)
> for + [64, 95] range.
>
>> > +                 if (integer_onep (range_binop (LT_EXPR, 
>> > integer_type_node,
>> > +                                                p->low, 0, q->low, 0)))
>>
>> that's just
>>
>> tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
>> if (tem && integer_onep (tem))
>>   return -1;
>
> Ok, can change that.
>
>> which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
>> (ISTR some integer_onep () variant that handles a NULL argument ...)
>
> Couldn't find any.  integer_onep needs non-NULL, compare_tree_int too.
>
>> > +  /* Try to merge ranges.  */
>> > +  for (first = i; i < length; i++)
>> > +    {
>> > +      tree low = ranges[i].low;
>> > +      tree high = ranges[i].high;
>> > +      int in_p = ranges[i].in_p;
>> > +      bool strict_overflow_p = ranges[i].strict_overflow_p;
>> > +
>> > +      for (j = i + 1; j < length; j++)
>> > +       {
>>
>> That looks quadratic - do we want to limit this with a --param, simply
>> partitioning the array into quadratic chunks?
>
> This isn't quadratic (except in the hypothetical case
> where all merge_ranges calls would succeed, but then
> build_range_test would fail).  In the likely case where if range merges
> succeed then update_range_test succeeds too this is just linear (plus the
> qsort before that which isn't linear though).
> So perhaps:
> +      if (j > i + 1
> +         && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +       {
> +         i = j - 1;
> +         any_changes = true;
> +       }
> could be
> +      if (j > i + 1)
> +       {
> +         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, 
> opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +           any_changes = true;
> +         i = j - 1;
> +       }
> (then it isn't quadratic), or could try a few times before giving up:
> +      if (j > i + 1)
> +       {
> +         if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, 
> opcode,
> +                               ops, ranges[i].exp, in_p, low, high,
> +                               strict_overflow_p))
> +           {
> +             any_changes = true;
> +             i = j - 1;
> +           }
> +         else if (update_fail_count == 64)
> +           i = j - 1;
> +         else
> +           update_fail_count = 0;
> +       }
> where int update_fail_count = 0; would be after the
>      bool strict_overflow_p = ranges[i].strict_overflow_p;
> line in the outer loop.
>
>> > +         if (ranges[i].exp != ranges[j].exp)
>> > +           break;
>>
>> Or isn't it too bad because of this check?
>
> The above limits the chunks to a particular SSA_NAME.  Within each
> chunk for the same SSA_NAME, the ranges are sorted in a way that
> merge_ranges will likely succeed, so it isn't quadratic unless
> update_range_test fails (see above).
>
> BTW, the second loop (the one that attempts to optimize
> x == 1 || x == 3 into (x & ~2) == 1 etc.) is quadratic, which is why
> there is
>  for (j = i + 1; j < length && j < i + 64; j++)
> don't think it is a limit people will often run into and thus I don't
> think it is worth adding a --param= for that.

Ok.

>> > +  

Re: [Patch,AVR]: PR50566: Better log output with -mdeb/-mlog= [3/n]

2011-09-30 Thread Georg-Johann Lay
This adds log output to avr_address_cost.

Ok for trunk?

Johann

PR target/50566
* config/avr/avr-protos.h (avr_log_t): New field address_cost.
* config/avr/avr.c (avr_address_cost): Use it.
* config/avr/avr-log.c (avr_log_set_avr_log): Initialize it.
(avr_log_vadump): Unknown %-codes finish printing.

Index: config/avr/avr-log.c
===
--- config/avr/avr-log.c	(revision 179378)
+++ config/avr/avr-log.c	(working copy)
@@ -283,7 +283,12 @@ avr_log_vadump (FILE *file, const char *
   abort();
   
 default:
-  fputc (*(fmt-1), file);
+  /* Unknown %-code: Stop printing */
+  
+  fprintf (file, "??? %%%c ???\n", *(fmt-1));
+  fmt = "";
+  
+  break;
 }
   break; /* % */
 }
@@ -318,6 +323,7 @@ avr_log_set_avr_log (void)
   SET_DUMP_DETAIL (legitimize_address);
   SET_DUMP_DETAIL (legitimize_reload_address);
   SET_DUMP_DETAIL (constraints);
+  SET_DUMP_DETAIL (address_cost);
 
 #undef SET_DUMP_DETAIL
 }
Index: config/avr/avr-protos.h
===
--- config/avr/avr-protos.h	(revision 179378)
+++ config/avr/avr-protos.h	(working copy)
@@ -129,6 +129,7 @@ typedef struct
   unsigned legitimize_address :1;
   unsigned legitimize_reload_address :1;
   unsigned constraints :1;
+  unsigned address_cost :1;
 } avr_log_t;
 
 extern avr_log_t avr_log;
Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 179378)
+++ config/avr/avr.c	(working copy)
@@ -6573,23 +6573,33 @@ avr_rtx_costs (rtx x, int codearg, int o
   return done;
 }
 
-/* Calculate the cost of a memory address.  */
+
+/* Implement `TARGET_ADDRESS_COST'.  */
 
 static int
 avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
 {
+  int cost = 4;
+  
   if (GET_CODE (x) == PLUS
-  && GET_CODE (XEXP (x,1)) == CONST_INT
-  && (REG_P (XEXP (x,0)) || GET_CODE (XEXP (x,0)) == SUBREG)
-  && INTVAL (XEXP (x,1)) >= 61)
-return 18;
-  if (CONSTANT_ADDRESS_P (x))
+  && CONST_INT_P (XEXP (x, 1))
+  && (REG_P (XEXP (x, 0))
+  || GET_CODE (XEXP (x, 0)) == SUBREG))
+{
+  if (INTVAL (XEXP (x, 1)) >= 61)
+cost = 18;
+}
+  else if (CONSTANT_ADDRESS_P (x))
 {
-  if (optimize > 0 && io_address_operand (x, QImode))
-	return 2;
-  return 4;
+  if (optimize > 0
+  && io_address_operand (x, QImode))
+cost = 2;
 }
-  return 4;
+
+  if (avr_log.address_cost)
+avr_edump ("\n%?: %d = %r\n", cost, x);
+  
+  return cost;
 }
 
 /* Test for extra memory constraint 'Q'.


[WWW, Patch] Update Fortran section of gcc-4.7/changes.html

2011-09-30 Thread Tobias Burnus

I intend to commit the attached patch in the next few days.

Do you have suggestions for a better wording or other items which should 
be listed?


For the current version, see http://gcc.gnu.org/gcc-4.7/changes.html#fortran

Tobias
Index: changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.7/changes.html,v
retrieving revision 1.41
diff -u -r1.41 changes.html
--- changes.html	30 Sep 2011 08:03:36 -	1.41
+++ changes.html	30 Sep 2011 11:54:45 -
@@ -356,10 +356,15 @@
   not support backtracing on all targets.
 Fortran 2008:
   
-The http://gcc.gnu.org/wiki/Coarray";>coarray
-  support has been extended. In particular, an experimental,
+Support for the DO CONCURRENT construct has been
+  added, which allows the user to specify that individual loop
+  iterations have no interdependencies.
+http://gcc.gnu.org/wiki/Coarray";>Coarrays:
+  Full single-image support except for polymorphic coarrays.
+  Additionally, preliminary support for multiple images via an
   MPI-based http://gcc.gnu.org/wiki/CoarrayLib";>
-  coarray communication library has been added.
+  coarray communication library has been added. Note:
+  Remote coarray access is not yet possible.
   
 TR 29113:
   
@@ -370,6 +375,10 @@
   report (TR) 29113 on Further Interoperability of Fortran with C.
 The OPTIONAL attribute is now allowed
   for dummy arguments of BIND(C) procedures. 
+The RANK intrinsic has been added.
+The implementation of the ASYNCHRONOUS attribute
+  in GCC is compatible with the candidate draft of TR 29113
+	  (since GCC 4.6).
   
   
 


RE: [PATCH] Fix stack red zone bug (PR38644)

2011-09-30 Thread Jiangning Liu


> -Original Message-
> From: Richard Sandiford [mailto:rdsandif...@googlemail.com]
> Sent: Friday, September 30, 2011 4:15 PM
> To: Jiangning Liu
> Cc: 'Jakub Jelinek'; 'Richard Guenther'; Andrew Pinski; gcc-
> patc...@gcc.gnu.org
> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> 
> "Jiangning Liu"  writes:
> >> -Original Message-
> >> From: Jakub Jelinek [mailto:ja...@redhat.com]
> >> Sent: Thursday, September 29, 2011 6:14 PM
> >> To: Jiangning Liu
> >> Cc: 'Richard Guenther'; Andrew Pinski; gcc-patches@gcc.gnu.org
> >> Subject: Re: [PATCH] Fix stack red zone bug (PR38644)
> >>
> >> On Thu, Sep 29, 2011 at 06:08:50PM +0800, Jiangning Liu wrote:
> >> > As far as I know different back-ends are implementing different
> >> > prologue/epilogue in GCC. If one day this part can be refined and
> >> abstracted
> >> > as well, I would say solving this stack-red-zone problem in shared
> >> > prologue/epilogue code would be a perfect solution, and barrier
> can
> >> be
> >> > inserted there.
> >> >
> >> > I'm not saying you are wrong on keeping scheduler using a pure
> >> barrier
> >> > interface. From engineering point of view, I only feel my proposal
> is
> >> so far
> >> > so good, because this patch at least solve the problem for all
> >> targets in a
> >> > quite simple way. Maybe it can be improved in future based on this.
> >>
> >> But you don't want to listen about any other alternative, other
> >> backends are
> >> happy with being able to put the best kind of barrier at the best
> spot
> >> in the epilogue and don't need a "generic" solution which won't
> model
> >> very
> >> well the target diversity anyway.
> >
> > Jakub,
> >
> > Appreciate for your attention on this issue,
> >
> > 1) Can you clarify who are the "others back-ends"? Does it cover most
> of the
> > back-ends being supported by GCC right now?
> 
> Not answering for Jakub of course, but as a maintainer of a backend, I
> know
> MIPS doesn't have the required barrier at the moment.  But that's a bug.
> 
> Like others in this thread, I'm strongly of the opinion that this
> should
> be modelled directly in the IL.  And it's already supposed to be
> modelled
> in the IL.  Target-independent code emits the required barriers in
> cases
> where it rather than the backend patterns are responsible.  E.g.:
> 
> emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
> emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
> 
> emit_move_insn (hard_frame_pointer_rtx, fp);
> emit_stack_restore (SAVE_NONLOCAL, stack);
> 
> from expand_builtin_longjmp() and:
> 
>   if (sa != 0)
> {
>   sa = validize_mem (sa);
>   /* These clobbers prevent the scheduler from moving
>references to variable arrays below the code
>that deletes (pops) the arrays.  */
>   emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
>   emit_clobber (gen_rtx_MEM (BLKmode, stack_pointer_rtx));
> }
> 
> from emit_stack_restore().  Backends that fail to follow suit are IMO
> just buggy.
> 
> FWIW, I intend to fix MIPS this weekend.

Richard S.,

I appreciate your attention to this issue and your investigation of the MIPS target.

Really glad to know you "find" a potential bug for MIPS through this
discussion. To some extent this confirms my earlier hypothesis.

> 
> You seem to feel strongly about this because it's a wrong-code bug that
> is very easy to introduce and often very hard to detect.  And I
> definitely
> sympathise with that.  If we were going to do it in a target-
> independent
> way, though, I think it would be better to scan patterns like epilogue
> and
> automatically introduce barriers before assignments to
> stack_pointer_rtx
> (subject to the kind of hook in your patch).  But I still don't think
> that's better than leaving the onus on the backend.  The backend is
> still responsible for much more complicated things like determining
> the correct deallocation and register-restore sequence, and for
> determining the correct CFI sequence.
> 

I think middle-end in GCC is actually shared code rather than the part
exactly in the middle. A pass working on RTL can be a middle end just
because the code can be shared for all targets, and some passes can even
work for both GIMPLE and RTL.

Actually some optimizations need to work through "shared part" (middle-end)
plus "target specific part" (back-end). You are thinking the interface
between this "shared part" and "target specific part" should be using
"barrier" as a proper model. To some extent I agree with this. However,
it doesn't mean the fix should be in back-end rather than middle end,
because obviously this problem is a common ABI issue for all targets. If we
can abstract this issue to be a shared part, why shouldn't we do it in
middle end to reduce the onus of back-end? Back-end should handle the target
specific things rather than only the complicated things. 

If a complicated problem can be imple

[PATCH 2/2] allow certain kinds of inputs to top level asm()-s

2011-09-30 Thread Jan Beulich
This is so that use of symbols referenced in these asm()-s can be
properly tracked by the compiler, just like is the case for all other
asm()-s. I'm particularly looking forward to use this in the Linux
kernel. It is certainly not very useful in PIC code, at least not without
some extra care.

gcc/
2011-09-30  Jan Beulich  

* c-parser.c (c_parser_simple_asm_expr): Add new second parameter
'inputsp'. Process inputs if caller indicates they are allowed. Adjust
calls to c_parser_asm_operands().
(c_parser_asm_operands): Change type of second parameter from 'bool'
to 'int'. Call c_parser_expression() only for non-negative 'mode', and
c_parser_expr_no_commas() otherwise.
(c_parser_declaration_or_fndef): Pass NULL as new second argument to
c_parser_simple_asm_expr().
(c_parser_asm_definition): New local variables 'loc' and 'inputs'.
Adjust calls to c_parser_simple_asm_expr() and cgraph_add_asm_node().
(c_parser_simple_asm_expr): 
* cgraph.c (cgraph_add_asm_node): Call check_unique_operand_names() to
validate input operands. Store inputs and location.
* cgraph.h (struct cgraph_asm_node): Add 'inputs' and 'loc'.
(cgraph_add_asm_node): New second and third parameters.
* cgraphunit.c (cgraph_output_pending_asms): Pass new second and third
arguments to assemble_asm().
(process_function_and_variable_attributes): New local variable 'anode'.
Process list starting from 'cgraph_asm_nodes'.
(cgraph_output_in_order): Pass new second and third arguments to
assemble_asm().
* cp/parser.c (enum required_token): Add RT_COLON_NO_OUTPUT.
(cp_parser_asm_definition): New local variable 'loc'. Correct a
comment. Parse and process input operands if permitted.
(cp_parser_required_error): Handle new case RT_COLON_NO_OUTPUT.
* lto-streamer-in.c (lto_input_toplevel_asms): Pass new second and
third arguments to cgraph_add_asm_node().
* lto-streamer-out.c (lto_output_toplevel_asms): Also output inputs
and location.
* output.h (assemble_asm): New second and third parameters.
* stmt.c (check_unique_operand_names): Remove static declaration and
make global.
* tree.h (check_unique_operand_names): Declare.
* varasm.c: Include pretty-print.h.
(assemble_asm): New parameters 'inputs' and 'loc'. Process inputs if
provided.

gcc/testsuite/
2011-09-30  Jan Beulich  

* g++.dg/ext/asm-static-1.C: New.
* gcc.dg/asm-static-1.c: New.
* gcc.dg/asm-static-2.c: New.
* gcc.dg/asm-static-3.c: New.
* gcc.dg/asm-static-4.c: New.

--- 2011-09-29.orig/gcc/c-parser.c  2011-09-28 10:56:01.0 +0200
+++ 2011-09-29/gcc/c-parser.c   2011-09-29 15:07:29.0 +0200
@@ -1131,7 +1131,7 @@ static struct c_arg_info *c_parser_parms
 static struct c_arg_info *c_parser_parms_list_declarator (c_parser *, tree,
  tree);
 static struct c_parm *c_parser_parameter_declaration (c_parser *, tree);
-static tree c_parser_simple_asm_expr (c_parser *);
+static tree c_parser_simple_asm_expr (c_parser *, tree *);
 static tree c_parser_attributes (c_parser *);
 static struct c_type_name *c_parser_type_name (c_parser *);
 static struct c_expr c_parser_initializer (c_parser *);
@@ -1150,7 +1150,7 @@ static void c_parser_while_statement (c_
 static void c_parser_do_statement (c_parser *);
 static void c_parser_for_statement (c_parser *);
 static tree c_parser_asm_statement (c_parser *);
-static tree c_parser_asm_operands (c_parser *, bool);
+static tree c_parser_asm_operands (c_parser *, int);
 static tree c_parser_asm_goto_operands (c_parser *);
 static tree c_parser_asm_clobbers (c_parser *);
 static struct c_expr c_parser_expr_no_commas (c_parser *, struct c_expr *);
@@ -1623,7 +1623,7 @@ c_parser_declaration_or_fndef (c_parser 
 function definition.  */
  fndef_ok = false;
  if (c_parser_next_token_is_keyword (parser, RID_ASM))
-   asm_name = c_parser_simple_asm_expr (parser);
+   asm_name = c_parser_simple_asm_expr (parser, NULL);
  if (c_parser_next_token_is_keyword (parser, RID_ATTRIBUTE))
postfix_attrs = c_parser_attributes (parser);
  if (c_parser_next_token_is (parser, CPP_EQ))
@@ -1782,9 +1782,12 @@ c_parser_declaration_or_fndef (c_parser 
 static void
 c_parser_asm_definition (c_parser *parser)
 {
-  tree asm_str = c_parser_simple_asm_expr (parser);
+  location_t loc = c_parser_peek_token (parser)->location;
+  tree inputs = NULL_TREE;
+  tree asm_str = c_parser_simple_asm_expr (parser, &inputs);
+
   if (asm_str)
-cgraph_add_asm_node (asm_str);
+cgraph_add_asm_node (asm_str, inputs, loc);
   c_parser_skip_until_found (parser, CPP_SEMICOLON, "expected %<;%>");
 }
 
@@ -3330,10 +,11 @@ c_parser_asm_string_literal

[PATCH 1/2] LTO: split out writing of top level asm nodes

2011-09-30 Thread Jan Beulich
Split out LTO's writing of top level asm nodes in preparation of extending
what needs to be written out when top level asm-s get enhanced to accept a
limited set of input operands.

gcc/
2011-09-30  Jan Beulich  

* lto-cgraph.c (output_cgraph): Remove processing of 'cgraph_asm_nodes',
call lto_output_toplevel_asms() instead.
(input_cgraph_1): Remove loop calling cgraph_add_asm_node(), call
lto_input_toplevel_asms() instead.
* lto-section-in.c (lto_section_name): Add "asm" entry.
* lto-streamer-in.c (lto_input_toplevel_asms): New.
* lto-streamer-out.c (lto_output_toplevel_asms): New.
* lto-streamer.h (LTO_minor_version): Bump.
(enum lto_section_type): Add LTO_section_asm.
(struct lto_asm_header): New.
(lto_input_toplevel_asms, lto_output_toplevel_asms): Declare.
* tree-streamer.h (streamer_write_string_cst): Declare.
* tree-streamer-out.c (write_string_cst): Rename to
streamer_write_string_cst and make global.
(streamer_write_tree_header): Adjust call to renamed function.

--- 2011-09-29.orig/gcc/lto-cgraph.c2011-09-28 10:56:01.0 +0200
+++ 2011-09-29/gcc/lto-cgraph.c 2011-09-29 15:07:23.0 +0200
@@ -817,7 +817,6 @@ output_cgraph (cgraph_node_set set, varp
   int i, n_nodes;
   lto_cgraph_encoder_t encoder;
   lto_varpool_encoder_t varpool_encoder;
-  struct cgraph_asm_node *can;
   static bool asm_nodes_output = false;
 
   if (flag_wpa)
@@ -854,6 +853,8 @@ output_cgraph (cgraph_node_set set, varp
 
   streamer_write_uhwi_stream (ob->main_stream, 0);
 
+  lto_destroy_simple_output_block (ob);
+
   /* Emit toplevel asms.
  When doing WPA we must output every asm just once.  Since we do not 
partition asm
  nodes at all, output them to first output.  This is kind of hack, but 
should work
@@ -861,19 +862,9 @@ output_cgraph (cgraph_node_set set, varp
   if (!asm_nodes_output)
 {
   asm_nodes_output = true;
-  for (can = cgraph_asm_nodes; can; can = can->next)
-   {
- int len = TREE_STRING_LENGTH (can->asm_str);
- streamer_write_uhwi_stream (ob->main_stream, len);
- for (i = 0; i < len; ++i)
-   streamer_write_char_stream (ob->main_stream,
-   TREE_STRING_POINTER (can->asm_str)[i]);
-   }
+  lto_output_toplevel_asms ();
 }
 
-  streamer_write_uhwi_stream (ob->main_stream, 0);
-
-  lto_destroy_simple_output_block (ob);
   output_varpool (set, vset);
   output_refs (set, vset, encoder, varpool_encoder);
 }
@@ -1185,7 +1176,6 @@ input_cgraph_1 (struct lto_file_decl_dat
   VEC(cgraph_node_ptr, heap) *nodes = NULL;
   struct cgraph_node *node;
   unsigned i;
-  unsigned HOST_WIDE_INT len;
 
   tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
   while (tag)
@@ -1206,18 +1196,8 @@ input_cgraph_1 (struct lto_file_decl_dat
   tag = streamer_read_enum (ib, LTO_cgraph_tags, LTO_cgraph_last_tag);
 }
 
-  /* Input toplevel asms.  */
-  len = streamer_read_uhwi (ib);
-  while (len)
-{
-  char *str = (char *)xmalloc (len + 1);
-  for (i = 0; i < len; ++i)
-   str[i] = streamer_read_uchar (ib);
-  cgraph_add_asm_node (build_string (len, str));
-  free (str);
+  lto_input_toplevel_asms (file_data);
 
-  len = streamer_read_uhwi (ib);
-}
   /* AUX pointers should be all non-zero for nodes read from the stream.  */
 #ifdef ENABLE_CHECKING
   FOR_EACH_VEC_ELT (cgraph_node_ptr, nodes, i, node)
--- 2011-09-29.orig/gcc/lto-section-in.c2011-09-28 10:56:01.0 
+0200
+++ 2011-09-29/gcc/lto-section-in.c 2011-09-29 15:07:23.0 +0200
@@ -53,6 +53,7 @@ const char *lto_section_name[LTO_N_SECTI
   "cgraph",
   "vars",
   "refs",
+  "asm",
   "jmpfuncs",
   "pureconst",
   "reference",
--- 2011-09-29.orig/gcc/lto-streamer-in.c   2011-09-28 10:56:01.0 
+0200
+++ 2011-09-29/gcc/lto-streamer-in.c2011-09-29 15:07:23.0 +0200
@@ -1141,6 +1141,47 @@ lto_input_tree (struct lto_input_block *
 }
 
 
+/* Input toplevel asms.  */
+
+void
+lto_input_toplevel_asms (struct lto_file_decl_data *file_data)
+{
+  size_t len;
+  const char *data = lto_get_section_data (file_data, LTO_section_asm,
+  NULL, &len);
+  const struct lto_asm_header *header = (const struct lto_asm_header *) data;
+  int32_t string_offset;
+  struct data_in *data_in;
+  struct lto_input_block ib;
+  tree str;
+
+  if (! data)
+return;
+
+  string_offset = sizeof (*header) + header->main_size;
+
+  LTO_INIT_INPUT_BLOCK (ib,
+   data + sizeof (*header),
+   0,
+   header->main_size);
+
+  data_in = lto_data_in_create (file_data, data + string_offset,
+   header->string_size, NULL);
+
+  /* Make sure the file was generated by the exact same compiler.  */
+  lto_check_version (header->lto_header.major_version,
+   

[PATCH 0/2] allow certain kinds of inputs to top level asm()-s (v2)

2011-09-30 Thread Jan Beulich
This is so that use of symbols referenced in these asm()-s can be
properly tracked by the compiler, just like is the case for all other
asm()-s. I'm particularly looking forward to use this in the Linux
kernel. It is certainly not very useful in PIC code, at least not without
some extra care.

Changes from the original posting (over a year ago) are the splitting
into two parts (first separating out LTO's writing of top level asm
nodes, then doing the change actually intended here) and one or
two problems fixed in the LTO code.

Jan



Re: [PATCH, SMS 1/2] Avoid generating redundant reg-moves

2011-09-30 Thread Ayal Zaks
On Fri, Sep 30, 2011 at 10:03 AM, Revital Eres  wrote:
> Hello,
>
>> This
>> +  /* Skip instructions that do not set a register.  */
>> +  if (set && !REG_P (SET_DEST (set)))
>> +    continue;
>> is ok. Can you also prevent !set insns from having reg_moves? (To be updated
>> once auto_inc insns will be supported, if they'll deserve reg_moves too.)
>
> I added a check to verify that no reg-moves are created for !set instructions.
>
> Currently re-testing on ppc64-redhat-linux (bootstrap and regtest) and
> arm-linux-gnueabi (bootstrap c).
>
> OK to commit once testing completes?
>

OK. later case >> latter case.

Ayal.



> Thanks,
> Revital
>
> gcc/
>        * modulo-sched.c (generate_reg_moves): Skip instructions that
>        do not set a register and verify no regmoves are created for
>        !single_set instructions.
>
>
> testsuite/
>         * gcc.dg/sms-10.c: New file.
>


Re: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-30 Thread Jakub Jelinek
On Fri, Sep 30, 2011 at 12:33:07PM +0200, Richard Guenther wrote:
> > +  low = build_int_cst (TREE_TYPE (exp), 0);
> > +  high = low;
> > +  in_p = 0;
> > +  strict_overflow_p = false;
> > +  is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
> 
> Effective booleans are also TYPE_PRECISION () == 1 types.  Remember
> we don't preserve conversions from BOOLEAN_TYPE to such types.

I can replace these with TYPE_PRECISION (TREE_TYPE (exp)) == 1;
checks if you prefer, though maybe it would need to also do
&& TYPE_UNSIGNED (TREE_TYPE (exp)), at least if different operands of
the | have different inner 1-bit signedness we could not merge them.

> > +       CASE_CONVERT:
> > +         is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE;
> 
> Likewise.  Though I wonder why |=?  Does it matter whether the extension
> sign- or zero-extends?

I think the |= is needed, this loop follows through not just casts, but
then also through the comparisons and again through casts.
It doesn't matter if the types or casts inside of the comparison argument
are bool or not (and likely they will not be), yet we don't want to stop
iterating because of that.
It wants to reconstruct ranges from what e.g. fold in fold_range_test
already created before, so stuff like
(int) (((unsigned) x - 64U) <= 31U)
for + [64, 95] range.

> > +                 if (integer_onep (range_binop (LT_EXPR, integer_type_node,
> > +                                                p->low, 0, q->low, 0)))
> 
> that's just
> 
> tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low);
> if (tem && integer_onep (tem))
>   return -1;

Ok, can change that.

> which avoids building the LT_EXPR tree if it doesn't fold.  Similar below.
> (ISTR some integer_onep () variant that handles a NULL argument ...)

Couldn't find any.  integer_onep needs non-NULL, compare_tree_int too.

> > +  /* Try to merge ranges.  */
> > +  for (first = i; i < length; i++)
> > +    {
> > +      tree low = ranges[i].low;
> > +      tree high = ranges[i].high;
> > +      int in_p = ranges[i].in_p;
> > +      bool strict_overflow_p = ranges[i].strict_overflow_p;
> > +
> > +      for (j = i + 1; j < length; j++)
> > +       {
> 
> That looks quadratic - do we want to limit this with a --param, simply
> partitioning the array into quadratic chunks?

This isn't quadratic (except in the hypothetical case
where all merge_ranges calls would succeed, but then
build_range_test would fail).  In the likely case where if range merges
succeed then update_range_test succeeds too this is just linear (plus the
qsort before that which isn't linear though).
So perhaps:
+  if (j > i + 1
+ && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+   ops, ranges[i].exp, in_p, low, high,
+   strict_overflow_p))
+   {
+ i = j - 1;
+ any_changes = true;
+   }
could be
+  if (j > i + 1)
+   {
+ if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+   ops, ranges[i].exp, in_p, low, high,
+   strict_overflow_p))
+   any_changes = true;
+ i = j - 1;
+   }
(then it isn't quadratic), or could try a few times before giving up:
+  if (j > i + 1)
+   {
+ if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+   ops, ranges[i].exp, in_p, low, high,
+   strict_overflow_p))
+   {
+ any_changes = true;
+ i = j - 1;
+   }
+ else if (update_fail_count == 64)
+   i = j - 1;
+ else
+   update_fail_count = 0;
+   }
where int update_fail_count = 0; would be after the
  bool strict_overflow_p = ranges[i].strict_overflow_p;
line in the outer loop.

> > +         if (ranges[i].exp != ranges[j].exp)
> > +           break;
> 
> Or isn't it too bad because of this check?

The above limits the chunks to a particular SSA_NAME.  Within each
chunk for the same SSA_NAME, the ranges are sorted in a way that
merge_ranges will likely succeed, so it isn't quadratic unless
update_range_test fails (see above).

BTW, the second loop (the one that attempts to optimize
x == 1 || x == 3 into (x & ~2) == 1 etc.) is quadratic, which is why
there is
  for (j = i + 1; j < length && j < i + 64; j++)
I don't think it is a limit people will often run into and thus I don't
think it is worth adding a --param= for that.

> > +         if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
> > +                            ranges[j].in_p, ranges[j].low, ranges[j].high))
> > +           break;
> 
> And this early out?  I suppose some comment on why together with
> the sorting this is of limited complexity would help.
> 
> > @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb)
> >                  optimize_ops_list (rhs_code, &ops);
> >                }
> >
> > +             if (rhs_co

Re: [wwwdocs] IA-32/x86-64 Changes for upcoming 4.7.0 series

2011-09-30 Thread Kirill Yukhin
Okay, seems maintainers have no objections

Could anybody please commit that to wwwdocs?

Thanks, K

On Tue, Sep 27, 2011 at 8:19 PM, Gerald Pfeifer  wrote:
> On Tue, 27 Sep 2011, Kirill Yukhin wrote:
>> So, if you are ok, let's wait a couple of days for maintainers inputs.
>
> Yep, looks good.  Unless you hear to the contrary from one of the
> x86 maintainers, I suggest you go ahead and commit in two days.
>
> Gerald
>


Re: PATCH: PR lto/50568: [4.7 Regression] Massive LTO failures

2011-09-30 Thread Diego Novillo
On Fri, Sep 30, 2011 at 00:09, Andi Kleen  wrote:

> gcc/lto/:
>
> 2011-09-29  H.J. Lu  
>            Andi Kleen  
>
>        PR lto/50568
>        * lto.c (lto_splay_tree_delete_id): New.
>        (lto_splay_tree_compare_ids): Likewise.
>        (lto_splay_tree_lookup): Likewise.
>        (lto_splay_tree_id_equal_p): Likewise.
>        (lto_splay_tree_insert): Likewise.
>        (lto_splay_tree_new): Likewise.
>        (lto_resolution_read): Change id to unsigned HOST_WIDE_INT.
>        Use lto_splay_tree_id_equal_p and lto_splay_tree_lookup.
>        (create_subid_section_table): Use lto_splay_tree_lookup and
>        lto_splay_tree_insert.
>        (lto_file_read): Use lto_splay_tree_new.
>
> lto-plugin/:
>
> 2011-09-29  H.J. Lu  
>            Andi Kleen  
>
>        PR lto/50568
>        * lto-plugin.c (sym_aux): Change id to unsigned long long.
>        (plugin_symtab): Likewise.
>        (dump_symtab): Likewise.
>        (resolve_conflicts): Likewise.
>        (process_symtab): Likewise.

OK.


Diego.


Re: Vector Comparison patch

2011-09-30 Thread Matthew Gretton-Dann

On 29/09/11 12:27, Richard Guenther wrote:

On Thu, Sep 29, 2011 at 12:00 PM, Richard Guenther
  wrote:

On Wed, Sep 28, 2011 at 4:23 PM, Richard Guenther
  wrote:

On Mon, Sep 26, 2011 at 5:43 PM, Richard Guenther
  wrote:

On Mon, Sep 26, 2011 at 4:25 PM, Richard Guenther
  wrote:

On Wed, Sep 7, 2011 at 5:06 PM, Joseph S. Myers  wrote:

This looks like it has the same issue with maybe needing to use
TYPE_MAIN_VARIANT in type comparisons as the shuffle patch.


I don't think so, we move qualifiers to the vector type from the element type
in make_vector_type and the tests only look at the component type.

I am re-testing the patch currently and will commit it if that succeeds.


Unfortunately gcc.c-torture/execute/vector-compare-1.c fails with -m32
for

vector (2, double) d0;
vector (2, double) d1;
vector (2, long) idres;

d0 = (vector (2, double)){(double)argc,  10.};
d1 = (vector (2, double)){0., (double)-23};
idres = (d0>  d1);

as apparently the type we chose to assign to (d0>  d1) is different
from that of idres:

/space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
error: incompatible types when assigning to type '__vector(2) long
int' from type '__vector(2) long long int'

Adjusting it to vector (2, long long) otoh yields, for -m64:

/space/rguenther/src/svn/trunk/gcc/testsuite/gcc.c-torture/execute/vector-compare-1.c:118:5:
error: incompatible types when assigning to type '__vector(2) long
long int' from type '__vector(2) long int'

But those two types are at least compatible from their modes.  Joseph,
should we accept mode-compatible types in assignments or maybe
transparently convert them?


Looks like we have a more suitable solution for these automatically
generated vector types - mark them with TYPE_VECTOR_OPAQUE.

I'm testing the following incremental patch.

Richard.

Index: gcc/c-typeck.c
===
--- gcc/c-typeck.c.orig 2011-09-28 16:22:10.0 +0200
+++ gcc/c-typeck.c  2011-09-28 16:18:39.0 +0200
@@ -9928,8 +9928,10 @@ build_binary_op (location_t location, en
 }

   /* Always construct signed integer vector type.  */
-  intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
(type0)), 0);
-  result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0));
+  intt = c_common_type_for_size (GET_MODE_BITSIZE
+  (TYPE_MODE (TREE_TYPE (type0))), 0);
+  result_type = build_opaque_vector_type (intt,
+ TYPE_VECTOR_SUBPARTS (type0));
   converted = 1;
   break;
 }
@@ -10063,8 +10065,10 @@ build_binary_op (location_t location, en
 }

   /* Always construct signed integer vector type.  */
-  intt = c_common_type_for_size (TYPE_PRECISION (TREE_TYPE
(type0)), 0);
-  result_type = build_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0));
+  intt = c_common_type_for_size (GET_MODE_BITSIZE
+  (TYPE_MODE (TREE_TYPE (type0))), 0);
+  result_type = build_opaque_vector_type (intt,
+ TYPE_VECTOR_SUBPARTS (type0));
   converted = 1;
   break;
 }


That doesn't seem to work either.  Because we treat the opaque and
non-opaque variants of vector  as different (the opaque type isn't
a variant type of the non-opaque one - something suspicious anyway).

I'm going to try to apply some surgery on how we build opaque variants
and then re-visit the above again.


Bootstrapped and tested on x86_64-unknown-linux-gnu and installed.

Richard.


Richard.





I'm still getting errors with latest trunk (r179378) for arm-none-eabi. 
 Please see http://gcc.gnu.org/PR50576.


Thanks,

Matt


--
Matthew Gretton-Dann
Principal Engineer, PD Software - Tools, ARM Ltd



Re: [PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309)

2011-09-30 Thread Richard Guenther
On Thu, Sep 29, 2011 at 11:15 PM, Jakub Jelinek  wrote:
> Hi!
>
> This patch implements a fold_range_test like optimization on GIMPLE, inside
> tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be
> shared in between the two.
> The advantage of the reassoc optimization is that it doesn't attempt to
> merge just 2 ranges at a time, instead it sorts the ranges for the same
> SSA_NAME and thus can optimize even cases where source code doesn't have
> the numbers in a range test in increasing or decreasing order (and also
> can optimize things that were in multiple statements in the source).
> Additionally, it optimizes cases like
> x == 1 || x == 3
> into (x & ~2) == 1.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2011-09-27  Jakub Jelinek  
>
>        PR tree-optimization/46309
>        * fold-const.c (make_range, merge_ranges): Remove prototypes.
>        (range_binop): Likewise, no longer static.
>        (make_range_step): New function.
>        (make_range): Use it.
>        * tree.h (make_range_step, range_binop): New prototypes.
>        * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
>        * tree-ssa-reassoc.c: Include diagnostic-core.h.
>        (struct range_entry): New type.
>        (init_range_entry, range_entry_cmp, update_range_test,
>        optimize_range_tests): New functions.
>        (reassociate_bb): Call optimize_range_tests.
>
>        * gcc.dg/pr46309.c: New test.
>
> --- gcc/fold-const.c.jj 2011-09-05 12:28:53.0 +0200
> +++ gcc/fold-const.c    2011-09-27 16:39:09.0 +0200
> @@ -112,12 +112,8 @@ static tree decode_field_reference (loca
>  static int all_ones_mask_p (const_tree, int);
>  static tree sign_bit_p (tree, const_tree);
>  static int simple_operand_p (const_tree);
> -static tree range_binop (enum tree_code, tree, tree, int, tree, int);
>  static tree range_predecessor (tree);
>  static tree range_successor (tree);
> -extern tree make_range (tree, int *, tree *, tree *, bool *);
> -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
> -                         tree, tree);
>  static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
>  static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, 
> tree);
>  static tree unextend (tree, int, int, tree);
> @@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp)
>    must be specified for a comparison.  ARG1 will be converted to ARG0's
>    type if both are specified.  */
>
> -static tree
> +tree
>  range_binop (enum tree_code code, tree type, tree arg0, int upper0_p,
>             tree arg1, int upper1_p)
>  {
> @@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t
>   return constant_boolean_node (result, type);
>  }
>
> +/* Helper routine for make_range.  Perform one step for it, return
> +   new expression if the loop should continue or NULL_TREE if it should
> +   stop.  */
> +
> +tree
> +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
> +                tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
> +                bool *strict_overflow_p)
> +{
> +  tree arg0_type = TREE_TYPE (arg0);
> +  tree n_low, n_high, low = *p_low, high = *p_high;
> +  int in_p = *p_in_p, n_in_p;
> +
> +  switch (code)
> +    {
> +    case TRUTH_NOT_EXPR:
> +      *p_in_p = ! in_p;
> +      return arg0;
> +
> +    case EQ_EXPR: case NE_EXPR:
> +    case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
> +      /* We can only do something if the range is testing for zero
> +        and if the second operand is an integer constant.  Note that
> +        saying something is "in" the range we make is done by
> +        complementing IN_P since it will set in the initial case of
> +        being not equal to zero; "out" is leaving it alone.  */
> +      if (low == NULL_TREE || high == NULL_TREE
> +         || ! integer_zerop (low) || ! integer_zerop (high)
> +         || TREE_CODE (arg1) != INTEGER_CST)
> +       return NULL_TREE;
> +
> +      switch (code)
> +       {
> +       case NE_EXPR:  /* - [c, c]  */
> +         low = high = arg1;
> +         break;
> +       case EQ_EXPR:  /* + [c, c]  */
> +         in_p = ! in_p, low = high = arg1;
> +         break;
> +       case GT_EXPR:  /* - [-, c] */
> +         low = 0, high = arg1;
> +         break;
> +       case GE_EXPR:  /* + [c, -] */
> +         in_p = ! in_p, low = arg1, high = 0;
> +         break;
> +       case LT_EXPR:  /* - [c, -] */
> +         low = arg1, high = 0;
> +         break;
> +       case LE_EXPR:  /* + [-, c] */
> +         in_p = ! in_p, low = 0, high = arg1;
> +         break;
> +       default:
> +         gcc_unreachable ();
> +       }
> +
> +      /* If this is an unsigned comparison, we also know that EXP is
> +        greater than or equal to zero.  We base the range tests we make
> +        on that fact, so we record it here so we can parse existing
> +        range tests.  We test 

Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Tristan Gingold

On Sep 30, 2011, at 11:10 AM, Gabriel Dos Reis wrote:

> On Thu, Sep 29, 2011 at 10:10 AM, Tristan Gingold  wrote:
>> Hi,
>> 
>> DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least one 
>> extension that is difficult to work-around as it is used in the system 
>> headers: varargs without named argument.  It makes sense on VMS because of 
>> its ABI which passes the number of arguments used.
>> 
>> This patch allows such declaration when the new flag -fdecc-extensions is 
>> used (C and ObjC only as C++ already allows that).
>> 
>> I use the plural for consistency with other -fxxx-extensions and in case 
>> other extensions are added.
>> 
>> Bootstrapped on x86_64-darwin, no regressions.
>> 
>> Ok for mainline ?
>> 
> 
> Note that
> 
>   void f(...) { }
> 
> is already valid C++, so I don't think the choice of the name
> -fdecc-extensions is appropriate.

Can you suggest a name ?

Tristan.




Re: [Patch] Support DEC-C extensions

2011-09-30 Thread Gabriel Dos Reis
On Thu, Sep 29, 2011 at 10:10 AM, Tristan Gingold  wrote:
> Hi,
>
> DEC-C, the DEC compiler provided on VMS, has added to ANSI-C at least one 
> extension that is difficult to work-around as it is used in the system 
> headers: varargs without named argument.  It makes sense on VMS because of 
> its ABI which passes the number of arguments used.
>
> This patch allows such declaration when the new flag -fdecc-extensions is 
> used (C and ObjC only as C++ already allows that).
>
> I use the plural for consistency with other -fxxx-extensions and in case 
> other extensions are added.
>
> Bootstrapped on x86_64-darwin, no regressions.
>
> Ok for mainline ?
>

Note that

   void f(...) { }

is already valid C++, so I don't think the choice of the name
-fdecc-extensions is appropriate.


Re: [PATCH] Do not fold addressable operands of "m" into non-addressable (PR inline-asm/50571, take 2)

2011-09-30 Thread Richard Guenther
On Fri, Sep 30, 2011 at 10:56 AM, Jakub Jelinek  wrote:
> Hi!
>
> On Fri, Sep 30, 2011 at 10:34:28AM +0200, Richard Guenther wrote:
>> Hmm, I don't think this change is ok.  We rely on maybe_fold_reference
>> to re-fold mem-refs to valid gimple form (from propagating say
>> &a.b.c to MEM[p, 4] which first gives the invalid MEM[&a.b.c, 4] and
>> then the folding changes this to MEM[&a, 12]).  You need to preserve that
>> and only disable constant folding.
>>
>> Thus I suggest to add a parameter to maybe_fold_reference that says
>> whether to try constant folding (we already have is_lhs, so why not
>> use that?)
>
> So like this instead?

Yes.  That's ok if it passes testing.

Thanks,
Richard.

> 2011-09-30  Jakub Jelinek  
>
>        PR inline-asm/50571
>        * gimple-fold.c (fold_stmt_1) : If
>        input constraints allow mem and not reg, pass true instead of
>        false as second argument to maybe_fold_reference.
>
>        * gcc.dg/pr50571.c: New test.
>
> --- gcc/gimple-fold.c.jj        2011-09-29 14:25:46.0 +0200
> +++ gcc/gimple-fold.c   2011-09-30 10:54:10.0 +0200
> @@ -1201,28 +1201,45 @@ fold_stmt_1 (gimple_stmt_iterator *gsi,
>
>     case GIMPLE_ASM:
>       /* Fold *& in asm operands.  */
> -      for (i = 0; i < gimple_asm_noutputs (stmt); ++i)
> -       {
> -         tree link = gimple_asm_output_op (stmt, i);
> -         tree op = TREE_VALUE (link);
> -         if (REFERENCE_CLASS_P (op)
> -             && (op = maybe_fold_reference (op, true)) != NULL_TREE)
> -           {
> -             TREE_VALUE (link) = op;
> -             changed = true;
> -           }
> -       }
> -      for (i = 0; i < gimple_asm_ninputs (stmt); ++i)
> -       {
> -         tree link = gimple_asm_input_op (stmt, i);
> -         tree op = TREE_VALUE (link);
> -         if (REFERENCE_CLASS_P (op)
> -             && (op = maybe_fold_reference (op, false)) != NULL_TREE)
> -           {
> -             TREE_VALUE (link) = op;
> -             changed = true;
> -           }
> -       }
> +      {
> +       size_t noutputs;
> +       const char **oconstraints;
> +       const char *constraint;
> +       bool allows_mem, allows_reg;
> +
> +       noutputs = gimple_asm_noutputs (stmt);
> +       oconstraints = XALLOCAVEC (const char *, noutputs);
> +
> +       for (i = 0; i < gimple_asm_noutputs (stmt); ++i)
> +         {
> +           tree link = gimple_asm_output_op (stmt, i);
> +           tree op = TREE_VALUE (link);
> +           oconstraints[i]
> +             = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +           if (REFERENCE_CLASS_P (op)
> +               && (op = maybe_fold_reference (op, true)) != NULL_TREE)
> +             {
> +               TREE_VALUE (link) = op;
> +               changed = true;
> +             }
> +         }
> +       for (i = 0; i < gimple_asm_ninputs (stmt); ++i)
> +         {
> +           tree link = gimple_asm_input_op (stmt, i);
> +           tree op = TREE_VALUE (link);
> +           constraint
> +             = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
> +           parse_input_constraint (&constraint, 0, 0, noutputs, 0,
> +                                   oconstraints, &allows_mem, &allows_reg);
> +           if (REFERENCE_CLASS_P (op)
> +               && (op = maybe_fold_reference (op, !allows_reg && allows_mem))
> +                  != NULL_TREE)
> +             {
> +               TREE_VALUE (link) = op;
> +               changed = true;
> +             }
> +         }
> +      }
>       break;
>
>     case GIMPLE_DEBUG:
> --- gcc/testsuite/gcc.dg/pr50571.c.jj   2011-09-29 21:28:05.0 +0200
> +++ gcc/testsuite/gcc.dg/pr50571.c      2011-09-29 21:30:08.0 +0200
> @@ -0,0 +1,11 @@
> +/* PR inline-asm/50571 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +static const int var[4] = { 1, 2, 3, 4 };
> +
> +void
> +foo (void)
> +{
> +  __asm volatile ("" : : "m" (*(int *) var));
> +}
>
>
>        Jakub
>


Re: [PATCH] Restrict fixes

2011-09-30 Thread Richard Guenther
On Fri, 30 Sep 2011, Jakub Jelinek wrote:

> On Fri, Sep 30, 2011 at 09:50:09AM +0200, Richard Guenther wrote:
> > Hmm, in fwprop can you limit your change to non-invariant addresses? 
> > That is, we do want to propagate invariant addresses over
> > restrict casts, because that will give us _more_ precise alias info
> > than restrict.
> 
> Will it?

Definitely.  Seeing a decl will enable better offset-based
disambiguation.

> I'd think we instead want add the non-restrict -> restrict check
> in another spot (ssa_forward_propagate_and_combine) below.
> Without that I'm afraid it is harder to disambiguate the accesses
> (though, it still fails).  Or should PTA be able to disambiguate
> it even without the ssa_forward_propagate_and_combine hunk?
> One store will be through p1 + variableoffset with PT { a,  p1> } (restr)
> and the other either with the hunk to p2 + constoffset with PT { a,  decl for p2> } (restr)
> or without the hunk a + constoffset.

But that points-to sets means the pointers are based on a common
pointer (&a), thus they will not be disambiguated.

I fear that we won't ever get

 int * restrict x = p;
 int * restrict y = p + 10;

optimized but not "optimize" a following

 int * restrict x1 = x + 1;

that's the whole point of adding the restrict tags - to be able to
follow to a common base-object conservatively.

Richard.

> 2011-09-30  Jakub Jelinek  
> 
>   * fold-const.c (fold_unary_loc): Don't optimize
>   POINTER_PLUS_EXPR casted to TYPE_RESTRICT pointer by
>   casting the inner pointer if it isn't TYPE_RESTRICT.
>   * tree-ssa-forwprop.c (forward_propagate_addr_expr_1): Don't propagate through
>   casts from non-TYPE_RESTRICT pointer to TYPE_RESTRICT pointer.
> 
>   * gcc.dg/tree-ssa/restrict-4.c: New test.
>   * gcc.dg/tree-ssa/restrict-5.c: New test.
> 
> --- gcc/fold-const.c.jj   2011-09-29 14:25:46.0 +0200
> +++ gcc/fold-const.c  2011-09-29 18:20:04.0 +0200
> @@ -7929,6 +7929,7 @@ fold_unary_loc (location_t loc, enum tre
>that this happens when X or Y is NOP_EXPR or Y is INTEGER_CST. */
>if (POINTER_TYPE_P (type)
> && TREE_CODE (arg0) == POINTER_PLUS_EXPR
> +   && (!TYPE_RESTRICT (type) || TYPE_RESTRICT (TREE_TYPE (arg0)))
> && (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
> || TREE_CODE (TREE_OPERAND (arg0, 0)) == NOP_EXPR
> || TREE_CODE (TREE_OPERAND (arg0, 1)) == NOP_EXPR))
> --- gcc/tree-ssa-forwprop.c.jj2011-09-15 12:18:54.0 +0200
> +++ gcc/tree-ssa-forwprop.c   2011-09-30 10:02:46.0 +0200
> @@ -804,6 +804,10 @@ forward_propagate_addr_expr_1 (tree name
>&& ((rhs_code == SSA_NAME && rhs == name)
> || CONVERT_EXPR_CODE_P (rhs_code)))
>  {
> +  /* Don't propagate restrict pointer's RHS.  */
> +  if (TYPE_RESTRICT (TREE_TYPE (lhs))
> +   && !TYPE_RESTRICT (TREE_TYPE (name)))
> + return false;
>/* Only recurse if we don't deal with a single use or we cannot
>do the propagation to the current statement.  In particular
>we can end up with a conversion needed for a non-invariant
> @@ -2392,7 +2396,9 @@ ssa_forward_propagate_and_combine (void)
>as well, as this is valid gimple.  */
> || (CONVERT_EXPR_CODE_P (code)
> && TREE_CODE (rhs) == ADDR_EXPR
> -   && POINTER_TYPE_P (TREE_TYPE (lhs
> +   && POINTER_TYPE_P (TREE_TYPE (lhs))
> +   && (!TYPE_RESTRICT (TREE_TYPE (lhs))
> +   || TYPE_RESTRICT (TREE_TYPE (rhs)
>   {
> tree base = get_base_address (TREE_OPERAND (rhs, 0));
> if ((!base
> --- gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c.jj 2011-09-29 
> 20:21:00.0 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/restrict-4.c2011-09-29 
> 20:21:57.0 +0200
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int
> +foo (int *x, int y)
> +{
> +  int *__restrict p1 = x;
> +  int *__restrict p2 = x + 32;
> +  p1[y] = 1;
> +  p2[4] = 2;
> +  return p1[y];
> +}
> +
> +int
> +bar (int *x, int y)
> +{
> +  int *__restrict p1 = x;
> +  int *p3 = x + 32;
> +  int *__restrict p2 = p3;
> +  p1[y] = 1;
> +  p2[4] = 2;
> +  return p1[y];
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 1;" 2 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> --- gcc/testsuite/gcc.dg/tree-ssa/restrict-5.c.jj 2011-09-30 
> 10:04:45.0 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/restrict-5.c2011-09-30 
> 10:05:11.0 +0200
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int a[64];
> +
> +int
> +foo (int x)
> +{
> +  int *__restrict p1 = a + 4;
> +  int *__restrict p2 = a + 16;
> +  p1[x] = 1;
> +  p2[2] = 2;
> +  return p1[x];
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return 1;" 1 "optimized" } } */
> +/* { dg-final {

  1   2   >