Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Eric Botcazou
 The patch is bootstrapping now on x86_64-pc-linux-gnu.

It very likely breaks bootstrap with RTL checking enabled:

/sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/xgcc 
-B/sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/ 
-B/usr/gnat/i686-pc-linux-gnu/bin/ -B/usr/gnat/i686-pc-linux-gnu/lib/ -isystem 
/usr/gnat/i686-pc-linux-gnu/include -isystem 
/usr/gnat/i686-pc-linux-gnu/sys-include-g -O2 -O2  -g -O2 -DIN_GCC   -W 
-Wall -Wwrite-strings -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes 
-Wold-style-definition  -isystem ./include   -fpic -g -DIN_LIBGCC2 
-fbuilding-libgcc -fno-stack-protector   -fpic -I. -I. -I../.././gcc 
-I../../../src/libgcc -I../../../src/libgcc/. -I../../../src/libgcc/../gcc 
-I../../../src/libgcc/../include -I../../../src/libgcc/config/libbid 
-DENABLE_DECIMAL_BID_FORMAT -DHAVE_CC_TLS  -DUSE_TLS -o 
_popcountsi2.o -MT _popcountsi2.o -MD -MP -MF 
_popcountsi2.dep -DL_popcountsi2 -c ../../../src/libgcc/libgcc2.c 
-fvisibility=hidden -DHIDE_EXPORTS
../../../src/libgcc/libgcc2.c: In function '__popcountsi2':
../../../src/libgcc/libgcc2.c:835:1: internal compiler error: RTL check: 
expected elt 1 type 'i' or 'n', have '0' (rtx mem) in ix86_decompose_address, 
at config/i386/i386.c:11522
Please submit a full bug report,
with preprocessed source if appropriate.
See URL:mailto:rep...@adacore.com for instructions.
make[3]: *** [_popcountsi2.o] Error 1

-- 
Eric Botcazou


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Jakub Jelinek
On Tue, Mar 20, 2012 at 09:51:07AM +0100, Eric Botcazou wrote:
  The patch is bootstrapping now on x86_64-pc-linux-gnu.
 
 It very likely breaks bootstrap with RTL checking enabled:
 
 /sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/xgcc 
 -B/sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/ 
 -B/usr/gnat/i686-pc-linux-gnu/bin/ -B/usr/gnat/i686-pc-linux-gnu/lib/ 
 -isystem /usr/gnat/i686-pc-linux-gnu/include -isystem 
 /usr/gnat/i686-pc-linux-gnu/sys-include-g -O2 -O2  -g -O2 -DIN_GCC   -W 
 -Wall -Wwrite-strings -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes 
 -Wold-style-definition  -isystem ./include   -fpic -g -DIN_LIBGCC2 
 -fbuilding-libgcc -fno-stack-protector   -fpic -I. -I. -I../.././gcc 
 -I../../../src/libgcc -I../../../src/libgcc/. -I../../../src/libgcc/../gcc 
 -I../../../src/libgcc/../include -I../../../src/libgcc/config/libbid 
 -DENABLE_DECIMAL_BID_FORMAT -DHAVE_CC_TLS  -DUSE_TLS -o 
 _popcountsi2.o -MT _popcountsi2.o -MD -MP -MF 
 _popcountsi2.dep -DL_popcountsi2 -c ../../../src/libgcc/libgcc2.c 
 -fvisibility=hidden -DHIDE_EXPORTS
 ../../../src/libgcc/libgcc2.c: In function '__popcountsi2':
 ../../../src/libgcc/libgcc2.c:835:1: internal compiler error: RTL check: 
 expected elt 1 type 'i' or 'n', have '0' (rtx mem) in ix86_decompose_address, 
 at config/i386/i386.c:11522
 Please submit a full bug report,
 with preprocessed source if appropriate.
 See URL:mailto:rep...@adacore.com for instructions.
 make[3]: *** [_popcountsi2.o] Error 1

Yeah, my bootstrap just failed the same.  Will test:

2012-03-20  Jakub Jelinek  ja...@redhat.com

* config/i386/i386.c (ix86_decompose_address) case ZERO_EXTEND:
If operand isn't UNSPEC, return 0.

--- gcc/config/i386/i386.c.jj   2012-03-20 09:35:06.0 +0100
+++ gcc/config/i386/i386.c  2012-03-20 09:56:35.038835835 +0100
@@ -11516,6 +11516,8 @@ ix86_decompose_address (rtx addr, struct
 
case ZERO_EXTEND:
  op = XEXP (op, 0);
+ if (GET_CODE (op) != UNSPEC)
+   return 0;
  /* FALLTHRU */
 
case UNSPEC:

Jakub


[Patch] libgfortran: do not assume libm

2012-03-20 Thread Tristan Gingold
Hi,

I am starting to build fortran for VMS.

The first serious issue was with libgfortran/configure.  It checks for several 
math functions, but directly in libm using AC_CHECK_LIB.
But there is no such thing as libm on VMS systems (thus requiring MATH_LIBRARY 
to be defined as "").  Therefore all these tests fail,
resulting in a failure during the libgfortran build.

I think the best way to test for the math function is to use AC_CHECK_DECLS as 
hinted by the autoconf manual
(cf the AC_CHECK_DECLS example in 
http://www.gnu.org/software/autoconf/manual/autoconf.html
 AC_CHECK_DECLS([j0], [], [], [[#include <math.h>]])
 AC_CHECK_DECLS([[basename(char *)], [dirname(char *)]])

)
but I am not an autoconf expert.

I think that this change makes the AIX specific check for __clog obsolete, but 
I haven't removed it.

With this change I was able to cross build libgfortran for VMS (ia64 and alpha).
I have also bootstrapped gcc for x86_64-darwin, without fortran regressions.

Ok for trunk ?

Tristan.

libgfortran/
2012-03-20  Tristan Gingold  ging...@adacore.com

* configure.ac: Check for libm.  Use AC_CHECK_DECLS for
math functions.
* configure: Regenerate.

diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 7011a93..aaaedc2 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -268,192 +268,195 @@ strcasestr getrlimit gettimeofday stat fstat lstat 
getpwuid vsnprintf dup \
 getcwd localtime_r gmtime_r strerror_r getpwuid_r ttyname_r clock_gettime \
 readlink getgid getpid getppid getuid geteuid umask)
 
+# Check for libm
+AC_CHECK_LIB([m],[sin])
+
 # Check for C99 (and other IEEE) math functions
-AC_CHECK_LIB([m],[acosf],[AC_DEFINE([HAVE_ACOSF],[1],[libm includes acosf])])
-AC_CHECK_LIB([m],[acos],[AC_DEFINE([HAVE_ACOS],[1],[libm includes acos])])
-AC_CHECK_LIB([m],[acosl],[AC_DEFINE([HAVE_ACOSL],[1],[libm includes acosl])])
-AC_CHECK_LIB([m],[acoshf],[AC_DEFINE([HAVE_ACOSHF],[1],[libm includes 
acoshf])])
-AC_CHECK_LIB([m],[acosh],[AC_DEFINE([HAVE_ACOSH],[1],[libm includes acosh])])
-AC_CHECK_LIB([m],[acoshl],[AC_DEFINE([HAVE_ACOSHL],[1],[libm includes 
acoshl])])
-AC_CHECK_LIB([m],[asinf],[AC_DEFINE([HAVE_ASINF],[1],[libm includes asinf])])
-AC_CHECK_LIB([m],[asin],[AC_DEFINE([HAVE_ASIN],[1],[libm includes asin])])
-AC_CHECK_LIB([m],[asinl],[AC_DEFINE([HAVE_ASINL],[1],[libm includes asinl])])
-AC_CHECK_LIB([m],[asinhf],[AC_DEFINE([HAVE_ASINHF],[1],[libm includes 
asinhf])])
-AC_CHECK_LIB([m],[asinh],[AC_DEFINE([HAVE_ASINH],[1],[libm includes asinh])])
-AC_CHECK_LIB([m],[asinhl],[AC_DEFINE([HAVE_ASINHL],[1],[libm includes 
asinhl])])
-AC_CHECK_LIB([m],[atan2f],[AC_DEFINE([HAVE_ATAN2F],[1],[libm includes 
atan2f])])
-AC_CHECK_LIB([m],[atan2],[AC_DEFINE([HAVE_ATAN2],[1],[libm includes atan2])])
-AC_CHECK_LIB([m],[atan2l],[AC_DEFINE([HAVE_ATAN2L],[1],[libm includes 
atan2l])])
-AC_CHECK_LIB([m],[atanf],[AC_DEFINE([HAVE_ATANF],[1],[libm includes atanf])])
-AC_CHECK_LIB([m],[atan],[AC_DEFINE([HAVE_ATAN],[1],[libm includes atan])])
-AC_CHECK_LIB([m],[atanl],[AC_DEFINE([HAVE_ATANL],[1],[libm includes atanl])])
-AC_CHECK_LIB([m],[atanhf],[AC_DEFINE([HAVE_ATANHF],[1],[libm includes 
atanhf])])
-AC_CHECK_LIB([m],[atanh],[AC_DEFINE([HAVE_ATANH],[1],[libm includes atanh])])
-AC_CHECK_LIB([m],[atanhl],[AC_DEFINE([HAVE_ATANHL],[1],[libm includes 
atanhl])])
-AC_CHECK_LIB([m],[cargf],[AC_DEFINE([HAVE_CARGF],[1],[libm includes cargf])])
-AC_CHECK_LIB([m],[carg],[AC_DEFINE([HAVE_CARG],[1],[libm includes carg])])
-AC_CHECK_LIB([m],[cargl],[AC_DEFINE([HAVE_CARGL],[1],[libm includes cargl])])
-AC_CHECK_LIB([m],[ceilf],[AC_DEFINE([HAVE_CEILF],[1],[libm includes ceilf])])
-AC_CHECK_LIB([m],[ceil],[AC_DEFINE([HAVE_CEIL],[1],[libm includes ceil])])
-AC_CHECK_LIB([m],[ceill],[AC_DEFINE([HAVE_CEILL],[1],[libm includes ceill])])
-AC_CHECK_LIB([m],[copysignf],[AC_DEFINE([HAVE_COPYSIGNF],[1],[libm includes 
copysignf])])
-AC_CHECK_LIB([m],[copysign],[AC_DEFINE([HAVE_COPYSIGN],[1],[libm includes 
copysign])])
-AC_CHECK_LIB([m],[copysignl],[AC_DEFINE([HAVE_COPYSIGNL],[1],[libm includes 
copysignl])])
-AC_CHECK_LIB([m],[cosf],[AC_DEFINE([HAVE_COSF],[1],[libm includes cosf])])
-AC_CHECK_LIB([m],[cos],[AC_DEFINE([HAVE_COS],[1],[libm includes cos])])
-AC_CHECK_LIB([m],[cosl],[AC_DEFINE([HAVE_COSL],[1],[libm includes cosl])])
-AC_CHECK_LIB([m],[ccosf],[AC_DEFINE([HAVE_CCOSF],[1],[libm includes ccosf])])
-AC_CHECK_LIB([m],[ccos],[AC_DEFINE([HAVE_CCOS],[1],[libm includes ccos])])
-AC_CHECK_LIB([m],[ccosl],[AC_DEFINE([HAVE_CCOSL],[1],[libm includes ccosl])])
-AC_CHECK_LIB([m],[coshf],[AC_DEFINE([HAVE_COSHF],[1],[libm includes coshf])])
-AC_CHECK_LIB([m],[cosh],[AC_DEFINE([HAVE_COSH],[1],[libm includes cosh])])
-AC_CHECK_LIB([m],[coshl],[AC_DEFINE([HAVE_COSHL],[1],[libm includes coshl])])
-AC_CHECK_LIB([m],[ccoshf],[AC_DEFINE([HAVE_CCOSHF],[1],[libm includes 
ccoshf])])
-AC_CHECK_LIB([m],[ccosh],[AC_DEFINE([HAVE_CCOSH],[1],[libm includes ccosh])])

Re: [debug/profile-mode] broken c++config.h

2012-03-20 Thread Paolo Bonzini
Il 19/03/2012 13:32, Paolo Carlini ha scritto:
 Should the addition be \$$ to escape it for the shell as well as for
 make?
 (I know it works, but that might not be true for all shells.)
 I don't think that $, could be expanded by any shell.
 I'm not worried about it not expanding, but rather being rejected as
 invalid syntax - but maybe it's fine.  It certainly works OK with bash
 and ksh.
 Thus, are we going to apply the patch? Maybe Paolo can double check the
 sed detail.

I think \$$ is better.  Besides that it looks good.

Paolo



Re: [PATCH] Make vector lowering use vectors of proper sign

2012-03-20 Thread Paolo Bonzini
Il 14/03/2012 15:53, Richard Guenther ha scritto:
 
 I noticed when trying to fix PR52584 that vector lowering always
 creates unsigned vector types.  The following fixes that, also
 getting rid of the weird use of a langhook.
 
 Bootstrapped and tested on x86_64-unknown-linux-gnu.
 
 Richard.
 
 2012-03-14  Richard Guenther  rguent...@suse.de
 
   PR middle-end/52584
   * tree-vect-generic.c (type_for_widest_vector_mode): Take
   element type instead of mode, use build_vector_type_for_mode
   instead of the langhook, build a vector of proper signedness.
   (expand_vector_operations_1): Adjust.
 
 Index: gcc/tree-vect-generic.c
 ===
 *** gcc/tree-vect-generic.c   (revision 185379)
 --- gcc/tree-vect-generic.c   (working copy)
 *** expand_vector_operation (gimple_stmt_ite
 *** 471,483 
   gimple_assign_rhs2 (assign), code);
   }
   
 ! /* Return a type for the widest vector mode whose components are of mode
 !INNER_MODE, or NULL_TREE if none is found.
 !SATP is true for saturating fixed-point types.  */
   
   static tree
 ! type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int 
 satp)
   {
 enum machine_mode best_mode = VOIDmode, mode;
 int best_nunits = 0;
   
 --- 471,483 
   gimple_assign_rhs2 (assign), code);
   }
   
 ! /* Return a type for the widest vector mode whose components are of type
 !TYPE, or NULL_TREE if none is found.  */
   
   static tree
 ! type_for_widest_vector_mode (tree type, optab op)
   {
 +   enum machine_mode inner_mode = TYPE_MODE (type);
 enum machine_mode best_mode = VOIDmode, mode;
 int best_nunits = 0;
   
 *** type_for_widest_vector_mode (enum machin
 *** 503,515 
 if (best_mode == VOIDmode)
   return NULL_TREE;
 else
 ! {
 !   /* For fixed-point modes, we need to pass satp as the 2nd parameter.  
 */
 !   if (ALL_FIXED_POINT_MODE_P (best_mode))
 ! return lang_hooks.types.type_for_mode (best_mode, satp);
 ! 
 !   return lang_hooks.types.type_for_mode (best_mode, 1);
 ! }
   }
   
   
 --- 503,509 
 if (best_mode == VOIDmode)
   return NULL_TREE;
 else
 ! return build_vector_type_for_mode (type, best_mode);
   }
   
   
 *** expand_vector_operations_1 (gimple_stmt_
 *** 856,863 
 if (!VECTOR_MODE_P (TYPE_MODE (type))  op)
   {
 tree vector_compute_type
 ! = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
 !TYPE_SATURATING (TREE_TYPE (type)));
 if (vector_compute_type != NULL_TREE
  (TYPE_VECTOR_SUBPARTS (vector_compute_type)
  TYPE_VECTOR_SUBPARTS (compute_type))
 --- 850,856 
 if (!VECTOR_MODE_P (TYPE_MODE (type))  op)
   {
 tree vector_compute_type
 ! = type_for_widest_vector_mode (TREE_TYPE (type), op);
 if (vector_compute_type != NULL_TREE
  (TYPE_VECTOR_SUBPARTS (vector_compute_type)
  TYPE_VECTOR_SUBPARTS (compute_type))
 

Looks good.

Paolo



Re: [PATCH] Fix PRs 52080, 52097 and 48124, rewrite bitfield expansion, enable the C++ memory model wrt bitfields everywhere

2012-03-20 Thread Richard Guenther
On Mon, 19 Mar 2012, Richard Guenther wrote:

 On Mon, 19 Mar 2012, Eric Botcazou wrote:
 
   But it's only ever computed for RECORD_TYPEs where DECL_QUALIFIER is
   unused.
  
  OK, that could work indeed.
  
   For now giving up seems to be easiest (just give up when
   DECL_FIELD_OFFSET is not equal for all of the bitfield members).
   That will at most get you the miscompiles for the PRs back, for
   languages with funny structure layout.
  
  I have another variant of the DECL_FIELD_OFFSET problem:
  
  FAIL: gnat.dg/specs/pack8.ads (test for excess errors)
  Excess errors:
  +===GNAT BUG DETECTED==+
  | 4.8.0 20120314 (experimental) [trunk revision 185395] (i586-suse-linux) 
  GCC 
  error:|
  | in finish_bitfield_representative, at stor-layout.c:1762 |
  | Error detected at pack8.ads:17:4   
  
  Testcase attached:
  
gnat.dg/specs/pack8.ads
gnat.dg/specs/pack8_pkg.ads
 
 Thanks.  That one indeed has different DECL_FIELD_OFFSET,
 
 ((sizetype) MAX_EXPR (integer) pack8__R1s, 0 + (sizetype) MAX_EXPR 
 (integer) pack8__R1s, 0) + 1
 
 vs.
 
 (sizetype) MAX_EXPR (integer) pack8__R1s, 0 + (sizetype) MAX_EXPR 
 (integer) pack8__R1s, 0
 
 we're not putting the 1 byte offset into DECL_FIELD_BIT_OFFSET
 because DECL_OFFSET_ALIGN is 8 in this case.  Eventually we should
 be able to relax how many bits we push into DECL_FIELD_BIT_OFFSET.
 
  I agree that giving up (for now) is a sensible option.  Thanks.
 
 Done with the patch below.  We're actually not going to generate
 possibly wrong-code again but sub-optimal code.
 
 Bootstrap & regtest pending on x86_64-unknown-linux-gnu.

This is what I have applied after bootstrapping and testing on
x86_64-unknown-linux-gnu.

Richard.

2012-03-20  Richard Guenther  rguent...@suse.de

* stor-layout.c (finish_bitfield_representative): Fallback
to conservative maximum size if the padding up to the next
field cannot be computed as a constant.
(finish_bitfield_layout): If we cannot compute the distance
between the start of the bitfield representative and the
bitfield member start a new representative.
* expr.c (get_bit_range): The distance between the start of
the bitfield representative and the bitfield member is zero
if the field offsets are not constants.

* gnat.dg/pack16.adb: New testcase.
* gnat.dg/pack16_pkg.ads: Likewise.
* gnat.dg/specs/pack8.ads: Likewise.
* gnat.dg/specs/pack8_pkg.ads: Likewise.

Index: gcc/stor-layout.c
===
*** gcc/stor-layout.c   (revision 185518)
--- gcc/stor-layout.c   (working copy)
*** finish_bitfield_representative (tree rep
*** 1781,1790 
return;
maxsize = size_diffop (DECL_FIELD_OFFSET (nextf),
 DECL_FIELD_OFFSET (repr));
!   gcc_assert (host_integerp (maxsize, 1));
!   maxbitsize = (tree_low_cst (maxsize, 1) * BITS_PER_UNIT
!   + tree_low_cst (DECL_FIELD_BIT_OFFSET (nextf), 1)
!   - tree_low_cst (DECL_FIELD_BIT_OFFSET (repr), 1));
  }
else
  {
--- 1781,1797 
return;
maxsize = size_diffop (DECL_FIELD_OFFSET (nextf),
 DECL_FIELD_OFFSET (repr));
!   if (host_integerp (maxsize, 1))
!   {
! maxbitsize = (tree_low_cst (maxsize, 1) * BITS_PER_UNIT
!   + tree_low_cst (DECL_FIELD_BIT_OFFSET (nextf), 1)
!   - tree_low_cst (DECL_FIELD_BIT_OFFSET (repr), 1));
! /* If the group ends within a bitfield nextf does not need to be
!aligned to BITS_PER_UNIT.  Thus round up.  */
! maxbitsize = (maxbitsize + BITS_PER_UNIT - 1)  ~(BITS_PER_UNIT - 1);
!   }
!   else
!   maxbitsize = bitsize;
  }
else
  {
*** finish_bitfield_layout (record_layout_in
*** 1888,1893 
--- 1895,1902 
}
else if (DECL_BIT_FIELD_TYPE (field))
{
+ gcc_assert (repr != NULL_TREE);
+ 
  /* Zero-size bitfields finish off a representative and
 do not have a representative themselves.  This is
 required by the C++ memory model.  */
*** finish_bitfield_layout (record_layout_in
*** 1896,1901 
--- 1905,1928 
  finish_bitfield_representative (repr, prev);
  repr = NULL_TREE;
}
+ 
+ /* We assume that either DECL_FIELD_OFFSET of the representative
+and each bitfield member is a constant or they are equal.
+This is because we need to be able to compute the bit-offset
+of each field relative to the representative in get_bit_range
+during RTL expansion.
+If these constraints are not met, simply force a new
+representative to be generated.  That will at most
+  

Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Paolo Bonzini
Il 19/03/2012 20:13, Uros Bizjak ha scritto:
 2012-03-19  Uros Bizjak  ubiz...@gmail.com
 
   * config/i386/i386.c (get_thread_pointer): Add tp_mode argument.
   Generate ZERO_EXTEND in place if GET_MODE (tp) != tp_mode.
   (legitimize_tls_address) TLS_MODEL_INITIAL_EXEC: Always generate
   DImode UNSPEC_GOTNTPOFF references on TARGET_64BIT.
   (ix86_decompose_address): Allow zero extended UNSPEC_TP references.
 
   Revert:
   2012-03-13  Uros Bizjak  ubiz...@gmail.com
 
   * config/i386/i386.h (TARGET_TLS_INDIRECT_SEG_REFS): New.
   * config/i386/i386.c (ix86_decompose_address): Use
   TARGET_TLS_INDIRECT_SEG_REFS to prevent %fs:(%reg) addresses.
   (legitimize_tls_address): Use TARGET_TLS_INDIRECT_SEG_REFS to load
   thread pointer to a register.
 
   Revert:
   2012-03-10  H.J. Lu  hongjiu...@intel.com
 
   * config/i386/i386.c (ix86_decompose_address): Disallow fs:(reg)
   if Pmode != word_mode.
   (legitimize_tls_address): Call gen_tls_initial_exec_x32 if
   Pmode == SImode for TARGET_X32.
 
   * config/i386/i386.md (UNSPEC_TLS_IE_X32): New.
   (tls_initial_exec_x32): Likewise.
 
 Tested on x86_64-pc-linux-gnu {,-m32}.

No testcases?

Paolo



[Patch]: ggc-page.c: use uintptr_t instead of size_t

2012-03-20 Thread Tristan Gingold
Hi,

ggc-page.c uses size_t to cast pointers to an integer type.  Unfortunately, 
this isn't portable for systems (such as … VMS) where size_t precision is less 
than pointers precision.

Fortunately, thanks to configure, uintptr_t type is always present, so this 
patch simply replaces size_t by uintptr_t for such conversions (but still 
keeping size_t for size expressions).

I haven't tried to convert ggc-zone.c, because it requires mmap (which is not 
usable as is on VMS).

Tested by cross bootstrapping for VMS.

Ok for trunk ?

Tristan.

gcc/
2012-03-20  Tristan Gingold  ging...@adacore.com

* ggc-page.c (PAGE_L1_SIZE, PAGE_L2_SIZE, LOOKUP_L1, LOOKUP_L2)
(ggc_allocated_p, lookup_page_table_entry, set_page_table_entry)
(alloc_page, init_ggc, clear_marks, struct ggc_pch_data)
(ggc_pch_this_base): Use uintptr_t instead of size_t.

diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c
index ee796cb..ff23092 100644
--- a/gcc/ggc-page.c
+++ b/gcc/ggc-page.c
@@ -121,14 +121,14 @@ along with GCC; see the file COPYING3.  If not see
 
 #define PAGE_L1_BITS   (8)
 #define PAGE_L2_BITS   (32 - PAGE_L1_BITS - G.lg_pagesize)
-#define PAGE_L1_SIZE   ((size_t) 1  PAGE_L1_BITS)
-#define PAGE_L2_SIZE   ((size_t) 1  PAGE_L2_BITS)
+#define PAGE_L1_SIZE   ((uintptr_t) 1  PAGE_L1_BITS)
+#define PAGE_L2_SIZE   ((uintptr_t) 1  PAGE_L2_BITS)
 
 #define LOOKUP_L1(p) \
-  (((size_t) (p)  (32 - PAGE_L1_BITS))  ((1  PAGE_L1_BITS) - 1))
+  (((uintptr_t) (p)  (32 - PAGE_L1_BITS))  ((1  PAGE_L1_BITS) - 1))
 
 #define LOOKUP_L2(p) \
-  (((size_t) (p)  G.lg_pagesize)  ((1  PAGE_L2_BITS) - 1))
+  (((uintptr_t) (p)  G.lg_pagesize)  ((1  PAGE_L2_BITS) - 1))
 
 /* The number of objects per allocation page, for objects on a page of
the indicated ORDER.  */
@@ -560,7 +560,7 @@ ggc_allocated_p (const void *p)
   base = G.lookup[0];
 #else
   page_table table = G.lookup;
-  size_t high_bits = (size_t) p  ~ (size_t) 0x;
+  uintptr_t high_bits = (uintptr_t) p  ~ (uintptr_t) 0x;
   while (1)
 {
   if (table == NULL)
@@ -592,7 +592,7 @@ lookup_page_table_entry (const void *p)
   base = G.lookup[0];
 #else
   page_table table = G.lookup;
-  size_t high_bits = (size_t) p  ~ (size_t) 0x;
+  uintptr_t high_bits = (uintptr_t) p  ~ (uintptr_t) 0x;
   while (table-high_bits != high_bits)
 table = table-next;
   base = table-table[0];
@@ -617,7 +617,7 @@ set_page_table_entry (void *p, page_entry *entry)
   base = G.lookup[0];
 #else
   page_table table;
-  size_t high_bits = (size_t) p  ~ (size_t) 0x;
+  uintptr_t high_bits = (uintptr_t) p  ~ (uintptr_t) 0x;
   for (table = G.lookup; table; table = table-next)
 if (table-high_bits == high_bits)
   goto found;
@@ -826,7 +826,7 @@ alloc_page (unsigned order)
alloc_size = entry_size + G.pagesize - 1;
   allocation = XNEWVEC (char, alloc_size);
 
-  page = (char *) (((size_t) allocation + G.pagesize - 1)  -G.pagesize);
+  page = (char *) (((uintptr_t) allocation + G.pagesize - 1)  
-G.pagesize);
   head_slop = page - allocation;
   if (multiple_pages)
tail_slop = ((size_t) allocation + alloc_size)  (G.pagesize - 1);
@@ -1662,13 +1662,13 @@ init_ggc (void)
   {
 char *p = alloc_anon (NULL, G.pagesize, true);
 struct page_entry *e;
-if ((size_t)p  (G.pagesize - 1))
+if ((uintptr_t)p  (G.pagesize - 1))
   {
/* How losing.  Discard this one and try another.  If we still
   can't get something useful, give up.  */
 
p = alloc_anon (NULL, G.pagesize, true);
-   gcc_assert (!((size_t)p  (G.pagesize - 1)));
+   gcc_assert (!((uintptr_t)p  (G.pagesize - 1)));
   }
 
 /* We have a good page, might as well hold onto it...  */
@@ -1782,7 +1782,7 @@ clear_marks (void)
  size_t bitmap_size = BITMAP_SIZE (num_objects + 1);
 
  /* The data should be page-aligned.  */
- gcc_assert (!((size_t) p-page  (G.pagesize - 1)));
+ gcc_assert (!((uintptr_t) p-page  (G.pagesize - 1)));
 
  /* Pages that aren't in the topmost context are not collected;
 nevertheless, we need their in-use bit vectors to store GC
@@ -2204,7 +2204,7 @@ struct ggc_pch_ondisk
 struct ggc_pch_data
 {
   struct ggc_pch_ondisk d;
-  size_t base[NUM_ORDERS];
+  uintptr_t base[NUM_ORDERS];
   size_t written[NUM_ORDERS];
 };
 
@@ -2247,7 +2247,7 @@ ggc_pch_total_size (struct ggc_pch_data *d)
 void
 ggc_pch_this_base (struct ggc_pch_data *d, void *base)
 {
-  size_t a = (size_t) base;
+  uintptr_t a = (uintptr_t) base;
   unsigned i;
 
   for (i = 0; i  NUM_ORDERS; i++)


Re: [Patch] libgfortran: do not assume libm

2012-03-20 Thread Tobias Burnus
Hi Tristan,

 I am starting to build fortran for VMS.

Thanks for the patch and this endeavor.

 The first serious issue was with libgfortran/configure.  It checks for
 several math functions, but directly in libm using AC_CHECK_LIB.
 But there is on such things as libm on VMS systems 

I think after building you will run into the same issue when using
gfortran. The linking of -lm is hardcoded in gcc/fortran/gfortranspec.c
(search there for MATH_LIBRARY).

And it is hard coded in libgfortran/libgfortran.spec.in and in the
as-needed check of libgfortran/acinclude.m4 within a libquadmath check.

Talking about libquadmath, its libquadmath/configure.ac has the same issue.

 * * *

To your patch:

First, I am far from being a configure expert and thus would like if
a build maintainer could have a look (or Janne, who seems to have also
more experience.)


I don't understand the purpose of the line:

+# Check for libm
+AC_CHECK_LIB([m],[sin])

Except for printing to stdout and to the logs whether -lm is available and
contains sin, it doesn't seem to do anything. If that's the purpose, I think
it needs a better comment. If not, I would like to know its purpose.

Otherwise, the patch looks fine to me - but as written, I would like if
someone else (build maintainer, Janne, ...) could have a look.

Tobias


Re: [Patch] libgfortran: do not assume libm

2012-03-20 Thread Tristan Gingold

On Mar 20, 2012, at 10:48 AM, Tobias Burnus wrote:

 Hi Tristan,
 
 I am starting to build fortran for VMS.
 
 Thanks for the patch and this endeavor.
 
 The first serious issue was with libgfortran/configure.  It checks for
 several math functions, but directly in libm using AC_CHECK_LIB.
 But there is on such things as libm on VMS systems 
 
 I think after building you will run into the same issue when using
 gfortran. The linking of -lm is hardcoded in gcc/fortran/gfortranspec.c
 (search there for MATH_LIBRARY).

A target may redefine MATH_LIBRARY to "" to avoid linking with -lm, and this is
correctly handled by gfortranspec.c.  After defining MATH_LIBRARY in vms.h, I 
was
able to cross build a hello world fortran program.

So that part is already correctly handled!

 And it is hard coded in libgfortran/libgfortran.spec.in and in the
 as-needed check of libgfortran/acinclude.m4 within a libquadmath check.
 
 Talking about libquadmath, its libquadmath/configure.ac has the same issue.

I have to check that.  I was able to build libquadmath without errors, but I
didn't look at config.log, so I may have missed issues.

 * * *
 
 To your patch:
 
 First, I am far from being a configure expert and thus would like if
 a build maintainer could have a look (or Janne, who seems to have also
 more experience.)

Sure.

 I don't understand the purpose of the line:
 
 +# Check for libm
 +AC_CHECK_LIB([m],[sin])
 
 Except of printing to stdout and to the logs whether -lm is available and
 contains sin, it doesn't seem to do anything. If that's the purpose, I think
 it needs a better comment. If not, I would like to know its purpose.

The purpose is to check for the presence of libm.  If found, all tests will be 
linked
with -lm, which is necessary for the following tests on regular platforms.  
Indeed, I
should improve the documentation.

 Otherwise, the patch looks fine to me - but as written, I would like if
 someone else (build maintainer, Janne, ...) could have a look.

Thank you for your prompt reply,
Tristan.



Re: [debug/profile-mode] broken c++config.h

2012-03-20 Thread Paolo Carlini

On 03/20/2012 10:22 AM, Paolo Bonzini wrote:

I think \$$ is better.  Besides that it looks good.
Thanks for the review. Yesterday, when Benjamin had a look, I decided to 
go ahead and just commit the patch as posted (after having double 
checked that indeed it worked for me), thus no escaping. But I noticed 
that nearby we have got quite a few unescaped $$, should we change all 
of them?


Thanks,
Paolo.


Re: remove wrong code in immed_double_const

2012-03-20 Thread Richard Sandiford
Richard Guenther richard.guent...@gmail.com writes:
 On Tue, Mar 20, 2012 at 12:31 AM, Mike Stump mikest...@comcast.net wrote:
 On Mar 19, 2012, at 2:44 PM, Richard Sandiford wrote:
 Mike Stump mikest...@comcast.net writes:
 If we're going to remove the assert, we need to define stuff like
 that.

 Orthogonal.  The rest of the compiler defines what happens, it either
 is inconsistent, in which case it is by fiat, undefined, or it is
 consistent, in which case that consistency defines it.  The compiler
 is free to document this in a nice way, or do, what is usually done,
 which is to assume everybody just knows what it does.  Anyway, my
 point is, this routine doesn't define the data structure, and is
 _completely_ orthogonal to your concern.  It doesn't matter if it zero
 extends or sign extends or is inconsistent, has bugs, doesn't have
 bugs, is documented, or isn't documented.  In every single one of
 these cases, the code in the routine I am fixing, doesn't change.
 That is _why_ it is orthogonal.  If it weren't, you'd be able to state
 a value for which is mattered.  You can't, which is why you are wrong.
 If you think you are not wrong, please state a value for which it
 matters how it is defined.

 immed_double_const and CONST_DOUBLE are currently
 only defined for 2 HOST_WIDE_INTs.

 I don't happen to share your view.  The routine is defined by documentation. 
  The documentation might exist in a .texi file, in this case there is no 
 texi file for immed_double_const I don't think, next up, it is defined by 
 the comments before the routine.  In this case, it isn't so defined.

 The current definition reads:

 /* Return a CONST_DOUBLE or CONST_INT for a value specified as a pair
   of ints: I0 is the low-order word and I1 is the high-order word.
   Do not use this routine for non-integer modes; convert to
   REAL_VALUE_TYPE and use CONST_DOUBLE_FROM_REAL_VALUE.  */

 which is fine, and I don't _want_ to change that definition of the 
 routine.  I can't fix it, because it isn't broken.  If it were, you would be 
 able to state a case where the new code behaves in a manner inconsistent with 
 the definition, since there is none you cannot state one, and this is _why_ 
 you have failed to state such a case.  If you disagree, please state the 
 case.

 Now, if your review comment is, could you please update the comments in the 
 routine, I would just say, oh, sure:

 Index: emit-rtl.c
 ===
 --- emit-rtl.c  (revision 184563)
 +++ emit-rtl.c  (working copy)
 @@ -525,10 +525,9 @@ immed_double_const (HOST_WIDE_INT i0, HO

      1) If GET_MODE_BITSIZE (mode) = HOST_BITS_PER_WIDE_INT, then we use
        gen_int_mode.
 -     2) GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT, but the 
 value of
 -       the integer fits into HOST_WIDE_INT anyway (i.e., i1 consists only
 -       from copies of the sign bit, and sign of i0 and i1 are the same),  
 then
 -       we return a CONST_INT for i0.
 +     2) If the value of the integer fits into HOST_WIDE_INT anyway
 +       (i.e., i1 consists only from copies of the sign bit, and sign
 +       of i0 and i1 are the same), then we return a CONST_INT for i0.
      3) Otherwise, we create a CONST_DOUBLE for i0 and i1.  */
   if (mode != VOIDmode)
     {
 @@ -540,8 +539,6 @@ immed_double_const (HOST_WIDE_INT i0, HO

       if (GET_MODE_BITSIZE (mode) = HOST_BITS_PER_WIDE_INT)
        return gen_int_mode (i0, mode);
 -
 -      gcc_assert (GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT);
     }

   /* If this integer fits in one word, return a CONST_INT.  */


 Sorry I missed it.  Now, on to CONST_DOUBLE.  It does appear in a texi file:


 @findex const_double
 @item (const_double:@var{m} @var{i0} @var{i1} @dots{})
 Represents either a floating-point constant of mode @var{m} or an
 integer constant too large to fit into @code{HOST_BITS_PER_WIDE_INT}
 bits but small enough to fit within twice that number of bits (GCC
 does not provide a mechanism to represent even larger constants).  In
 the latter case, @var{m} will be @code{VOIDmode}.

 @findex CONST_DOUBLE_LOW
 If @var{m} is @code{VOIDmode}, the bits of the value are stored in
 @var{i0} and @var{i1}.  @var{i0} is customarily accessed with the macro
 @code{CONST_DOUBLE_LOW} and @var{i1} with @code{CONST_DOUBLE_HIGH}.


 Here again, I don't want to change the definition.  The current definition 
 applies and I am merely making the code conform to it.  It says that 
 CONST_DOUBLE is used when the _value_ of the constant is too large to fit 
 into HOST_BITS_PER_WIDE_INT bits.

 So, if you disagree with me, you will necessarily have to quote the 
 definition you are using, explain what the words mean to you _and_ state a 
 specific case in which the code post modification does not conform with 
 the existing definition.  You have failed yet again to do that.


 So, as good functions do, immed_double_const asserts that it is not being 
 used out 

[PATCH] Fix PR52627

2012-03-20 Thread Richard Guenther

This should fix PR52627.

Applied as obvious.

Richard.

2012-03-20  Richard Guenther  rguent...@suse.de

PR gcov-profile/52627
* libgcov.c (init_mx): Fix mutex name.

Index: libgcc/libgcov.c
===
*** libgcc/libgcov.c(revision 185563)
--- libgcc/libgcov.c(working copy)
*** __gthread_mutex_t __gcov_flush_mx ATTRIB
*** 713,719 
  static void
  init_mx (void)
  {
!   __GTHREAD_MUTEX_INIT_FUNCTION (mx);
  }
  static void
  init_mx_once (void)
--- 713,719 
  static void
  init_mx (void)
  {
!   __GTHREAD_MUTEX_INIT_FUNCTION (__gcov_flush_mx);
  }
  static void
  init_mx_once (void)


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Jakub Jelinek
On Tue, Mar 20, 2012 at 09:58:29AM +0100, Jakub Jelinek wrote:
 Yeah, my bootstrap just failed the same.  Will test:
 
 2012-03-20  Jakub Jelinek  ja...@redhat.com
 
   * config/i386/i386.c (ix86_decompose_address) case ZERO_EXTEND:
   If operand isn't UNSPEC, return 0.

Committed as obvious now that bootstrap/regtest finished on x86_64-linux
and i686-linux.

 --- gcc/config/i386/i386.c.jj 2012-03-20 09:35:06.0 +0100
 +++ gcc/config/i386/i386.c2012-03-20 09:56:35.038835835 +0100
 @@ -11516,6 +11516,8 @@ ix86_decompose_address (rtx addr, struct
  
   case ZERO_EXTEND:
 op = XEXP (op, 0);
 +   if (GET_CODE (op) != UNSPEC)
 + return 0;
 /* FALLTHRU */
  
   case UNSPEC:

Jakub


[PATCH] AVX2 permutation improvements

2012-03-20 Thread Jakub Jelinek
Hi!

This patch improves register - register broadcast AVX2 permutations
and also starts using vpermps where possible for V8SFmode
permutations.  Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?

2012-03-20  Jakub Jelinek  ja...@redhat.com

PR target/52607
* config/i386/i386.md (isa attribute): Add avx2 and noavx2.
(enabled attribute): Handle avx2 and noavx2 isas.
* config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcastmode_1):
New insns.
(vec_dupmode): Add avx2 =x,x alternative.
(vec_dupmode splitter): Don't split if TARGET_AVX2.
(*avx_vperm_broadcast_mode): Don't split V4DFmode if TARGET_AVX2.
For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
* config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps
for V8SFmode.
(expand_vec_perm_1): For broadcasts, use avx2_pbroadcastmode_1
if possible, handle also V8SFmode.

--- gcc/config/i386/i386.md.jj  2012-03-20 08:51:30.937236938 +0100
+++ gcc/config/i386/i386.md 2012-03-20 08:54:50.742079909 +0100
@@ -639,7 +639,7 @@ (define_attr use_carry 0,1 (const_st
 (define_attr movu 0,1 (const_string 0))
 
 ;; Used to control the enabled attribute on a per-instruction basis.
-(define_attr isa base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,bmi2
+(define_attr isa 
base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2
   (const_string base))
 
 (define_attr enabled 
@@ -652,6 +652,8 @@ (define_attr enabled 
   (symbol_ref TARGET_SSE4_1  !TARGET_AVX)
 (eq_attr isa avx) (symbol_ref TARGET_AVX)
 (eq_attr isa noavx) (symbol_ref !TARGET_AVX)
+(eq_attr isa avx2) (symbol_ref TARGET_AVX2)
+(eq_attr isa noavx2) (symbol_ref !TARGET_AVX2)
 (eq_attr isa bmi2) (symbol_ref TARGET_BMI2)
]
(const_int 1)))
--- gcc/config/i386/sse.md.jj   2012-03-20 08:51:30.940236899 +0100
+++ gcc/config/i386/sse.md  2012-03-20 08:55:22.344898469 +0100
@@ -3808,6 +3808,18 @@ (define_insn avx2_vec_dupmode
 (set_attr prefix vex)
 (set_attr mode MODE)])
 
+(define_insn avx2_vec_dupv8sf_1
+  [(set (match_operand:V8SF 0 register_operand =x)
+   (vec_duplicate:V8SF
+ (vec_select:SF
+   (match_operand:V8SF 1 register_operand x)
+   (parallel [(const_int 0)]]
+  TARGET_AVX2
+  vbroadcastss\t{%x1, %0|%0, %x1}
+  [(set_attr type sselog1)
+(set_attr prefix vex)
+(set_attr mode V8SF)])
+
 (define_insn vec_dupv4sf
   [(set (match_operand:V4SF 0 register_operand =x,x,x)
(vec_duplicate:V4SF
@@ -11876,6 +11888,19 @@ (define_insn avx2_pbroadcastmode
(set_attr prefix vex)
(set_attr mode sseinsnmode)])
 
+(define_insn avx2_pbroadcastmode_1
+  [(set (match_operand:VI_256 0 register_operand =x)
+   (vec_duplicate:VI_256
+ (vec_select:ssescalarmode
+   (match_operand:VI_256 1 nonimmediate_operand xm)
+   (parallel [(const_int 0)]]
+  TARGET_AVX2
+  vpbroadcastssemodesuffix\t{%x1, %0|%0, %x1}
+  [(set_attr type ssemov)
+   (set_attr prefix_extra 1)
+   (set_attr prefix vex)
+   (set_attr mode sseinsnmode)])
+
 (define_insn avx2_permvarv8si
   [(set (match_operand:V8SI 0 register_operand =x)
(unspec:V8SI
@@ -11967,16 +11992,18 @@ (define_mode_iterator AVX_VEC_DUP_MODE
   [V8SI V8SF V4DI V4DF])
 
 (define_insn vec_dupmode
-  [(set (match_operand:AVX_VEC_DUP_MODE 0 register_operand =x,x)
+  [(set (match_operand:AVX_VEC_DUP_MODE 0 register_operand =x,x,x)
(vec_duplicate:AVX_VEC_DUP_MODE
- (match_operand:ssescalarmode 1 nonimmediate_operand m,?x)))]
+ (match_operand:ssescalarmode 1 nonimmediate_operand m,x,?x)))]
   TARGET_AVX
   @
vbroadcastssescalarmodesuffix\t{%1, %0|%0, %1}
+   vbroadcastssescalarmodesuffix\t{%x1, %0|%0, %x1}
#
   [(set_attr type ssemov)
(set_attr prefix_extra 1)
(set_attr prefix vex)
+   (set_attr isa *,avx2,noavx2)
(set_attr mode V8SF)])
 
 (define_insn avx2_vbroadcasti128_mode
@@ -11995,7 +12022,7 @@ (define_split
   [(set (match_operand:AVX_VEC_DUP_MODE 0 register_operand)
(vec_duplicate:AVX_VEC_DUP_MODE
  (match_operand:ssescalarmode 1 register_operand)))]
-  TARGET_AVX  reload_completed
+  TARGET_AVX  !TARGET_AVX2  reload_completed
   [(set (match_dup 2)
(vec_duplicate:ssehalfvecmode (match_dup 1)))
(set (match_dup 0)
@@ -12057,7 +12084,7 @@ (define_insn_and_split *avx_vperm_broad
[(match_operand 3 const_int_operand C,n,n)])))]
   TARGET_AVX
   #
-   reload_completed
+   reload_completed  (MODEmode != V4DFmode || !TARGET_AVX2)
   [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
 {
   rtx op0 = operands[0], op1 = operands[1];
@@ -12067,6 +12094,13 @@ (define_insn_and_split *avx_vperm_broad
 {
   int mask;
 
+  if (TARGET_AVX2  elt == 0)
+   {
+ emit_insn (gen_vec_dupmode (op0, gen_lowpart (ssescalarmodemode,
+   

[PATCH] Decrease GC garbage in i?86 vec shuffle expansion

2012-03-20 Thread Jakub Jelinek
Hi!

On a testcase with 256 __builtin_shuffle V4DFmode calls I've counted
17 calls to expand_vselect{,_vconcat} on average for each __builtin_shuffle
call (some during testing, some during actual expansion, but that is
also often not successful).  This patch adjusts the code so that, when testing,
it doesn't create new insns again and again; only if testing shows that such
an insn is supported does it call emit_insn (copy_rtx ()) to emit it.

Bootstrapped/regtested on x86_64-linux and i686-linux, additionally tested
with
GCC_TEST_RUN_EXPENSIVE=1 make check-gcc 
RUNTESTFLAGS='--target_board=unix\{-m32/-mavx,-m64/-mavx\} 
dg-torture.exp=vshuf*'
Ok for trunk?

2012-03-20  Jakub Jelinek  ja...@redhat.com

* config/i386/i386.c (vselect_insn): New variable.
(init_vselect_insn): New function.
(expand_vselect, expand_vselect_insn): Add testing_p argument.
Call init_vselect_insn if vselect_insn is NULL.  Adjust
PATTERN (vselect_insn), instead of creating a new insn each time,
only emit a copy of it if not testing and recog has been successful.
(expand_vec_perm_pshufb, expand_vec_perm_1,
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
callers.

--- gcc/config/i386/i386.c.jj   2012-03-19 18:10:20.0 +0100
+++ gcc/config/i386/i386.c  2012-03-20 08:46:59.751806243 +0100
@@ -35517,43 +35517,88 @@ ix86_builtin_vectorization_cost (enum ve
 }
 }
 
+/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
+   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
+   insn every time.  */
+
+static GTY(()) rtx vselect_insn;
+
+/* Initialize vselect_insn.  */
+
+static void
+init_vselect_insn (void)
+{
+  unsigned i;
+  rtx x;
+
+  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
+  for (i = 0; i  MAX_VECT_LEN; ++i)
+XVECEXP (x, 0, i) = const0_rtx;
+  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
+   const0_rtx), x);
+  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
+  start_sequence ();
+  vselect_insn = emit_insn (x);
+  end_sequence ();
+}
+
 /* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA.  */
 
 static bool
-expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+expand_vselect (rtx target, rtx op0, const unsigned char *perm,
+   unsigned nelt, bool testing_p)
 {
-  rtx rperm[MAX_VECT_LEN], x;
-  unsigned i;
+  unsigned int i;
+  rtx x, save_vconcat;
+  int icode;
 
-  for (i = 0; i  nelt; ++i)
-rperm[i] = GEN_INT (perm[i]);
+  if (vselect_insn == NULL_RTX)
+init_vselect_insn ();
 
-  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
-  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
-  x = gen_rtx_SET (VOIDmode, target, x);
+  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
+  PUT_NUM_ELEM (XVEC (x, 0), nelt);
+  for (i = 0; i  nelt; ++i)
+XVECEXP (x, 0, i) = GEN_INT (perm[i]);
+  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
+  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
+  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
+  SET_DEST (PATTERN (vselect_insn)) = target;
+  icode = recog_memoized (vselect_insn);
+
+  if (icode = 0  !testing_p)
+emit_insn (copy_rtx (PATTERN (vselect_insn)));
+
+  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
+  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
+  INSN_CODE (vselect_insn) = -1;
 
-  x = emit_insn (x);
-  if (recog_memoized (x)  0)
-{
-  remove_insn (x);
-  return false;
-}
-  return true;
+  return icode = 0;
 }
 
 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
 
 static bool
 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
-   const unsigned char *perm, unsigned nelt)
+   const unsigned char *perm, unsigned nelt,
+   bool testing_p)
 {
   enum machine_mode v2mode;
   rtx x;
+  bool ok;
+
+  if (vselect_insn == NULL_RTX)
+init_vselect_insn ();
 
   v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
-  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
-  return expand_vselect (target, x, perm, nelt);
+  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
+  PUT_MODE (x, v2mode);
+  XEXP (x, 0) = op0;
+  XEXP (x, 1) = op1;
+  ok = expand_vselect (target, x, perm, nelt, testing_p);
+  XEXP (x, 0) = const0_rtx;
+  XEXP (x, 1) = const0_rtx;
+  return ok;
 }
 
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
@@ -35885,7 +35930,7 @@ expand_vec_perm_pshufb (struct expand_ve
return true;
  return expand_vselect (gen_lowpart (V4DImode, d-target),
 gen_lowpart (V4DImode, d-op0),
-perm, 4);
+perm, 4, false);

[PATCH] i?86 AVX cross-lane 2 insn permutations (PR target/52607)

2012-03-20 Thread Jakub Jelinek
Hi!

This patch implements expansion of 32-byte shuffles using vperm2[fi]128
insn followed by some single insn two operand shuffle (can be e.g. vunpck*,
vshuf* or vblend*).  Bootstrapped/regtested on x86_64-linux and i686-linux,
ok for trunk?

2012-03-20  Jakub Jelinek  ja...@redhat.com

PR target/52607
* config/i386/i386.c (expand_vec_perm_vperm2f128): New function.
(ix86_expand_vec_perm_const_1): Call it.

--- gcc/config/i386/i386.c.jj   2012-03-20 08:56:29.0 +0100
+++ gcc/config/i386/i386.c  2012-03-20 09:10:34.275976008 +0100
@@ -36627,6 +36627,126 @@ expand_vec_perm_vpermq_perm_1 (struct ex
   return true;
 }
 
+/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
+   a vector permutation using two instructions, vperm2f128 resp.
+   vperm2i128 followed by any single in-lane permutation.  */
+
+static bool
+expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
+{
+  struct expand_vec_perm_d dfirst, dsecond;
+  unsigned i, j, nelt = d-nelt, nelt2 = nelt / 2, perm;
+  bool ok;
+
+  if (!TARGET_AVX
+  || GET_MODE_SIZE (d-vmode) != 32
+  || (d-vmode != V8SFmode  d-vmode != V4DFmode  !TARGET_AVX2))
+return false;
+
+  dsecond = *d;
+  if (d-op0 == d-op1)
+dsecond.op1 = gen_reg_rtx (d-vmode);
+  dsecond.testing_p = true;
+
+  /* ((perm  2)|perm)  0x33 is the vperm2[fi]128
+ immediate.  For perm  16 the second permutation uses
+ d-op0 as first operand, for perm = 16 it uses d-op1
+ as first operand.  The second operand is the result of
+ vperm2[fi]128.  */
+  for (perm = 0; perm  32; perm++)
+{
+  /* Ignore permutations which do not move anything cross-lane.  */
+  if (perm  16)
+   {
+ /* The second shuffle for e.g. V4DFmode has
+0123 and ABCD operands.
+Ignore AB23, as 23 is already in the second lane
+of the first operand.  */
+ if ((perm  0xc) == (1  2)) continue;
+ /* And 01CD, as 01 is in the first lane of the first
+operand.  */
+ if ((perm  3) == 0) continue;
+ /* And 4567, as then the vperm2[fi]128 doesn't change
+anything on the original 4567 second operand.  */
+ if ((perm  0xf) == ((3  2) | 2)) continue;
+   }
+  else
+   {
+ /* The second shuffle for e.g. V4DFmode has
+4567 and ABCD operands.
+Ignore AB67, as 67 is already in the second lane
+of the first operand.  */
+ if ((perm  0xc) == (3  2)) continue;
+ /* And 45CD, as 45 is in the first lane of the first
+operand.  */
+ if ((perm  3) == 2) continue;
+ /* And 0123, as then the vperm2[fi]128 doesn't change
+anything on the original 0123 first operand.  */
+ if ((perm  0xf) == (1  2)) continue;
+   }
+
+  for (i = 0; i  nelt; i++)
+   {
+ j = d-perm[i] / nelt2;
+ if (j == ((perm  (2 * (i = nelt2)))  3))
+   dsecond.perm[i] = nelt + (i  nelt2) + (d-perm[i]  (nelt2 - 1));
+ else if (j == (unsigned) (i = nelt2) + 2 * (perm = 16))
+   dsecond.perm[i] = d-perm[i]  (nelt - 1);
+ else
+   break;
+   }
+
+  if (i == nelt)
+   {
+ start_sequence ();
+ ok = expand_vec_perm_1 (dsecond);
+ end_sequence ();
+   }
+  else
+   ok = false;
+
+  if (ok)
+   {
+ if (d-testing_p)
+   return true;
+
+ /* Found a usable second shuffle.  dfirst will be
+vperm2f128 on d-op0 and d-op1.  */
+ dsecond.testing_p = false;
+ dfirst = *d;
+ if (d-op0 == d-op1)
+   dfirst.target = dsecond.op1;
+ else
+   dfirst.target = gen_reg_rtx (d-vmode);
+ for (i = 0; i  nelt; i++)
+   dfirst.perm[i] = (i  (nelt2 - 1))
++ ((perm  (2 * (i = nelt2)))  3) * nelt2;
+
+ ok = expand_vec_perm_1 (dfirst);
+ gcc_assert (ok);
+
+ /* And dsecond is some single insn shuffle, taking
+d-op0 and result of vperm2f128 (if perm  16) or
+d-op1 and result of vperm2f128 (otherwise).  */
+ dsecond.op1 = dfirst.target;
+ if (perm = 16)
+   dsecond.op0 = dfirst.op1;
+
+ ok = expand_vec_perm_1 (dsecond);
+ gcc_assert (ok);
+
+ return true;
+   }
+
+  /* For d-op0 == d-op1 the only useful vperm2f128 permutation
+is 0x10.  */
+  if (d-op0 == d-op1)
+   return false;
+}
+
+  return false;
+}
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
a two vector permutation using 2 intra-lane interleave insns
and cross-lane shuffle for 32-byte vectors.  */
@@ -37414,6 +37534,9 @@ ix86_expand_vec_perm_const_1 (struct exp
   if (expand_vec_perm_vpermq_perm_1 (d))
 return true;
 
+  if (expand_vec_perm_vperm2f128 (d))
+return true;
+
   /* Try sequences of three instructions.  */
 
   if 

Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Tristan Gingold

On Mar 15, 2012, at 10:37 AM, Richard Guenther wrote:

 On Wed, 14 Mar 2012, Tristan Gingold wrote:
[…]

 
 Well.  To make this work in LTO the main function (thus, the program
 entry point) should be marked at cgraph level and all users of
 MAIN_NAME_P should instead check a flag on the cgraph node.
 
 Will write a predicate in tree.[ch].
 
 Please instead transition main-ness to the graph.

Hi,

here is the patch I wrote.  Does it match what you had in mind ?

main_identifier_node is now set in tree.c

I haven't changed MAIN_NAME_P uses in c-decl.c and cp/decl.c (obviously).
I haven't yet checked beyond simple build.

Tristan.

diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
index 89f5438..c575e97 100644
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -622,8 +622,6 @@ gigi (Node_Id gnat_root, int max_gnat_node, int number_name 
ATTRIBUTE_UNUSED,
   integer_type_node, NULL_TREE, true, false, true, false,
   NULL, Empty);
 
-  main_identifier_node = get_identifier (main);
-
   /* Install the builtins we might need, either internally or as
  user available facilities for Intrinsic imports.  */
   gnat_install_builtins ();
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index b83f45b..5d05d8a 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -5094,8 +5094,6 @@ c_common_nodes_and_builtins (void)
   if (!flag_preprocess_only)
 c_define_builtins (va_list_ref_type_node, va_list_arg_type_node);
 
-  main_identifier_node = get_identifier (main);
-
   /* Create the built-in __null node.  It is important that this is
  not shared.  */
   null_node = make_node (INTEGER_CST);
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index bd21169..7a7a774 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4513,9 +4513,8 @@ gimple_expand_cfg (void)
 
   /* If this function is `main', emit a call to `__main'
  to run global initializers, etc.  */
-  if (DECL_NAME (current_function_decl)
-   MAIN_NAME_P (DECL_NAME (current_function_decl))
-   DECL_FILE_SCOPE_P (current_function_decl))
+  if (DECL_FILE_SCOPE_P (current_function_decl)
+   cgraph_main_function_p (cgraph_get_node (current_function_decl)))
 expand_main_function ();
 
   /* Initialize the stack_protect_guard field.  This must happen after the
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 9cc3690..528fd19 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2766,7 +2766,7 @@ cgraph_propagate_frequency_1 (struct cgraph_node *node, 
void *data)
  /* It makes sense to put main() together with the static constructors.
 It will be executed for sure, but rest of functions called from
 main are definitely not at startup only.  */
- if (MAIN_NAME_P (DECL_NAME (edge-caller-decl)))
+ if (cgraph_main_function_p (edge-caller))
d-only_called_at_startup = 0;
   d-only_called_at_exit = edge-caller-only_called_at_exit;
}
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 191364c..4db3417 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -101,6 +101,9 @@ struct GTY(()) cgraph_local_info {
 
   /* True if the function may enter serial irrevocable mode.  */
   unsigned tm_may_enter_irr : 1;
+
+  /* True if the function is the program entry point (main in C).  */
+  unsigned main_function : 1;
 };
 
 /* Information about the function that needs to be computed globally
@@ -790,6 +793,13 @@ cgraph_next_function_with_gimple_body (struct cgraph_node 
*node)
   return NULL;
 }
 
+/* Return true iff NODE is the main function (main in C).  */
+static inline bool
+cgraph_main_function_p (struct cgraph_node *node)
+{
+  return node-local.main_function;
+}
+
 /* Walk all functions with body defined.  */
 #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \
for ((node) = cgraph_first_function_with_gimple_body (); (node); \
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 516f187..4a59f63 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -346,6 +346,10 @@ cgraph_finalize_function (tree decl, bool nested)
   notice_global_symbol (decl);
   node-local.finalized = true;
   node-lowered = DECL_STRUCT_FUNCTION (decl)-cfg != NULL;
+  node-local.main_function =
+DECL_FILE_SCOPE_P (decl)
+ ((!DECL_ASSEMBLER_NAME_SET_P (decl)  MAIN_NAME_P (DECL_NAME (decl)))
+   || decl_assembler_name_equal (decl, main_identifier_node));
 
   if (cgraph_decide_is_function_needed (node, decl))
 cgraph_mark_needed_node (node);
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 2eccda9..8ac169e 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -360,7 +360,8 @@ do {\
 
 #undef PROFILE_HOOK
 #define PROFILE_HOOK(LABEL)\
-  if (MAIN_NAME_P (DECL_NAME (current_function_decl))) \
+  if (DECL_FILE_SCOPE_P 

Re: [debug/profile-mode] broken c++config.h

2012-03-20 Thread Paolo Bonzini
Il 20/03/2012 11:33, Paolo Carlini ha scritto:
 On 03/20/2012 10:22 AM, Paolo Bonzini wrote:
 I think \$$ is better.  Besides that it looks good.
 Thanks for the review. Yesterday, when Benjamin had a look, I decided to
 go ahead and just commit the patch as posted (after having double
 checked that indeed it worked for me), thus no escaping. But I noticed
 that nearby we have got quite a few unescaped $$, should we change all
 of them?

As you prefer... apparently they do not cause trouble, so you might as
well not care.

Paolo



Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Richard Guenther
On Tue, 20 Mar 2012, Tristan Gingold wrote:

 
 On Mar 15, 2012, at 10:37 AM, Richard Guenther wrote:
 
  On Wed, 14 Mar 2012, Tristan Gingold wrote:
 […]
 
  
  Well.  To make this work in LTO the main function (thus, the program
  entry point) should be marked at cgraph level and all users of
  MAIN_NAME_P should instead check a flag on the cgraph node.
  
  Will write a predicate in tree.[ch].
  
  Please instead transition main-ness to the graph.
 
 Hi,
 
 here is the patch I wrote.  Does it match what you had in mind ?

Basically yes.  Comments below.

 main_identifier_node is now set in tree.c

Looks good, hopefully my review-grep was as good as yours ;)

 I haven't changed MAIN_NAME_P uses in c-decl.c and cp/decl.c (obviously).
 I haven't yet checked beyond simple build.
 
 Tristan.
 
 diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
 index 89f5438..c575e97 100644
 --- a/gcc/ada/gcc-interface/trans.c
 +++ b/gcc/ada/gcc-interface/trans.c
 @@ -622,8 +622,6 @@ gigi (Node_Id gnat_root, int max_gnat_node, int 
 number_name ATTRIBUTE_UNUSED,
  integer_type_node, NULL_TREE, true, false, true, false,
  NULL, Empty);
  
 -  main_identifier_node = get_identifier (main);
 -
/* Install the builtins we might need, either internally or as
   user available facilities for Intrinsic imports.  */
gnat_install_builtins ();
 diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
 index b83f45b..5d05d8a 100644
 --- a/gcc/c-family/c-common.c
 +++ b/gcc/c-family/c-common.c
 @@ -5094,8 +5094,6 @@ c_common_nodes_and_builtins (void)
if (!flag_preprocess_only)
  c_define_builtins (va_list_ref_type_node, va_list_arg_type_node);
  
 -  main_identifier_node = get_identifier (main);
 -
/* Create the built-in __null node.  It is important that this is
   not shared.  */
null_node = make_node (INTEGER_CST);
 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
 index bd21169..7a7a774 100644
 --- a/gcc/cfgexpand.c
 +++ b/gcc/cfgexpand.c
 @@ -4513,9 +4513,8 @@ gimple_expand_cfg (void)
  
/* If this function is `main', emit a call to `__main'
   to run global initializers, etc.  */
 -  if (DECL_NAME (current_function_decl)
 -   MAIN_NAME_P (DECL_NAME (current_function_decl))
 -   DECL_FILE_SCOPE_P (current_function_decl))
 +  if (DECL_FILE_SCOPE_P (current_function_decl)
 +   cgraph_main_function_p (cgraph_get_node (current_function_decl)))
  expand_main_function ();

The DECL_FILE_SCOPE_P check is redundant, please remove them everywhere
you call cgraph_main_function_p.  I suppose returning false if the
cgraph node is NULL in cgraph_main_function_p would be good.

  
/* Initialize the stack_protect_guard field.  This must happen after the
 diff --git a/gcc/cgraph.c b/gcc/cgraph.c
 index 9cc3690..528fd19 100644
 --- a/gcc/cgraph.c
 +++ b/gcc/cgraph.c
 @@ -2766,7 +2766,7 @@ cgraph_propagate_frequency_1 (struct cgraph_node *node, 
 void *data)
 /* It makes sense to put main() together with the static constructors.
It will be executed for sure, but rest of functions called from
main are definitely not at startup only.  */
 -   if (MAIN_NAME_P (DECL_NAME (edge-caller-decl)))
 +   if (cgraph_main_function_p (edge-caller))
   d-only_called_at_startup = 0;
d-only_called_at_exit = edge-caller-only_called_at_exit;
   }
 diff --git a/gcc/cgraph.h b/gcc/cgraph.h
 index 191364c..4db3417 100644
 --- a/gcc/cgraph.h
 +++ b/gcc/cgraph.h
 @@ -101,6 +101,9 @@ struct GTY(()) cgraph_local_info {
  
/* True if the function may enter serial irrevocable mode.  */
unsigned tm_may_enter_irr : 1;
 +
 +  /* True if the function is the program entry point (main in C).  */
 +  unsigned main_function : 1;
  };
  
  /* Information about the function that needs to be computed globally
 @@ -790,6 +793,13 @@ cgraph_next_function_with_gimple_body (struct 
 cgraph_node *node)
return NULL;
  }
  
 +/* Return true iff NODE is the main function (main in C).  */
 +static inline bool
 +cgraph_main_function_p (struct cgraph_node *node)
 +{
 +  return node-local.main_function;

node && node->local.main_function

 +}
 +
  /* Walk all functions with body defined.  */
  #define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \
 for ((node) = cgraph_first_function_with_gimple_body (); (node); \
 diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
 index 516f187..4a59f63 100644
 --- a/gcc/cgraphunit.c
 +++ b/gcc/cgraphunit.c
 @@ -346,6 +346,10 @@ cgraph_finalize_function (tree decl, bool nested)
notice_global_symbol (decl);
node-local.finalized = true;
node-lowered = DECL_STRUCT_FUNCTION (decl)-cfg != NULL;
 +  node-local.main_function =
 +DECL_FILE_SCOPE_P (decl)
 + ((!DECL_ASSEMBLER_NAME_SET_P (decl)  MAIN_NAME_P (DECL_NAME (decl)))
 + || decl_assembler_name_equal (decl, main_identifier_node));

If we finalize a function we should always create an assembler name,

Re: remove wrong code in immed_double_const

2012-03-20 Thread Richard Sandiford
Richard Guenther richard.guent...@gmail.com writes:
 I've no objection to moving the assert down to after the GEN_INT.
 But it sounds like I'm on my own with the whole CONST_DOUBLE sign thing.
 (That is, if we remove the assert altogether, we effectively treat the
 number as sign-extended if it happens to fit in a CONST_INT, and
 zero-extended otherwise.

 Why do we treat it zero-extended otherwise?  Because we use
 gen_int_mode for CONST_INTs, which sign-extends?

Just to make sure we're not talking past each other, I meant
moving the assert to:

/* If this integer fits in one word, return a CONST_INT.  */
[A] if ((i1 == 0 && i0 >= 0) || (i1 == ~0 && i0 < 0))
  return GEN_INT (i0);

---HERE---

/* We use VOIDmode for integers.  */
value = rtx_alloc (CONST_DOUBLE);
PUT_MODE (value, VOIDmode);

CONST_DOUBLE_LOW (value) = i0;
CONST_DOUBLE_HIGH (value) = i1;

for (i = 2; i < (sizeof CONST_DOUBLE_FORMAT - 1); i++)
  XWINT (value, i) = 0;

return lookup_const_double (value);

[A] treats i0 and i1 as a sign-extended value.  So if we
removed the assert (or moved it to the suggested place):

immed_double_const (-1, -1, 4_hwi_mode)

would create -1 in 4_hwi_mode, represented as a CONST_INT.
The three implicit high-order HWIs are -1.  That's fine,
because CONST_INT has long been defined as sign-extending
rather than zero-extending.

But if we fail the [A] test, we go on to create a CONST_DOUBLE.
The problem is that AIUI we have never defined what happens for
CONST_DOUBLE if the mode is wider than 2 HWIs.  Again AIUI,
that's why the assert is there.

This matters because of things like the handling in simplify_immed_subreg
(which, e.g., we use to generate CONST_DOUBLE pool constants, split
constant moves in lower-subreg.c, etc.).  CONST_INT is already
well-defined to be a sign-extended constant, and we handle it correctly:

  switch (GET_CODE (el))
{
case CONST_INT:
  for (i = 0;
   i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
   i += value_bit)
*vp++ = INTVAL (el) >> i;
  /* CONST_INTs are always logically sign-extended.  */
  for (; i < elem_bitsize; i += value_bit)
*vp++ = INTVAL (el) < 0 ? -1 : 0;
  break;

But because of this assert, the equivalent meaning for
CONST_DOUBLE has never been defined, and the current code
happens to zero-extend it:

case CONST_DOUBLE:
  if (GET_MODE (el) == VOIDmode)
{
  /* If this triggers, someone should have generated a
 CONST_INT instead.  */
  gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);

  for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
*vp++ = CONST_DOUBLE_LOW (el) >> i;
  while (i < HOST_BITS_PER_WIDE_INT * 2 && i < elem_bitsize)
{
  *vp++
= CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
  i += value_bit;
}
  /* It shouldn't matter what's done here, so fill it with
 zero.  */
  for (; i < elem_bitsize; i += value_bit)
*vp++ = 0;
}

So the upshot is that:

immed_double_const (-1, -1, 4_hwi_mode)

sign-extends i1 (the second -1), creating (-1, -1, -1, -1).  But:

immed_double_const (0, -1, 4_hwi_mode)

effectively (as the code falls out at the moment) zero-extends it,
creating (0, -1, 0, 0).  That kind of inconsistency seems wrong.

So what I was trying to say was that if we remove the assert
altogether, and allow CONST_DOUBLEs to be wider than 2 HWIs,
we need to define what the implicit high-order HWIs of a
CONST_DOUBLE are, just like we already do for CONST_INT.
If we remove the assert altogether, it very much matters
what is done by that last *vp line.

If Mike or anyone is up to doing that, then great.  But if instead
it's just a case of handling zero correctly, moving rather than
removing the assert seems safer.

I'm obviously not explaining this well :-)

Richard


Re: remove wrong code in immed_double_const

2012-03-20 Thread Richard Guenther
On Tue, Mar 20, 2012 at 1:26 PM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Richard Guenther richard.guent...@gmail.com writes:
 I've no objection to moving the assert down to after the GEN_INT.
 But it sounds like I'm on my own with the whole CONST_DOUBLE sign thing.
 (That is, if we remove the assert altogether, we effectively treat the
 number as sign-extended if it happens to fit in a CONST_INT, and
 zero-extended otherwise.

 Why do we treat it zero-extended otherwise?  Because we use
 gen_int_mode for CONST_INTs, which sign-extends?

 Just to make sure we're not talking past each other, I meant
 moving the assert to:

    /* If this integer fits in one word, return a CONST_INT.  */
 [A] if ((i1 == 0  i0 = 0) || (i1 == ~0  i0  0))
      return GEN_INT (i0);

 ---HERE---

    /* We use VOIDmode for integers.  */
    value = rtx_alloc (CONST_DOUBLE);
    PUT_MODE (value, VOIDmode);

    CONST_DOUBLE_LOW (value) = i0;
    CONST_DOUBLE_HIGH (value) = i1;

    for (i = 2; i  (sizeof CONST_DOUBLE_FORMAT - 1); i++)
      XWINT (value, i) = 0;

    return lookup_const_double (value);

 [A] treats i0 and i1 as a sign-extended value.  So if we
 removed the assert (or moved it to the suggested place):

    immed_double_const (-1, -1, 4_hwi_mode)

 would create -1 in 4_hwi_mode, represented as a CONST_INT.
 The three implicit high-order HWIs are -1.  That's fine,
 because CONST_INT has long been defined as sign-extending
 rather than zero-extending.

 But if we fail the [A] test, we go on to create a CONST_DOUBLE.
 The problem is that AIUI we have never defined what happens for
 CONST_DOUBLE if the mode is wider than 2 HWIs.  Again AIUI,
 that's why the assert is there.

 This matters because of things like the handling in simplify_immed_subreg
 (which, e.g., we use to generate CONST_DOUBLE pool constants, split
 constant moves in lower-subreg.c, etc.).  CONST_INT is already
 well-defined to be a sign-extended constant, and we handle it correctly:

      switch (GET_CODE (el))
        {
        case CONST_INT:
          for (i = 0;
               i  HOST_BITS_PER_WIDE_INT  i  elem_bitsize;
               i += value_bit)
            *vp++ = INTVAL (el)  i;
          /* CONST_INTs are always logically sign-extended.  */
          for (; i  elem_bitsize; i += value_bit)
            *vp++ = INTVAL (el)  0 ? -1 : 0;
          break;

 But because of this assert, the equivalent meaning for
 CONST_DOUBLE has never been defined, and the current code
 happens to zero-extend it:

        case CONST_DOUBLE:
          if (GET_MODE (el) == VOIDmode)
            {
              /* If this triggers, someone should have generated a
                 CONST_INT instead.  */
              gcc_assert (elem_bitsize  HOST_BITS_PER_WIDE_INT);

              for (i = 0; i  HOST_BITS_PER_WIDE_INT; i += value_bit)
                *vp++ = CONST_DOUBLE_LOW (el)  i;
              while (i  HOST_BITS_PER_WIDE_INT * 2  i  elem_bitsize)
                {
                  *vp++
                    = CONST_DOUBLE_HIGH (el)  (i - HOST_BITS_PER_WIDE_INT);
                  i += value_bit;
                }
              /* It shouldn't matter what's done here, so fill it with
                 zero.  */
              for (; i  elem_bitsize; i += value_bit)
                *vp++ = 0;
            }

 So the upshot is that:

    immed_double_const (-1, -1, 4_hwi_mode)

 sign-extends i1 (the second -1), creating (-1, -1, -1, -1).  But:

    immed_double_const (0, -1, 4_hwi_mode)

 effectively (as the code falls out at the moment) zero-extends it,
 creating (0, -1, 0, 0).  That kind of inconsistency seems wrong.

 So what I was trying to say was that if we remove the assert
 altogether, and allow CONST_DOUBLEs to be wider than 2 HWIs,
 we need to define what the implicit high-order HWIs of a
 CONST_DOUBLE are, just like we already do for CONST_INT.
 If we remove the assert altogether, it very much matters
 what is done by that last *vp line.

 If Mike or anyone is up to doing that, then great.  But if instead
 it's just a case of handling zero correctly, moving rather than
 removing the assert seems safer.

 I'm obviously not explaining this well :-)

Ok, I see what you mean.  Yes, moving the assert past the GEN_INT
case (though that is specifically meant to deal with the VOIDmode case
I think?) is ok.

Thanks,
Richard.

 Richard


Re: remove wrong code in immed_double_const

2012-03-20 Thread Michael Matz
Hi,

On Tue, 20 Mar 2012, Richard Sandiford wrote:

 If Mike or anyone is up to doing that, then great.  But if instead it's 
 just a case of handling zero correctly, moving rather than removing the 
 assert seems safer.
 
 I'm obviously not explaining this well :-)

Actually you did.  I tried yesterday to come up with a text that would 
do the same (because I agree with you that deleting the assert changes 
the spec of the function, simply because the assert _is_ part of the 
spec of the function), and my attempt was _much_ worse than yours, so I 
didn't send it :)


Ciao,
Michael.


[PATCH, ARM] Don't allow arbitrary constants into Neon vector compare

2012-03-20 Thread Richard Earnshaw
The Neon vector-compare operations allow a register or a vector of zeros
as the operands.  However, we currently permit any immediate.  This can
allow the optimizer to sink a non-trivial constant into a loop when it
would be better left in a register.  Further, by hiding the register needed
to rematerialize the value from the register allocators we can end up
with worse code in some cases.

Fixed by only allowing zero or a register in the compare operations.

* arm/predicates.md (zero_operand, reg_or_zero_operand): New predicates.
* arm/neon.md (neon_vceqmode, neon_vcgemode): Use 
reg_or_zero_operand
predicate.
(neon_vclemode, neon_vcltmode): Use zero_operand predicate.

Tested on both gcc-4.7 and trunk, but only committing it to trunk.

R.--- config/arm/neon.md  (revision 185488)
+++ config/arm/neon.md  (local)
@@ -2114,7 +2114,7 @@ (define_insn neon_vceqmode
   [(set (match_operand:V_cmp_result 0 s_register_operand =w,w)
 (unspec:V_cmp_result
  [(match_operand:VDQW 1 s_register_operand w,w)
-  (match_operand:VDQW 2 nonmemory_operand w,Dz)
+  (match_operand:VDQW 2 reg_or_zero_operand w,Dz)
   (match_operand:SI 3 immediate_operand i,i)]
   UNSPEC_VCEQ))]
   TARGET_NEON
@@ -2133,7 +2133,7 @@ (define_insn neon_vcgemode
   [(set (match_operand:V_cmp_result 0 s_register_operand =w,w)
 (unspec:V_cmp_result
  [(match_operand:VDQW 1 s_register_operand w,w)
-  (match_operand:VDQW 2 nonmemory_operand w,Dz)
+  (match_operand:VDQW 2 reg_or_zero_operand w,Dz)
   (match_operand:SI 3 immediate_operand i,i)]
   UNSPEC_VCGE))]
   TARGET_NEON
@@ -2164,7 +2164,7 @@ (define_insn neon_vcgtmode
   [(set (match_operand:V_cmp_result 0 s_register_operand =w,w)
 (unspec:V_cmp_result
  [(match_operand:VDQW 1 s_register_operand w,w)
-  (match_operand:VDQW 2 nonmemory_operand w,Dz)
+  (match_operand:VDQW 2 reg_or_zero_operand w,Dz)
(match_operand:SI 3 immediate_operand i,i)]
   UNSPEC_VCGT))]
   TARGET_NEON
@@ -2198,7 +2198,7 @@ (define_insn neon_vclemode
   [(set (match_operand:V_cmp_result 0 s_register_operand =w)
 (unspec:V_cmp_result
  [(match_operand:VDQW 1 s_register_operand w)
-  (match_operand:VDQW 2 nonmemory_operand Dz)
+  (match_operand:VDQW 2 zero_operand Dz)
   (match_operand:SI 3 immediate_operand i)]
   UNSPEC_VCLE))]
   TARGET_NEON
@@ -2215,7 +2215,7 @@ (define_insn neon_vcltmode
   [(set (match_operand:V_cmp_result 0 s_register_operand =w)
 (unspec:V_cmp_result
  [(match_operand:VDQW 1 s_register_operand w)
-  (match_operand:VDQW 2 nonmemory_operand Dz)
+  (match_operand:VDQW 2 zero_operand Dz)
   (match_operand:SI 3 immediate_operand i)]
   UNSPEC_VCLT))]
   TARGET_NEON
--- config/arm/predicates.md(revision 185488)
+++ config/arm/predicates.md(local)
@@ -89,6 +89,15 @@ (define_predicate vfp_register_operand
   REGNO_REG_CLASS (REGNO (op)) == VFP_REGS)));
 })
 
+(define_predicate zero_operand
+  (and (match_code const_int,const_double,const_vector)
+   (match_test op == CONST0_RTX (mode
+
+;; Match a register, or zero in the appropriate mode.
+(define_predicate reg_or_zero_operand
+  (ior (match_operand 0 s_register_operand)
+   (match_operand 0 zero_operand)))
+
 (define_special_predicate subreg_lowpart_operator
   (and (match_code subreg)
(match_test subreg_lowpart_p (op

Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Tristan Gingold

On Mar 20, 2012, at 1:21 PM, Richard Guenther wrote:

 On Tue, 20 Mar 2012, Tristan Gingold wrote:
 
 
 On Mar 15, 2012, at 10:37 AM, Richard Guenther wrote:
 
 On Wed, 14 Mar 2012, Tristan Gingold wrote:
 […]
 
 
 Well.  To make this work in LTO the main function (thus, the program
 entry point) should be marked at cgraph level and all users of
 MAIN_NAME_P should instead check a flag on the cgraph node.
 
 Will write a predicate in tree.[ch].
 
 Please instead transition main-ness to the graph.
 
 Hi,
 
 here is the patch I wrote.  Does it match what you had in mind ?
 
 Basically yes.  Comments below.
 
 main_identifier_node is now set in tree.c
 
 Looks good, hopefully my review-grep was as good as yours ;)

[…]

 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
 index bd21169..7a7a774 100644
 --- a/gcc/cfgexpand.c
 +++ b/gcc/cfgexpand.c
 @@ -4513,9 +4513,8 @@ gimple_expand_cfg (void)
 
   /* If this function is `main', emit a call to `__main'
  to run global initializers, etc.  */
 -  if (DECL_NAME (current_function_decl)
 -   MAIN_NAME_P (DECL_NAME (current_function_decl))
 -   DECL_FILE_SCOPE_P (current_function_decl))
 +  if (DECL_FILE_SCOPE_P (current_function_decl)
 +   cgraph_main_function_p (cgraph_get_node (current_function_decl)))
 expand_main_function ();
 
 The DECL_FILE_SCOPE_P check is redundant, please remove them everywhere
 you call cgraph_main_function_p.  I suppose returning false if the
 cgraph node is NULL in cgraph_main_function_p would be good.

Ok.  (I added the DECL_FILE_SCOPE_P check to avoid the cgraph lookup for speed 
reasons)

[…]

 diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
 index 516f187..4a59f63 100644
 --- a/gcc/cgraphunit.c
 +++ b/gcc/cgraphunit.c
 @@ -346,6 +346,10 @@ cgraph_finalize_function (tree decl, bool nested)
   notice_global_symbol (decl);
   node-local.finalized = true;
   node-lowered = DECL_STRUCT_FUNCTION (decl)-cfg != NULL;
 +  node-local.main_function =
 +DECL_FILE_SCOPE_P (decl)
 + ((!DECL_ASSEMBLER_NAME_SET_P (decl)  MAIN_NAME_P (DECL_NAME 
 (decl)))
 +|| decl_assembler_name_equal (decl, main_identifier_node));
 
 If we finalize a function we should always create an assembler name,
 thus I'd change the above to
 
  node-local.main_function = decl_assembler_name_equal (decl, 
 main_identifier_node);

Indeed.  At worst, the assembler name is created during the call to 
notice_global_symbol.

 btw, decl_assembler_name_equal doesn't seem to remove target-specific
 mangling - do some OSes mangle main differently (I'm thinking of
 leading underscores or complete renames)?  Thus, I guess the
 targets might want to be able to provide the main_identifier_assember_name
 you use here.

I think this is currently OK because decl_assembler_name_equal deals
with leading underscore correctly.  I have checked that on Darwin,
which has a leading underscore.

The only target that mangles names is i386 cygwin/mingw, which 'annotates'
stdcall and fastcall functions, but main() is regular.

But I agree this mechanism is fragile.

In order to make this mechanism stronger, we could add main_function_node, which
designates the FUNCTION_DECL that is the main function (if not NULL_TREE), with
a fallback on main_identifier_node for regular languages such as C or C++.

Tristan.



Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Richard Guenther
On Tue, 20 Mar 2012, Tristan Gingold wrote:

 
 On Mar 20, 2012, at 1:21 PM, Richard Guenther wrote:
 
  On Tue, 20 Mar 2012, Tristan Gingold wrote:
  
  
  On Mar 15, 2012, at 10:37 AM, Richard Guenther wrote:
  
  On Wed, 14 Mar 2012, Tristan Gingold wrote:
  […]
  
  
  Well.  To make this work in LTO the main function (thus, the program
  entry point) should be marked at cgraph level and all users of
  MAIN_NAME_P should instead check a flag on the cgraph node.
  
  Will write a predicate in tree.[ch].
  
  Please instead transition main-ness to the graph.
  
  Hi,
  
  here is the patch I wrote.  Does it match what you had in mind ?
  
  Basically yes.  Comments below.
  
  main_identifier_node is now set in tree.c
  
  Looks good, hopefully my review-grep was as good as yours ;)
 
 […]
 
  diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
  index bd21169..7a7a774 100644
  --- a/gcc/cfgexpand.c
  +++ b/gcc/cfgexpand.c
  @@ -4513,9 +4513,8 @@ gimple_expand_cfg (void)
  
/* If this function is `main', emit a call to `__main'
   to run global initializers, etc.  */
  -  if (DECL_NAME (current_function_decl)
  -   MAIN_NAME_P (DECL_NAME (current_function_decl))
  -   DECL_FILE_SCOPE_P (current_function_decl))
  +  if (DECL_FILE_SCOPE_P (current_function_decl)
  +   cgraph_main_function_p (cgraph_get_node (current_function_decl)))
  expand_main_function ();
  
  The DECL_FILE_SCOPE_P check is redundant, please remove them everywhere
  you call cgraph_main_function_p.  I suppose returning false if the
  cgraph node is NULL in cgraph_main_function_p would be good.
 
 Ok.  (I added the DECL_FILE_SCOPE_P check to avoid the cgraph lookup for 
 speed reason)
 
 […]
 
  diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
  index 516f187..4a59f63 100644
  --- a/gcc/cgraphunit.c
  +++ b/gcc/cgraphunit.c
  @@ -346,6 +346,10 @@ cgraph_finalize_function (tree decl, bool nested)
notice_global_symbol (decl);
node-local.finalized = true;
node-lowered = DECL_STRUCT_FUNCTION (decl)-cfg != NULL;
  +  node-local.main_function =
  +DECL_FILE_SCOPE_P (decl)
  + ((!DECL_ASSEMBLER_NAME_SET_P (decl)  MAIN_NAME_P (DECL_NAME 
  (decl)))
  +  || decl_assembler_name_equal (decl, main_identifier_node));
  
  If we finalize a function we should always create an assembler name,
  thus I'd change the above to
  
   node-local.main_function = decl_assembler_name_equal (decl, 
  main_identifier_node);
 
 Indeed.  At worst, the assembler name is created during the call to 
 notice_global_symbol.
 
  btw, decl_assembler_name_equal doesn't seem to remove target-specific
  mangling - do some OSes mangle main differently (I'm thinking of
  leading underscores or complete renames)?  Thus, I guess the
  targets might want to be able to provide the main_identifier_assember_name
  you use here.
 
 I think this is currently OK because decl_assembler_name_equal deals
 with leading underscore correctly.  I have checked that on Darwin,
 which has a leading underscore.
 
 The only target that mangle names is i386 cygwin/mingw, which 'annotates'
 stdcall and fastcall function, but main() is regular.
 
 But I agree this mechanism is fragile.
 
 In order to make this mechanism stronger, we could add main_function_node, 
 which
 designates the FUNCTION_DECL that is the main function (if not NULL_TREE), 
 with
 a fallback on main_identifier_node for regular languages such as C or C++.

I'd rather get away from using a global main_identifier_node, instead
make that frontend specific, and introduce targetm.main_assembler_name
which the assembler-name creating langhook would make sure to use
when mangling what the FE thinks main is.  main_identifier_node should
not serve any purpose outside of Frontends.

But I see both as a possible cleanup opportunity, not a necessary change.

Richard.

Re: [arm] Improve longlong.h umul_ppmm, count_trailing_zeros

2012-03-20 Thread Richard Earnshaw
On 01/02/12 13:23, Richard Earnshaw wrote:
 On 31/01/12 05:15, Richard Henderson wrote:
 Despite how trivial this is, I assume this must wait for stage1.
 Ok?


 r~


  * longlong.h [arm] (umul_ppmm): Use umull.
  [arm] (count_trailing_zeros): Use __builtin_ctz.
 
 armv3m also has the widening multiply operation (it's what the M stands
 for).
 
 Otherwise ok for stage1
 

And it's a good job we did.  I've just noticed that it's broken thumb1
builds of libgcc.

 __ctzsi2:
   0:   b508push{r3, lr}
   2:   f7ff fffe   bl  0 __ctzsi2
2: R_ARM_THM_CALL   __ctzsi2
   6:   bc08pop {r3}
   8:   bc02pop {r1}
   a:   4708bx  r1

R.



[PATCH] Replace a SRA FIXME with an assert

2012-03-20 Thread Martin Jambor
Hi,

this patch which removes one of only two FIXMEs in tree-sra.c has been
sitting in my patch queue for over a year.  Yesterday I noticed it
there, bootstrapped and tested it on x86_64-linux and it passed.

I'd like to either commit it or just remove the comment, if there
likely still are size inconsistencies in assignments but we are not
planning to do anything with them in foreseeable future (and perhaps
add a note to the bug).

So, which should it be?

Thanks,

Martin


2011-01-06  Martin Jambor  mjam...@suse.cz

* tree-sra.c (build_accesses_from_assign): Make size equality test
an assert.

Index: src/gcc/tree-sra.c
===
--- src.orig/gcc/tree-sra.c
+++ src/gcc/tree-sra.c
@@ -1175,13 +1175,11 @@ build_accesses_from_assign (gimple stmt)
!lacc-grp_unscalarizable_region
!racc-grp_unscalarizable_region
AGGREGATE_TYPE_P (TREE_TYPE (lhs))
-  /* FIXME: Turn the following line into an assert after PR 40058 is
-fixed.  */
-   lacc-size == racc-size
useless_type_conversion_p (lacc-type, racc-type))
 {
   struct assign_link *link;
 
+  gcc_assert (lacc-size == racc-size);
   link = (struct assign_link *) pool_alloc (link_pool);
   memset (link, 0, sizeof (struct assign_link));
 



Re: [PATCH] Decrease GC garbage in i?86 vec shuffle expansion

2012-03-20 Thread Richard Henderson
On 03/20/12 04:27, Jakub Jelinek wrote:
 2012-03-20  Jakub Jelinek  ja...@redhat.com
 
   * config/i386/i386.c (vselect_insn): New variable.
   (init_vselect_insn): New function.
   (expand_vselect, expand_vselect_insn): Add testing_p argument.
   Call init_vselect_insn if vselect_insn is NULL.  Adjust
   PATTERN (vselect_insn), instead of creating a new insn each time,
   only emit a copy of it if not testing and recog has been successful.
   (expand_vec_perm_pshufb, expand_vec_perm_1,
   expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
   callers.

Ok.


r~


Re: [PATCH] Replace a SRA FIXME with an assert

2012-03-20 Thread Richard Guenther
On Tue, 20 Mar 2012, Martin Jambor wrote:

 Hi,
 
 this patch which removes one of only two FIXMEs in tree-sra.c has been
 sitting in my patch queue for over a year.  Yesterday I noticed it
 there, bootstrapped and tested it on x86_64-linux and it passed.
 
 I'd like to either commit it or just remove the comment, if there
 likely still are size inconsistencies in assignments but we are not
 planning to do anything with them in foreseeable future (and perhaps
 add a note to the bug).
 
 So, which should it be?

Well.  Aggregate assignments can still be off I think, especially
because of the disconnect between TYPE_SIZE and DECL_SIZE in
some cases, considering *p = x; with typeof (x) == typeof (*p)
(tail-padding re-use).

The comments in PR40058 hint that that issue might be fixed,
but I also remember issues with Ada.

GIMPLE verification ensures compatible types (but not a match
of type_size / decl_size which will be exposed by get_ref_base_and_extent)

But the real question is what do you want to guard against here?
The assert at least looks like it is going to trigger at some point,
but would it be a problem if the sizes do not match?

Richard.


 2011-01-06  Martin Jambor  mjam...@suse.cz
 
   * tree-sra.c (build_accesses_from_assign): Make size equality test
   an assert.
 
 Index: src/gcc/tree-sra.c
 ===
 --- src.orig/gcc/tree-sra.c
 +++ src/gcc/tree-sra.c
 @@ -1175,13 +1175,11 @@ build_accesses_from_assign (gimple stmt)
 !lacc-grp_unscalarizable_region
 !racc-grp_unscalarizable_region
 AGGREGATE_TYPE_P (TREE_TYPE (lhs))
 -  /* FIXME: Turn the following line into an assert after PR 40058 is
 -  fixed.  */
 -   lacc-size == racc-size
 useless_type_conversion_p (lacc-type, racc-type))
  {
struct assign_link *link;
  
 +  gcc_assert (lacc-size == racc-size);
link = (struct assign_link *) pool_alloc (link_pool);
memset (link, 0, sizeof (struct assign_link));


[C++ Patch] PR 52487

2012-03-20 Thread Paolo Carlini

Hi,

this regression is about literal_type_p ICEing for types which cannot be 
completed. Indeed, for the testcase, complete_type cannot complete the 
type but doesn't error out either, just returns the type as-is, and the 
gcc_assert triggers. We could imagine handling such types in the caller 
- check_field_decls - but in my opinion it makes more sense to just allow 
such types and return false. I also considered changing literal_type_p 
to use complete_type_or_else but then it's easy to produce duplicate 
diagnostics, for example. What do you think?


Tested x86_64-linux.

Thanks,
Paolo.

PS: eventually I guess we want to fix this in mainline and 4.7.1.

///
/cp
2012-03-20  Paolo Carlini  paolo.carl...@oracle.com

PR c++/52487
* semantics.c (literal_type_p): Simply return false for types
which cannot be completed.

/testsuite
2012-03-20  Paolo Carlini  paolo.carl...@oracle.com

PR c++/52487
* g++.dg/cpp0x/lambda/lambda-ice7.C: New.
Index: testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C
===
--- testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C (revision 0)
+++ testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C (revision 0)
@@ -0,0 +1,9 @@
+// PR c++/52487
+// { dg-options -std=c++0x }
+
+struct A; // { dg-error forward declaration }
+
+void foo(A a)
+{
+  [=](){a;};  // { dg-error invalid use of incomplete type }
+}
Index: cp/semantics.c
===
--- cp/semantics.c  (revision 185571)
+++ cp/semantics.c  (working copy)
@@ -5610,8 +5610,7 @@ literal_type_p (tree t)
   if (CLASS_TYPE_P (t))
 {
   t = complete_type (t);
-  gcc_assert (COMPLETE_TYPE_P (t) || errorcount);
-  return CLASSTYPE_LITERAL_P (t);
+  return COMPLETE_TYPE_P (t)  CLASSTYPE_LITERAL_P (t);
 }
   if (TREE_CODE (t) == ARRAY_TYPE)
 return literal_type_p (strip_array_types (t));


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread H.J. Lu
On Tue, Mar 20, 2012 at 4:19 AM, Jakub Jelinek ja...@redhat.com wrote:
 On Tue, Mar 20, 2012 at 09:58:29AM +0100, Jakub Jelinek wrote:
 Yeah, my bootstrap just failed the same.  Will test:

 2012-03-20  Jakub Jelinek  ja...@redhat.com

       * config/i386/i386.c (ix86_decompose_address) case ZERO_EXTEND:
       If operand isn't UNSPEC, return 0.

 Committed as obvious now that bootstrap/regtest finished on x86_64-linux
 and i686-linux.

 --- gcc/config/i386/i386.c.jj 2012-03-20 09:35:06.0 +0100
 +++ gcc/config/i386/i386.c    2012-03-20 09:56:35.038835835 +0100
 @@ -11516,6 +11516,8 @@ ix86_decompose_address (rtx addr, struct

           case ZERO_EXTEND:
             op = XEXP (op, 0);
 +           if (GET_CODE (op) != UNSPEC)
 +             return 0;
             /* FALLTHRU */

           case UNSPEC:


Uros,

I think using the OS-provided instruction to load TP into a DImode register
could simplify the code.


-- 
H.J.


Re: [PATCH] AVX2 permutation improvements

2012-03-20 Thread Richard Henderson
On 03/20/12 04:22, Jakub Jelinek wrote:
 2012-03-20  Jakub Jelinek  ja...@redhat.com
 
   PR target/52607
   * config/i386/i386.md (isa attribute): Add avx2 and noavx2.
   (enabled attribute): Handle avx2 and noavx2 isas.
   * config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcastmode_1):
   New insns.
   (vec_dupmode): Add avx2 =x,x alternative.
   (vec_dupmode splitter): Don't split if TARGET_AVX2.
   (*avx_vperm_broadcast_mode): Don't split V4DFmode if TARGET_AVX2.
   For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
   * config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps
   for V8SFmode.
   (expand_vec_perm_1): For broadcasts, use avx2_pbroadcastmode_1
   if possible, handle also V8SFmode.

Ok.


r~


Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Tristan Gingold

On Mar 20, 2012, at 3:19 PM, Richard Guenther wrote:

[…]
 
 I'd rather get away from using a global main_identifier_node, instead
 make that frontend specific, and introduce targetm.main_assembler_name
 which the assembler-name creating langhook would make sure to use
 when mangling what the FE thinks main is.  main_identifier_node should
 not serve any purpose outside of Frontends.
 
 But I see both as a possible cleanup opportunity, not a necessary change.

Something along these lines ?

Tristan.

diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
index 89f5438..c575e97 100644
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -622,8 +622,6 @@ gigi (Node_Id gnat_root, int max_gnat_node, int number_name 
ATTRIBUTE_UNUSED,
   integer_type_node, NULL_TREE, true, false, true, false,
   NULL, Empty);
 
-  main_identifier_node = get_identifier (main);
-
   /* Install the builtins we might need, either internally or as
  user available facilities for Intrinsic imports.  */
   gnat_install_builtins ();
diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c
index 7383358..b0fa085d 100644
--- a/gcc/ada/gcc-interface/utils.c
+++ b/gcc/ada/gcc-interface/utils.c
@@ -1902,14 +1902,12 @@ create_subprog_decl (tree subprog_name, tree asm_name, 
tree subprog_type,
 {
   SET_DECL_ASSEMBLER_NAME (subprog_decl, asm_name);
 
-  /* The expand_main_function circuitry expects main_identifier_node to
-designate the DECL_NAME of the 'main' entry point, in turn expected
-to be declared as the main function literally by default.  Ada
-program entry points are typically declared with a different name
+  /* Ada program entry points are typically declared with a different name
 within the binder generated file, exported as 'main' to satisfy the
-system expectations.  Force main_identifier_node in this case.  */
-  if (asm_name == main_identifier_node)
-   DECL_NAME (subprog_decl) = main_identifier_node;
+system expectations.  Force main_assembler_node in this case.  */
+  if (IDENTIFIER_LENGTH (asm_name) == 4
+  memcmp (IDENTIFIER_POINTER (asm_name), main, 4) == 0)
+   DECL_NAME (subprog_decl) = main_assembler_name;
 }
 
   /* Add this decl to the current binding level.  */
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 835b13b..fea5181 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -291,6 +291,8 @@ enum c_tree_index
 
 CTI_DEFAULT_FUNCTION_TYPE,
 
+CTI_MAIN_IDENTIFIER,
+
 /* These are not types, but we have to look them up all the time.  */
 CTI_FUNCTION_NAME_DECL,
 CTI_PRETTY_FUNCTION_NAME_DECL,
@@ -426,6 +428,10 @@ extern const unsigned int num_c_common_reswords;
 
 #define default_function_type  
c_global_trees[CTI_DEFAULT_FUNCTION_TYPE]
 
+#define main_identifier_node   c_global_trees[CTI_MAIN_IDENTIFIER]
+#define MAIN_NAME_P(NODE) \
+  (IDENTIFIER_NODE_CHECK (NODE) == main_identifier_node)
+
 #define function_name_decl_node
c_global_trees[CTI_FUNCTION_NAME_DECL]
 #define pretty_function_name_decl_node 
c_global_trees[CTI_PRETTY_FUNCTION_NAME_DECL]
 #define c99_function_name_decl_node
c_global_trees[CTI_C99_FUNCTION_NAME_DECL]
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index bd21169..db53309 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4513,9 +4513,7 @@ gimple_expand_cfg (void)
 
   /* If this function is `main', emit a call to `__main'
  to run global initializers, etc.  */
-  if (DECL_NAME (current_function_decl)
-   MAIN_NAME_P (DECL_NAME (current_function_decl))
-   DECL_FILE_SCOPE_P (current_function_decl))
+  if (cgraph_main_function_p (cgraph_get_node (current_function_decl)))
 expand_main_function ();
 
   /* Initialize the stack_protect_guard field.  This must happen after the
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 9cc3690..528fd19 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2766,7 +2766,7 @@ cgraph_propagate_frequency_1 (struct cgraph_node *node, 
void *data)
  /* It makes sense to put main() together with the static constructors.
 It will be executed for sure, but rest of functions called from
 main are definitely not at startup only.  */
- if (MAIN_NAME_P (DECL_NAME (edge-caller-decl)))
+ if (cgraph_main_function_p (edge-caller))
d-only_called_at_startup = 0;
   d-only_called_at_exit = edge-caller-only_called_at_exit;
}
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 191364c..089d851 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -101,6 +101,9 @@ struct GTY(()) cgraph_local_info {
 
   /* True if the function may enter serial irrevocable mode.  */
   unsigned tm_may_enter_irr : 1;
+
+  /* True if the function is the program entry point (main in C).  */
+  unsigned main_function : 1;
 };
 
 /* 

Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Richard Guenther
On Tue, 20 Mar 2012, Tristan Gingold wrote:

 
 On Mar 20, 2012, at 3:19 PM, Richard Guenther wrote:
 
 […]
  
  I'd rather get away from using a global main_identifier_node, instead
  make that frontend specific, and introduce targetm.main_assembler_name
  which the assembler-name creating langhook would make sure to use
  when mangling what the FE thinks main is.  main_identifier_node should
  not serve any purpose outside of Frontends.
  
  But I see both as a possible cleanup opportunity, not a necessary change.
 
 Something along these lines ?

Yes, but I'd simply call the hook at the places you now use
main_assembler_name and not create a global tree node for it.

Richard.

 Tristan.
 
 diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
 index 89f5438..c575e97 100644
 --- a/gcc/ada/gcc-interface/trans.c
 +++ b/gcc/ada/gcc-interface/trans.c
 @@ -622,8 +622,6 @@ gigi (Node_Id gnat_root, int max_gnat_node, int 
 number_name ATTRIBUTE_UNUSED,
  integer_type_node, NULL_TREE, true, false, true, false,
  NULL, Empty);
  
 -  main_identifier_node = get_identifier (main);
 -
/* Install the builtins we might need, either internally or as
   user available facilities for Intrinsic imports.  */
gnat_install_builtins ();
 diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c
 index 7383358..b0fa085d 100644
 --- a/gcc/ada/gcc-interface/utils.c
 +++ b/gcc/ada/gcc-interface/utils.c
 @@ -1902,14 +1902,12 @@ create_subprog_decl (tree subprog_name, tree 
 asm_name, tree subprog_type,
  {
SET_DECL_ASSEMBLER_NAME (subprog_decl, asm_name);
  
 -  /* The expand_main_function circuitry expects main_identifier_node to
 -  designate the DECL_NAME of the 'main' entry point, in turn expected
 -  to be declared as the main function literally by default.  Ada
 -  program entry points are typically declared with a different name
 +  /* Ada program entry points are typically declared with a different 
 name
within the binder generated file, exported as 'main' to satisfy the
 -  system expectations.  Force main_identifier_node in this case.  */
 -  if (asm_name == main_identifier_node)
 - DECL_NAME (subprog_decl) = main_identifier_node;
 +  system expectations.  Force main_assembler_node in this case.  */
 +  if (IDENTIFIER_LENGTH (asm_name) == 4
 +memcmp (IDENTIFIER_POINTER (asm_name), main, 4) == 0)
 + DECL_NAME (subprog_decl) = main_assembler_name;
  }
  
/* Add this decl to the current binding level.  */
 diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
 index 835b13b..fea5181 100644
 --- a/gcc/c-family/c-common.h
 +++ b/gcc/c-family/c-common.h
 @@ -291,6 +291,8 @@ enum c_tree_index
  
  CTI_DEFAULT_FUNCTION_TYPE,
  
 +CTI_MAIN_IDENTIFIER,
 +
  /* These are not types, but we have to look them up all the time.  */
  CTI_FUNCTION_NAME_DECL,
  CTI_PRETTY_FUNCTION_NAME_DECL,
 @@ -426,6 +428,10 @@ extern const unsigned int num_c_common_reswords;
  
  #define default_function_type
 c_global_trees[CTI_DEFAULT_FUNCTION_TYPE]
  
 +#define main_identifier_node c_global_trees[CTI_MAIN_IDENTIFIER]
 +#define MAIN_NAME_P(NODE) \
 +  (IDENTIFIER_NODE_CHECK (NODE) == main_identifier_node)
 +
  #define function_name_decl_node  
 c_global_trees[CTI_FUNCTION_NAME_DECL]
  #define pretty_function_name_decl_node   
 c_global_trees[CTI_PRETTY_FUNCTION_NAME_DECL]
  #define c99_function_name_decl_node  
 c_global_trees[CTI_C99_FUNCTION_NAME_DECL]
 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
 index bd21169..db53309 100644
 --- a/gcc/cfgexpand.c
 +++ b/gcc/cfgexpand.c
 @@ -4513,9 +4513,7 @@ gimple_expand_cfg (void)
  
/* If this function is `main', emit a call to `__main'
   to run global initializers, etc.  */
 -  if (DECL_NAME (current_function_decl)
 -   MAIN_NAME_P (DECL_NAME (current_function_decl))
 -   DECL_FILE_SCOPE_P (current_function_decl))
 +  if (cgraph_main_function_p (cgraph_get_node (current_function_decl)))
  expand_main_function ();
  
/* Initialize the stack_protect_guard field.  This must happen after the
 diff --git a/gcc/cgraph.c b/gcc/cgraph.c
 index 9cc3690..528fd19 100644
 --- a/gcc/cgraph.c
 +++ b/gcc/cgraph.c
 @@ -2766,7 +2766,7 @@ cgraph_propagate_frequency_1 (struct cgraph_node *node, 
 void *data)
 /* It makes sense to put main() together with the static constructors.
It will be executed for sure, but rest of functions called from
main are definitely not at startup only.  */
 -   if (MAIN_NAME_P (DECL_NAME (edge-caller-decl)))
 +   if (cgraph_main_function_p (edge-caller))
   d-only_called_at_startup = 0;
d-only_called_at_exit = edge-caller-only_called_at_exit;
   }
 diff --git a/gcc/cgraph.h b/gcc/cgraph.h
 index 191364c..089d851 100644
 --- a/gcc/cgraph.h
 +++ b/gcc/cgraph.h
 @@ 

Re: [Patch/cfgexpand]: also consider assembler_name to call expand_main_function

2012-03-20 Thread Tristan Gingold

On Mar 20, 2012, at 5:01 PM, Richard Guenther wrote:

 On Tue, 20 Mar 2012, Tristan Gingold wrote:
 
 
 On Mar 20, 2012, at 3:19 PM, Richard Guenther wrote:
 
 […]
 
 I'd rather get away from using a global main_identifier_node, instead
 make that frontend specific, and introduce targetm.main_assembler_name
 which the assembler-name creating langhook would make sure to use
 when mangling what the FE thinks main is.  main_identifier_node should
 not serve any purpose outside of Frontends.
 
 But I see both as a possible cleanup opportunity, not a necessary change.
 
 Something along these lines ?
 
 Yes, but I'd simply call the hook at the places you now use
 main_assembler_name and not create a global tree node for it.

But we use it at the beginning of cgraph_finalize_function, so caching it
makes sense, doesn't it ?

Tristan.



Re: [PATCH] Merge more of the expand phase into expand

2012-03-20 Thread Jan Hubicka
 
 This is the followup I promised to do on the expand and rtl_eh pass
 merge.  The following patch merges up to unshare_all_rtl, after which
 we should be able to pass all RTL testing(?)
 
 Bootstrapped and tested on x86_64-unknown-linux-gnu.
 
 Honza, does this look like what you had in mind?

Yes, this looks just fine ;)
 Index: gcc/passes.c
 ===
 *** gcc/passes.c  (revision 185564)
 --- gcc/passes.c  (working copy)
 *** init_optimization_passes (void)
 *** 1429,1438 
 NEXT_PASS (pass_rest_of_compilation);
   {
 struct opt_pass **p = pass_rest_of_compilation.pass.sub;
 -   NEXT_PASS (pass_init_function);
 -   NEXT_PASS (pass_jump);
 -   NEXT_PASS (pass_initial_value_sets);
 -   NEXT_PASS (pass_unshare_all_rtl);
 NEXT_PASS (pass_instantiate_virtual_regs);
 NEXT_PASS (pass_into_cfg_layout_mode);
These two are sane and reasonable passes (though we eventually may go into 
cfg layout directly, but that will still require a lot of massaging of ages-old
code)
 NEXT_PASS (pass_jump2);
This pass is a good next candidate for removal.  I have it in my TODO now ;)

Thanks!
Honza


Re: [PATCH] i?86 AVX cross-lane 2 insn permutations (PR target/52607)

2012-03-20 Thread Richard Henderson
On 03/20/2012 04:31 AM, Jakub Jelinek wrote:
 2012-03-20  Jakub Jelinek  ja...@redhat.com
 
   PR target/52607
   * config/i386/i386.c (expand_vec_perm_vperm2f128): New function.
   (ix86_expand_vec_perm_const_1): Call it.

Ok.


r~


Re: [v3] minor simplification to std::list

2012-03-20 Thread Jonathan Wakely
On 18 March 2012 23:27, Jonathan Wakely wrote:
 Now that G++ supports it we can use a NSDMI for std::list::_M_size to
 avoid needing conditional compilation to set it in the constructors.
 I think the attached patch is an improvement so I plan to commit it to
 trunk soon unless I hear objections.

        * include/bits/stl_list.h (list::_M_size): Use NSDMI.
        * testsuite/23_containers/list/requirements/dr438/assign_neg.cc:
        Adjust line numbers.
        * testsuite/23_containers/list/requirements/dr438/
        constructor_1_neg.cc: Likewise.
        * testsuite/23_containers/list/requirements/dr438/
        constructor_2_neg.cc: Likewise.
        * testsuite/23_containers/list/requirements/dr438/insert_neg.cc:
        Likewise.

 Tested x86_64.

Committed to trunk.


[pph] Add reduced test case for a common lookup failure (issue5844068)

2012-03-20 Thread Diego Novillo
Add reduced test case for a common lookup failure.

This is reduced from the standard library.  The type mbstate_t is
declared in the global scope and then incorporated into 'std' via
using.

When the header doing this is converted into a PPH image, the lookup
for identifier 'mbstate_t' produces the USING_DECL instead of the
TYPE_DECL in the global scope.

This causes the parser to segfault while parsing 'typedef
std::mbstate_t state_type' in the user of that PPH image.

Working on a fix shortly.


2012-03-20   Diego Novillo  dnovi...@google.com

* g++.dg/pph/x0mbstate_t.h: New.
* g++.dg/pph/x1mbstate_t.h: New.

diff --git a/gcc/testsuite/g++.dg/pph/x0mbstate_t.h 
b/gcc/testsuite/g++.dg/pph/x0mbstate_t.h
new file mode 100644
index 000..dceb3e4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pph/x0mbstate_t.h
@@ -0,0 +1,10 @@
+#ifndef _X0_MBSTATE_T_H
+#define _X0_MBSTATE_T_H
+
+typedef int mbstate_t;
+
+namespace std
+{
+  using ::mbstate_t;
+}
+#endif
diff --git a/gcc/testsuite/g++.dg/pph/x1mbstate_t.h 
b/gcc/testsuite/g++.dg/pph/x1mbstate_t.h
new file mode 100644
index 000..c07a0cc
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pph/x1mbstate_t.h
@@ -0,0 +1,9 @@
+// { dg-xfail-if identifier bindings not set properly { *-*-* } { 
-fpph-map=pph.map} }
+// { dg-bogus .*Segmentation fault ICE trying to parse std::mbstate_t  { 
xfail *-*-* } 0 }
+#ifndef _X1_MBSTATE_H
+#define _X1_MBSTATE_H
+#include x0mbstate_t.h
+// Name lookup for std::mbstate_t fails here.  Instead of returning the global
+// type_decl for mbstate_t, it returns the usings ::mbstate_t declaration.
+typedef std::mbstate_t state_type;
+#endif

--
This patch is available for review at http://codereview.appspot.com/5844068


Re: [PATCH] Replace a SRA FIXME with an assert

2012-03-20 Thread Martin Jambor
Hi,

On Tue, Mar 20, 2012 at 04:08:31PM +0100, Richard Guenther wrote:
 On Tue, 20 Mar 2012, Martin Jambor wrote:
 
  Hi,
  
  this patch which removes one of only two FIXMEs in tree-sra.c has been
  sitting in my patch queue for over a year.  Yesterday I noticed it
  there, bootstrapped and tested it on x86_64-linux and it passed.
  
  I'd like to either commit it or just remove the comment, if there
  likely still are size inconsistencies in assignments but we are not
  planning to do anything with them in foreseeable future (and perhaps
  add a note to the bug).
  
  So, which should it be?
 
 Well.  Aggregate assignments can still be off I think, especially
 because of the disconnect between TYPE_SIZE and DECL_SIZE in
 some cases, considering *p = x; with typeof (x) == typeof (*p)
 (tail-padding re-use).
 
 The comments in PR40058 hint at that that issue might be fixed,
 but I also remember issues with Ada.

The other FIXME in tree-sra.c suggests that Ada can produce
VIEW_CONVERT_EXPRs with a different size than its argument, perhaps
that is it (I'll try removing that one too).

 
 GIMPLE verification ensures compatible types (but not a match
 of type_size / decl_size which will be exposed by get_ref_base_and_extent)
 
 But the real question is what do you want to guard against here?
 The assert at least looks like it is going to trigger at some point,
 but would it be a problem if the sizes do not match?
 

I really can't remember what exactly happened but I do remember it did
lead to a bug (it was already part of the check-in of the new SRA, so svn
history does not help).  We copy access tree children across
assignments and also change the type of the LHS access to a scalar if
the RHS access is a scalar (assignments into a structure containing
just one scalar) and both could lead to some access tree children
covering larger part of the aggregate than the parent, making the
children un-findable or even creating overlaps which are prohibited
for SRA candidates.

But as I wrote before, I'll be happy to just remove the FIXME comment.

Martin


 Richard.
 
 
  2011-01-06  Martin Jambor  mjam...@suse.cz
  
  * tree-sra.c (build_accesses_from_assign): Make size equality test
  an assert.
  
  Index: src/gcc/tree-sra.c
  ===
  --- src.orig/gcc/tree-sra.c
  +++ src/gcc/tree-sra.c
  @@ -1175,13 +1175,11 @@ build_accesses_from_assign (gimple stmt)
  !lacc-grp_unscalarizable_region
  !racc-grp_unscalarizable_region
  AGGREGATE_TYPE_P (TREE_TYPE (lhs))
  -  /* FIXME: Turn the following line into an assert after PR 40058 is
  -fixed.  */
  -   lacc-size == racc-size
  useless_type_conversion_p (lacc-type, racc-type))
   {
 struct assign_link *link;
   
  +  gcc_assert (lacc-size == racc-size);
 link = (struct assign_link *) pool_alloc (link_pool);
 memset (link, 0, sizeof (struct assign_link));


Re: Remove anachronistic docs about G++ template instantiation

2012-03-20 Thread Jonathan Wakely
On 18 March 2012 14:56, Gerald Pfeifer wrote:
 On Thu, 8 Mar 2012, Jonathan Wakely wrote:
 The manual claims a future version of G++ will support a hybrid
 instantiation model, which I don't think is still planned, and
 describes extern templates as an extension when they are in C++11.

         * doc/extend.texi (Template Instantiation): Remove anachronisms.

 I was waiting for a C++ frontend maintainer to chime in.  The
 patch per se looks good to me, and based on some others mails
 around template instantiations I think you can go ahead.

Thanks, committed to trunk.


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Uros Bizjak
On Tue, Mar 20, 2012 at 4:52 PM, H.J. Lu hjl.to...@gmail.com wrote:

 Yeah, my bootstrap just failed the same.  Will test:

 2012-03-20  Jakub Jelinek  ja...@redhat.com

       * config/i386/i386.c (ix86_decompose_address) case ZERO_EXTEND:
       If operand isn't UNSPEC, return 0.

 Committed as obvious now that bootstrap/regtest finished on x86_64-linux
 and i686-linux.

 --- gcc/config/i386/i386.c.jj 2012-03-20 09:35:06.0 +0100
 +++ gcc/config/i386/i386.c    2012-03-20 09:56:35.038835835 +0100
 @@ -11516,6 +11516,8 @@ ix86_decompose_address (rtx addr, struct

           case ZERO_EXTEND:
             op = XEXP (op, 0);
 +           if (GET_CODE (op) != UNSPEC)
 +             return 0;
             /* FALLTHRU */

           case UNSPEC:


 Uros,

 I think use the OS provided instruction to load TP into DImode register
 could simplify the code.

Which OS provided instruction?

Please see how TP is defined in get_thread_pointer, it is in ptr_mode:

  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

This says that TP is in SImode on X32.

Uros.


[google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue5851044)

2012-03-20 Thread Sriraman Tallam
This patch fixes bugs in section_ordering linker plugin to correctly handle 
functions that are file local or comdat. Such functions could be
defined more than once in different object files and can have the same name. 
The callgraph edge profiles should be attributed to the right local function. 
Also, comdat functions in the same group must be treated as one function.

This linker plugin is only available in the google/gcc-4_6 branch. I am working 
on preparing a patch for trunk.


* callgraph.h (push_mm_ptr): New function.
(make_edge_list): Call push_mm_ptr for heap allocs.
(make_node): Ditto.
(make_edge): Ditto.
(section_type, comdat_group, next, group): Add new fields
to struct Section_id.
(make_section_id): Initialize new fields. Add new args.
(parse_callgraph_section_contents): Add args.
* function_reordering_plugin.c (claim_file_hook): Call
push_mm_ptr. Fix call to parse_callgraph_section_contents.
(all_symbols_read_hook): Call push_mm_ptr. Remove calls to free.
* callgraph.c (mm_node): New struct.
(push_mm_ptr): New function.
(canonicalize_function_name): New function.
(parse_callgraph_section_contents): Add args. Call
canonicalize_function_name.
(num_sections): New global.
(NUM_SECTION_TYPES): New constant.
(section_types): New constant.
(section_priority): New constant.
(map_section_name_to_index): Remove sections. Replace sections with
section_types. Call canonicalize_function_name. Chain all created
sections. Check for comdat sections and group them together.
(write_out_node): Output sections into section_start and section_end
chain. Handle comdats.
(get_layout): Make new chains for each section type. Output each chain
one by one into handles and shndx. Output all the sections created.
(cleanup): Free all heap allocated objects tracked by mm_node_chain.


Index: callgraph.h
===
--- callgraph.h (revision 185543)
+++ callgraph.h (working copy)
@@ -1,6 +1,7 @@
 /* Callgraph implementation.
Copyright (C) 2011 Free Software Foundation, Inc.
-   Contributed by Sriraman Tallam (tmsri...@google.com).
+   Contributed by Sriraman Tallam (tmsri...@google.com)
+   and Easwaran Raman (era...@google.com).
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -26,6 +27,9 @@ along with this program; see the file COPYING3.  I
 #include string.h
 #include libiberty.h
 
+/* Push a pointer that should be freed after the plugin is done.  */
+void push_mm_ptr (void *ptr);
+
 struct edge_d;
 typedef struct edge_d Edge;
 
@@ -41,6 +45,7 @@ inline static Edge_list *
 make_edge_list (Edge *e)
 {
   Edge_list *list = XNEW (Edge_list);
+  push_mm_ptr (list);
   list-edge = e;
   list-next = NULL;
   list-prev = NULL;
@@ -69,6 +74,7 @@ inline static Node *
 make_node (unsigned int id, char *name)
 {
   Node *node = XNEW (Node);
+  push_mm_ptr (node);
   node-id = id;
   node-name = name;
   node-is_real_node = 0;
@@ -86,9 +92,9 @@ make_node (unsigned int id, char *name)
 inline static void
 merge_node (Node *merger, Node *mergee)
 {
-merger-last_merge_node-merge_next = mergee;
-merger-last_merge_node = mergee-last_merge_node;
-mergee-is_merged = 1;
+  merger-last_merge_node-merge_next = mergee;
+  merger-last_merge_node = mergee-last_merge_node;
+  mergee-is_merged = 1;
 }
 
 inline static void
@@ -136,6 +142,7 @@ inline static Edge *
 make_edge (Node *first, Node *second, unsigned int weight)
 {
   Edge *edge = XNEW (Edge);
+  push_mm_ptr (edge);
   edge-first_function = first;
   edge-second_function = second;
   edge-weight = weight;
@@ -148,21 +155,7 @@ make_edge (Node *first, Node *second, unsigned int
   return edge;
 }
 
-/* Frees the chain of edges.  */
 inline static void
-free_edge_chain (Edge *edge_chain)
-{
-  Edge *edge;
-
-  for (edge = edge_chain; edge != NULL; )
-{
-  Edge *next_edge = edge-next;
-  free (edge);
-  edge = next_edge;
-}
-}
-
-inline static void
 set_edge_type (Edge *edge)
 {
   if (edge-first_function-is_real_node
@@ -200,7 +193,7 @@ reset_functions (Edge *e, Node *n1, Node *n2)
 }
 
 /* A Section is represented by its object handle and the section index. */
-typedef struct
+typedef struct section_id_
 {
   /* Name of the function.  */
   char *name;
@@ -208,16 +201,34 @@ reset_functions (Edge *e, Node *n1, Node *n2)
   char *full_name;
   void *handle;
   int shndx;
+  /* Type of prefix in section name.  */
+  int section_type;
+  /* Pointer to the next section in the same comdat_group.  */
+  struct section_id_ *comdat_group;
+  /* Chain all the sections created.  */
+  struct section_id_ *next;
+  /* Used for grouping sections.  */
+  struct section_id_ *group;
+  /* Check if 

Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread H.J. Lu
On Tue, Mar 20, 2012 at 10:54 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Tue, Mar 20, 2012 at 4:52 PM, H.J. Lu hjl.to...@gmail.com wrote:

 Yeah, my bootstrap just failed the same.  Will test:

 2012-03-20  Jakub Jelinek  ja...@redhat.com

       * config/i386/i386.c (ix86_decompose_address) case ZERO_EXTEND:
       If operand isn't UNSPEC, return 0.

 Committed as obvious now that bootstrap/regtest finished on x86_64-linux
 and i686-linux.

 --- gcc/config/i386/i386.c.jj 2012-03-20 09:35:06.0 +0100
 +++ gcc/config/i386/i386.c    2012-03-20 09:56:35.038835835 +0100
 @@ -11516,6 +11516,8 @@ ix86_decompose_address (rtx addr, struct

           case ZERO_EXTEND:
             op = XEXP (op, 0);
 +           if (GET_CODE (op) != UNSPEC)
 +             return 0;
             /* FALLTHRU */

           case UNSPEC:


 Uros,

 I think use the OS provided instruction to load TP into DImode register
 could simplify the code.

 Which OS provided instruction?

 Please see how TP is defined in get_thread_pointer, it is in ptr_mode:

  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

 This says that TP is in SImode on X32.

 Uros.

TP is defined as (unspec:DI [(const_int 0)] UNSPEC_TP)
and provided by OS.  It is a CONST_INT, but its value is opaque
to GCC. MODE here has no impact on its value provided by OS.
X32 OS provides instructions to load TP into SImode and
DImode registers.


-- 
H.J.


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread Uros Bizjak
On Tue, Mar 20, 2012 at 7:27 PM, H.J. Lu hjl.to...@gmail.com wrote:

 I think use the OS provided instruction to load TP into DImode register
 could simplify the code.

 Which OS provided instruction?

 Please see how TP is defined in get_thread_pointer, it is in ptr_mode:

  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

 This says that TP is in SImode on X32.

 TP is defined as (unspec:DI [(const_int 0]) UNSPEC_TP)
 and provided by OS.  It is a CONST_INT, but its value is opaque
 to GCC. MODE here has no impact on its value provided by OS.
 X32 OS provides instructions to load TP to into an SImode and
 DImode registers.

You must be looking at different GCC sources than I am.

(define_insn "*load_tp_x32"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(const_int 0)] UNSPEC_TP))]
  "TARGET_X32"
  "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])

(define_insn "*load_tp_x32_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
  "TARGET_X32"
  "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "0")
   (set_attr "length" "7")
   (set_attr "memory" "load")
   (set_attr "imm_disp" "false")])

Uros.


PATCH to tree-streamer-in.c to fix bootstrap with --enable-gather-detailed-mem-stats

2012-03-20 Thread Jason Merrill

The _stat allocation functions aren't supposed to be called directly.

Tested x86_64-pc-linux-gnu, applying as obvious.
commit f0e560b78591a50e39bf3ccf41bc3d87f43927e8
Author: Jason Merrill ja...@redhat.com
Date:   Mon Mar 19 16:35:05 2012 -0400

	* tree-streamer-in.c (streamer_alloc_tree): Call
	ggc_alloc_zone_cleared_tree_node instead of
	ggc_alloc_zone_cleared_tree_node_stat.

diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c
index 50ea77d..97c78cd 100644
--- a/gcc/tree-streamer-in.c
+++ b/gcc/tree-streamer-in.c
@@ -476,9 +476,9 @@ streamer_alloc_tree (struct lto_input_block *ib, struct data_in *data_in,
   else if (CODE_CONTAINS_STRUCT (code, TS_VECTOR))
 {
   HOST_WIDE_INT len = streamer_read_hwi (ib);
-  result = ggc_alloc_zone_cleared_tree_node_stat (tree_zone,
-		  (len - 1) * sizeof (tree)
-		  + sizeof (struct tree_vector));
+  result = ggc_alloc_zone_cleared_tree_node ((len - 1) * sizeof (tree)
+		 + sizeof (struct tree_vector),
+		 tree_zone);
   TREE_SET_CODE (result, VECTOR_CST);
 }
   else if (CODE_CONTAINS_STRUCT (code, TS_BINFO))


Re: [RFC PATCH 0/3] Fixing expansion of misaligned MEM_REFs on strict-alignment targets

2012-03-20 Thread Georg-Johann Lay
Martin Jambor wrote:
 Hi,
 
 this is another iteration of my attempts to fix expansion of
 misaligned memory accesses on strict-alignment platforms (which was
 suggested by Richi in
 http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00931.html and my first
 attempt was posted as
 http://gcc.gnu.org/ml/gcc-patches/2012-01/msg00319.html).
 
 This time I got further, to big extent thanks to parts of Richi's
 fixes of PR 50444 which cleaned up expr.c considerably.  I have
 successfully bootstrapped the combined patch on x86_64-linux,
 i686-linux, ia64-linux (without Ada) and sparc64-linux (without Java).
 I have run the c and c++ testsuites on individual patches on sparc64
 and ia64 too.
 
 Nevertheless, since I still lack experience in this area, there will
 almost certainly be comments and suggestions and therefore I have
 divided the three main changes to three different patches, so that
 they are easier to comment on by both me and anybody reviewing them.
 
 Thanks in advance for any comments,
 
 Martin

Hi Martin.

The new test cases make assumptions about the size of int: for example, they
crash for targets with sizeof(int) == 2.

Johann





C++ PATCH for c++/52510 (rejects-valid with list-initialization of pthread_cond)

2012-03-20 Thread Jason Merrill
My change for core issue 1270 to allow brace elision in all 
list-initialization resulted in multiple calls to reshape_init for the 
same CONSTRUCTOR, which we weren't handling properly.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit d7b5c07030b11599b846342b19ff50306043965f
Author: Jason Merrill ja...@redhat.com
Date:   Mon Mar 19 16:57:48 2012 -0400

	PR c++/52510
	* decl.c (reshape_init_class): Handle repeated reshaping.
	* search.c (lookup_field_1): Add sanity check.

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index a18b312..2b2a551 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -5110,7 +5110,11 @@ reshape_init_class (tree type, reshape_iter *d, bool first_initializer_p,
 	  return error_mark_node;
 	}
 
-	  field = lookup_field_1 (type, d-cur-index, /*want_type=*/false);
+	  if (TREE_CODE (d-cur-index) == FIELD_DECL)
+	/* We already reshaped this.  */
+	gcc_assert (d-cur-index == field);
+	  else
+	field = lookup_field_1 (type, d-cur-index, /*want_type=*/false);
 
 	  if (!field || TREE_CODE (field) != FIELD_DECL)
 	{
diff --git a/gcc/cp/search.c b/gcc/cp/search.c
index a1f8a3d..bd1bc57 100644
--- a/gcc/cp/search.c
+++ b/gcc/cp/search.c
@@ -384,6 +384,8 @@ lookup_field_1 (tree type, tree name, bool want_type)
 {
   tree field;
 
+  gcc_assert (TREE_CODE (name) == IDENTIFIER_NODE);
+
   if (TREE_CODE (type) == TEMPLATE_TYPE_PARM
   || TREE_CODE (type) == BOUND_TEMPLATE_TEMPLATE_PARM
   || TREE_CODE (type) == TYPENAME_TYPE)
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist66.C b/gcc/testsuite/g++.dg/cpp0x/initlist66.C
new file mode 100644
index 000..4fc162e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist66.C
@@ -0,0 +1,29 @@
+// PR c++/52510
+// { dg-do compile { target c++11 } }
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef uint64_t upad64_t;
+
+typedef struct _pthread_cond {
+ struct {
+  uint8_t __pthread_cond_flag[4];
+  uint16_t __pthread_cond_type;
+  uint16_t __pthread_cond_magic;
+ } __pthread_cond_flags;
+ upad64_t __pthread_cond_data;
+} pthread_cond_t;
+
+class gtm_rwlock
+{
+  pthread_cond_t c_readers;
+ public:
+  gtm_rwlock();
+};
+
+gtm_rwlock::gtm_rwlock()
+  : c_readers ({{{0, 0, 0, 0}, 0, 0x4356}, 0})
+{ }
+


Re: [C++ Patch] PR 52487

2012-03-20 Thread Jason Merrill
That assert is there to make sure that we don't try to test for 
literality of an incomplete type.  We should check for completeness 
before trying to check for literality.


Jason


Re: [google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue 5851044)

2012-03-20 Thread davidxl

It would be nice to add some unit/regression test cases of some sort.

David


http://codereview.appspot.com/5851044/diff/1/callgraph.c
File callgraph.c (right):

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode309
callgraph.c:309: if (!is_prefix_of (_ZL, name))
How about static functions in namespace? How about functions in
anonymous namespace?

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode511
callgraph.c:511: .text. };
How are the sections ordered in the array?  Keep it in mind that it is
possible to encode the actual profile count of the function in the
section name in the future.

http://codereview.appspot.com/5851044/


Re: PATCH: Properly generate X32 IE sequence

2012-03-20 Thread H.J. Lu
On Tue, Mar 20, 2012 at 11:43 AM, Uros Bizjak ubiz...@gmail.com wrote:
 On Tue, Mar 20, 2012 at 7:27 PM, H.J. Lu hjl.to...@gmail.com wrote:

 I think use the OS provided instruction to load TP into DImode register
 could simplify the code.

 Which OS provided instruction?

 Please see how TP is defined in get_thread_pointer, it is in ptr_mode:

  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

 This says that TP is in SImode on X32.

 TP is defined as (unspec:DI [(const_int 0]) UNSPEC_TP)
 and provided by OS.  It is a CONST_INT, but its value is opaque
 to GCC. MODE here has no impact on its value provided by OS.
 X32 OS provides instructions to load TP to into an SImode and
 DImode registers.

 You must be looking to some other GCC sources than me.

 (define_insn *load_tp_x32
  [(set (match_operand:SI 0 register_operand =r)
        (unspec:SI [(const_int 0)] UNSPEC_TP))]
  TARGET_X32
  mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}
  [(set_attr type imov)
   (set_attr modrm 0)
   (set_attr length 7)
   (set_attr memory load)
   (set_attr imm_disp false)])

 (define_insn *load_tp_x32_zext
  [(set (match_operand:DI 0 register_operand =r)
        (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))]
  TARGET_X32
  mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}
  [(set_attr type imov)
   (set_attr modrm 0)
   (set_attr length 7)
   (set_attr memory load)
   (set_attr imm_disp false)])


Thread pointer (TP) points to thread control block (TCB).  X32 TCB is

typedef struct
{
  void *tcb;/* Pointer to the TCB.  Not necessarily the
   thread descriptor used by libpthread.  */
  ...
}

It is a 32bit address set up by OS.  That is where 0 in %fs:0 comes
from since it is the first field of the struct %fs points to.  X32 OS provides

mov %fs:0, %eax

to load the address of TCB into EAX and

mov %fs:0, %rax

to load the address of TCB into RAX since OS guarantees that the upper
32bits of the address of TCB are all 0s. We added *load_tp_x32_zext
since we zero-extend SI TP to DI TP.   Or we can use

mov %fs:0, %rax

to directly load the value of the tcb field into RAX and remove
*load_tp_x32_zext.  It will simplify the code.


-- 
H.J.


Re: [google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue 5851044)

2012-03-20 Thread eraman


http://codereview.appspot.com/5851044/diff/1/callgraph.c
File callgraph.c (right):

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode513
callgraph.c:513: const int section_priority[] = {0, 3, 4, 2, 1};
Add a comment about section_priority

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode571
callgraph.c:571: if (section_priority[kept-section_type]
Add an example that shows why we want to do that

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode655
callgraph.c:655: write_out_node (n_it-name, section_start[0],
section_end[0]);
In write_out_node, why take the function name and do a hash table lookup
to get the section, instead of directly passing Section_id * in the
caller. In all calls to write_out_node, you are in fact getting the name
from the Section_id *.

http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode674
callgraph.c:674: s_it-processed = 1;
setting processed to 1 is redundant as it is already done in
write_out_node.

http://codereview.appspot.com/5851044/diff/1/callgraph.h
File callgraph.h (right):

http://codereview.appspot.com/5851044/diff/1/callgraph.h#newcode31
callgraph.h:31: void push_mm_ptr (void *ptr);
push_allocated_ptr or save_allocated_ptr would be a better name.

http://codereview.appspot.com/5851044/diff/1/callgraph.h#newcode48
callgraph.h:48: push_mm_ptr (list);
It might be cleaner to create a wrapper around XNEW that calls
push_mm_ptr after XNEW. Similar for malloc as well.

http://codereview.appspot.com/5851044/


[testsuite,int=16,committed]: PR testsuite/52641

2012-03-20 Thread Georg-Johann Lay
http://gcc.gnu.org/viewcvs?view=revision&revision=185588

Johann


Re: [PATCH] Optimize in VRP if ((x & cst1) cmp cst2) (PR tree-optimization/52267)

2012-03-20 Thread Georg-Johann Lay
Jakub Jelinek wrote:
 Hi!
 
 This patch adds ASSERT_EXPRs derived from
   tmp_N = var_M & cst2;
   if (tmp_N cmp val)
 where cst2 and val are constants.  Bootstrapped/regtested
 on x86_64-linux and i686-linux, ok for trunk?
 
 2012-03-14  Jakub Jelinek  ja...@redhat.com
 
   PR tree-optimization/52267
   * tree-vrp.c (masked_increment): New function.
   (register_edge_assert_for_2): Derive ASSERT_EXPRs
   from (X & CST1) cmp CST2 tests.
 
   * gcc.dg/pr52267.c: New test.
   * gcc.dg/tree-ssa/vrp65.c: New test.
   * gcc.dg/tree-ssa/vrp66.c: New test.

Hi

gcc.dg/tree-ssa/vrp66.c: New test

fails when executed for avr where sizeof(int) = 2

Skimming the code I'd expect that it is general enough to work there so I
wonder why it fails for that target?

Johann



[Patch,AVR]: Hack around PR rtl-optimization/52543, Take #2

2012-03-20 Thread Georg-Johann Lay
Dropping the first patch which does not work because at expand-time there
must not be pre-/post-modify addressing :-(

This solution turns completely away from MEM and addressing modes:
It represents loads from the 16-bit address spaces as UNSPEC.

The code is as expected now with the additional improvement that loads
to RAMPZ can be factored out if the value is known to be the same
(at least the LDI part; the OUT part is still needed).

Moreover, the code gets simpler because loading the value to OUT to
RAMPZ can be open coded and need not be hidden in the insn because
reload cannot handle these complicated addresses.

And the patch fixes some more issues:

- avr_load_libgcc_p must only allow __flash because __load_3/4 use LPM.

- Resetting RAMPZ after ELPM for EBI-devices in avr_out_lpm was void in
  some situations because of premature return.
  This is fixed now; the new code is located in avr_load_lpm.

Test suite results look good. There is just an ICE for
  gcc.target/avr/torture/addr-space-2-x.c  -O3 -g
which appears to be PR middle-end/52472

Ok to commit?

Johann

PR rtl-optimization/52543
PR target/52461
* config/avr/avr-protos.h (avr_load_lpm): New prototype.
* config/avr/avr.c (avr_mode_dependent_address_p): New function.
(TARGET_MODE_DEPENDENT_ADDRESS_P): New define.
(avr_load_libgcc_p): Restrict to __flash loads.
(avr_out_lpm): Only handle 1-byte loads from __flash.
(avr_load_lpm): New function.
(avr_find_unused_d_reg): Remove.
(avr_out_lpm_no_lpmx): Remove.
(adjust_insn_length): Handle ADJUST_LEN_LOAD_LPM.

* config/avr/avr.md (unspec): Add UNSPEC_LPM.
(load_mode_libgcc): Use UNSPEC_LPM instead of MEM.
(load_mode, load_mode_clobber): New insns.
(movmode): For multi-byte move from non-generic
16-bit address spaces: Expand to load_mode resp.
load_mode_clobber.
(loadmode_libgcc): Remove expander.
(split-lpmx): Remove split.


Georg-Johann Lay wrote:

 The problem with the PR is that lower-subreg.c happily splits multi-byte moves
 from address spaces without knowing anything about the additional costs
 this is causing.
 
 The TARGET_MODE_DEPENDENT_ADDRESS_P hook cannot be used for 16-bit addresses
 because that hook is not sensitive to address spaces, but it is used for the
 24-bit address space to avoid subreg lowering for PSImode.
 
 For the 16-bit address spaces the mov expander now assigns the address 
 register
 by hand as post-increment.
 
 Luckily, post-increment is the only addressing mode that makes sense with the
 non-generic address spaces and there is no choice for the address register
 resp. addressing mode, anyway...
 
 This patch does not fix the PR issue, of course, it just avoids subreg 
 lowering
 by using/pretending mode-dependent addresses.
 
 Ok for trunk?
 
 Johann
 
   PR rtl-optimization/52543
   * config/avr/avr.c (avr_mode_dependent_address_p): New function.
   (TARGET_MODE_DEPENDENT_ADDRESS_P): New define.
 
   * config/avr/avr.md (unspec): Add UNSPEC_LPM.
   (load_mode_libgcc): Use UNSPEC_LPM instead of MEM.
   (movmode): For multi-byte move from non-generic
   16-bit address spaces: Expand to use Z++ as address for
   inline code and use UNSPEC_LPM (Z) for code from libgcc.
   (loadmode_libgcc): Remove expander.
   (split-lpmx): Remove split.


Re: [Patch,AVR]: Hack around PR rtl-optimization/52543, Take #2

2012-03-20 Thread Georg-Johann Lay
And here is the patch...

Georg-Johann Lay wrote:

 Dropping the first patch which does not work because at expand-time there
 must not be pre-/post-modify addressing :-(
 
 This solutions turns completely away from MEM and addressing modes:
 It represents loads from the 16-bits address-spaces as UNSPEC.
 
 The code is as expected now with the additional improvement that loads
 to RAMPZ can be factored out if the value is known to be the same
 (at least the LDI part; the OUT part is still needed).
 
 Moreover, the code gets simpler because loading the value to OUT to
 RAMPZ can be open coded and need not to be hidden in the insn because
 reload cannot handle these complicated addresses.
 
 And the patch fixes some more issues:
 
 - avr_load_libgcc_p must only allow __flash because __load_3/4 use LPM.
 
 - Resetting RAMPZ after ELPM for EBI-devices in avr_out_lpm was void in
   some situations because of premature return.
   This is fixed now; the new code is located in avr_load_lpm.
 
 Test suite results look good. There is just a ICE for
   gcc.target/avr/torture/addr-space-2-x.c  -O3 -g
 which appears to be PR middle-end/52472
 
 Ok to commit?
 
 Johann
 
   PR rtl-optimization/52543
   PR target/52461
   * config/avr/avr-protos.h (avr_load_lpm): New prototype.
   * config/avr/avr.c (avr_mode_dependent_address_p): New function.
   (TARGET_MODE_DEPENDENT_ADDRESS_P): New define.
   (avr_load_libgcc_p): Restrict to __flash loads.
   (avr_out_lpm): Only handle 1-byte loads from __flash.
   (avr_load_lpm): New function.
   (avr_find_unused_d_reg): Remove.
   (avr_out_lpm_no_lpmx): Remove.
   (adjust_insn_length): Handle ADJUST_LEN_LOAD_LPM.
   
   * config/avr/avr.md (unspec): Add UNSPEC_LPM.
   (load_mode_libgcc): Use UNSPEC_LPM instead of MEM.
   (load_mode, load_mode_clobber): New insns.
   (movmode): For multi-byte move from non-generic
   16-bit address spaces: Expand to load_mode resp.
   load_mode_clobber.
   (loadmode_libgcc): Remove expander.
   (split-lpmx): Remove split.
Index: config/avr/avr.md
===
--- config/avr/avr.md	(revision 185518)
+++ config/avr/avr.md	(working copy)
@@ -63,6 +63,7 @@ (define_c_enum unspec
   [UNSPEC_STRLEN
UNSPEC_MOVMEM
UNSPEC_INDEX_JMP
+   UNSPEC_LPM
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
@@ -140,7 +141,7 @@ (define_attr adjust_len
   out_bitop, out_plus, out_plus_noclobber, plus64, addto_sp,
tsthi, tstpsi, tstsi, compare, compare64, call,
mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32,
-   xload, movmem,
+   xload, movmem, load_lpm,
ashlqi, ashrqi, lshrqi,
ashlhi, ashrhi, lshrhi,
ashlsi, ashrsi, lshrsi,
@@ -364,43 +365,60 @@ (define_split
 ;;
 ;; Move stuff around
 
-;; loadqi_libgcc
-;; loadhi_libgcc
-;; loadpsi_libgcc
-;; loadsi_libgcc
-;; loadsf_libgcc
-(define_expand loadmode_libgcc
-  [(set (match_dup 3)
-(match_dup 2))
-   (set (reg:MOVMODE 22)
-(match_operand:MOVMODE 1 memory_operand ))
-   (set (match_operand:MOVMODE 0 register_operand )
-(reg:MOVMODE 22))]
-  avr_load_libgcc_p (operands[1])
-  {
-operands[3] = gen_rtx_REG (HImode, REG_Z);
-operands[2] = force_operand (XEXP (operands[1], 0), NULL_RTX);
-operands[1] = replace_equiv_address (operands[1], operands[3]);
-set_mem_addr_space (operands[1], ADDR_SPACE_FLASH);
-  })
+;; Represent a load from __flash that needs libgcc support as UNSPEC.
+;; This is legal because we read from non-changing memory.
+;; For rationale see the FIXME below.
 
-;; load_qi_libgcc
-;; load_hi_libgcc
 ;; load_psi_libgcc
 ;; load_si_libgcc
 ;; load_sf_libgcc
 (define_insn load_mode_libgcc
   [(set (reg:MOVMODE 22)
-(match_operand:MOVMODE 0 memory_operand m,m))]
-  avr_load_libgcc_p (operands[0])
-REG_P (XEXP (operands[0], 0))
-REG_Z == REGNO (XEXP (operands[0], 0))
+(unspec:MOVMODE [(reg:HI REG_Z)]
+UNSPEC_LPM))]
+  
   {
-operands[0] = GEN_INT (GET_MODE_SIZE (MODEmode));
-return %~call __load_%0;
+rtx n_bytes = GEN_INT (GET_MODE_SIZE (MODEmode));
+output_asm_insn (%~call __load_%0, n_bytes);
+return ;
   }
-  [(set_attr length 1,2)
-   (set_attr isa rjmp,jmp)
+  [(set_attr type xcall)
+   (set_attr cc clobber)])
+
+
+;; Similar for inline reads from flash.  We use UNSPEC instead
+;; of MEM for the same reason as above: PR52543.
+;; $1 contains the memory segment.
+
+(define_insn load_mode
+  [(set (match_operand:MOVMODE 0 register_operand =r)
+(unspec:MOVMODE [(reg:HI REG_Z)
+ (match_operand:QI 1 reg_or_0_operand rL)]
+UNSPEC_LPM))]
+  (CONST_INT_P (operands[1])  AVR_HAVE_LPMX)
+   || (REG_P (operands[1])  AVR_HAVE_ELPMX)
+  {
+return avr_load_lpm (insn, operands, NULL);
+  }

Re: [Patch,AVR]: Hack around PR rtl-optimization/52543, Take #2

2012-03-20 Thread Steven Bosscher
On Tue, Mar 20, 2012 at 8:54 PM, Georg-Johann Lay a...@gjlay.de wrote:
 Dropping the first patch which does not work because at expand-time there
 must not be pre-/post-modify addressing :-(

Have you tried to fix that, instead? Or at least ask around a bit to
see what people would think about that idea? The reasons why things
are the way they are, may not be applicable anymore.

For example, perhaps the only reason for not having pre-/post-modify
addressing modes earlier is that the old flow dataflow framework
didn't handle them. And it doesn't seem to be so black-and-white: The
very pass you ran into problems with first, cprop, does handle
pre-/post-modify addresses in local cprop. Some other passes simply
take the conservative path and drop pre-/post-modify (like CSE, which
doesn't record values from them). It may be a relatively small job to
make everything accept them, and you may be doing something that's also
helpful for other targets.

Ciao!
Steven


Re: [C++ Patch] PR 52487

2012-03-20 Thread Paolo Carlini

On 03/20/2012 08:22 PM, Jason Merrill wrote:
That assert is there to make sure that we don't try to test for 
literality of an incomplete type.  We should check for completeness 
before trying to check for literality.

You mean, in the relevant caller, here in check_field_decls:

  /* If at least one non-static data member is non-literal, the whole
 class becomes non-literal.  */
  if (!literal_type_p (type))
CLASSTYPE_LITERAL_P (t) = false;

essentially setting CLASSTYPE_LITERAL_P (t) = false; also when 
CLASS_TYPE_P (type) && !COMPLETE_TYPE_P (complete_type (type)) or maybe 
just CLASS_TYPE_P (type) && !COMPLETE_TYPE_P (type) ?


Thanks,
Paolo.


[SH] PR 52479 - Remove fsca for DFmode

2012-03-20 Thread Oleg Endo
Hi,

The attached patch removes the fsca instruction support for DFmode on
SH4A when -ffast-math is enabled.

Tested against rev 18 with 
make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a-single/-mb,
-m4-single/-ml,-m4-single/-mb,
-m4a-single/-ml,-m4a-single/-mb}

and no new failures.

Cheers,
Oleg

ChangeLog:

PR target/52479
* config/sh/sh-protos.h (sh_fsca_df2int): Remove.
* config/sh/sh.c (sh_fsca_df2int_rtx, sh_fsca_df2int): Remove.
* config/sh/sh.md (sindf2, cosdf2): Remove.

testsuite/ChangeLog:

PR target/52479
* gcc.target/sh/sh4a-cos.c: Remove.
* gcc.target/sh/sh4a-sin.c: Remove.

Index: gcc/testsuite/gcc.target/sh/sh4a-cos.c
===
--- gcc/testsuite/gcc.target/sh/sh4a-cos.c	(revision 185554)
+++ gcc/testsuite/gcc.target/sh/sh4a-cos.c	(working copy)
@@ -1,11 +0,0 @@
-/* Verify that we generate single-precision sine and cosine approximate
-   (fsca) in fast math mode on SH4A with FPU.  */
-/* { dg-do compile { target sh*-*-* } } */
-/* { dg-options -O -ffast-math } */
-/* { dg-skip-if  { sh*-*-* } { * } { -m4a -m4a-single -m4a-single-only } }  */
-/* { dg-final { scan-assembler fsca } } */
-
-#include math.h
-
-double test(double f) { return cos(f); }
-
Index: gcc/testsuite/gcc.target/sh/sh4a-sin.c
===
--- gcc/testsuite/gcc.target/sh/sh4a-sin.c	(revision 185554)
+++ gcc/testsuite/gcc.target/sh/sh4a-sin.c	(working copy)
@@ -1,11 +0,0 @@
-/* Verify that we generate single-precision sine and cosine approximate
-   (fsca) in fast math mode on SH4A with FPU.  */
-/* { dg-do compile { target sh*-*-* } } */
-/* { dg-options -O -ffast-math } */
-/* { dg-skip-if  { sh*-*-* } { * } { -m4a -m4a-single -m4a-single-only } }  */
-/* { dg-final { scan-assembler fsca } } */
-
-#include math.h
-
-double test(double f) { return sin(f); }
-
Index: gcc/testsuite/gcc.target/sh/sh4a-sincos.c
===
--- gcc/testsuite/gcc.target/sh/sh4a-sincos.c	(revision 185554)
+++ gcc/testsuite/gcc.target/sh/sh4a-sincos.c	(working copy)
@@ -1,12 +0,0 @@
-/* Verify that we generate a single single-precision sine and cosine
-   approximate (fsca) in fast math mode when a function computes both
-   sine and cosine.  */
-/* { dg-do compile { target sh*-*-* } } */
-/* { dg-options -O -ffast-math } */
-/* { dg-skip-if  { sh*-*-* } { * } { -m4a -m4a-single -m4a-single-only } }  */
-/* { dg-final { scan-assembler-times fsca 1 } } */
-
-#include math.h
-
-double test(double f) { return sin(f) + cos(f); }
-
Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h	(revision 185554)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -39,7 +39,6 @@
 
 #ifdef RTX_CODE
 extern rtx sh_fsca_sf2int (void);
-extern rtx sh_fsca_df2int (void);
 extern rtx sh_fsca_int2sf (void);
 
 /* Declare functions defined in sh.c and used in templates.  */
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 185554)
+++ gcc/config/sh/sh.c	(working copy)
@@ -11997,27 +11997,6 @@
   return sh_fsca_sf2int_rtx;
 }
 
-/* This function returns a constant rtx that represents pi / 2**15 in
-   DFmode.  it's used to scale DFmode angles, in radians, to a
-   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
-   maps to 0x1).  */
-
-static GTY(()) rtx sh_fsca_df2int_rtx;
-
-rtx
-sh_fsca_df2int (void)
-{
-  if (! sh_fsca_df2int_rtx)
-{
-  REAL_VALUE_TYPE rv;
-
-  real_from_string (rv, 10430.378350470453);
-  sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
-}
-
-  return sh_fsca_df2int_rtx;
-}
-
 /* This function returns a constant rtx that represents 2**15 / pi in
SFmode.  it's used to scale a fixed-point signed 16.16-bit fraction
of a full circle back to a SFmode value, i.e., 0x1 maps to
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 18)
+++ gcc/config/sh/sh.md	(working copy)
@@ -10658,48 +10658,6 @@
   DONE;
 })
 
-(define_expand sindf2
-  [(set (match_operand:DF 0 fp_arith_reg_operand )
-	(unspec:DF [(match_operand:DF 1 fp_arith_reg_operand )]
-		   UNSPEC_FSINA))]
-  TARGET_SH4A_FP  ! TARGET_FPU_SINGLE  flag_unsafe_math_optimizations
-{
-  rtx scaled = gen_reg_rtx (DFmode);
-  rtx truncated = gen_reg_rtx (SImode);
-  rtx fsca = gen_reg_rtx (V2SFmode);
-  rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
-  rtx sfresult = gen_reg_rtx (SFmode);
-
-  emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
-  emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
-  emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
-			  get_fpscr_rtx ()));
-  emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0));

Re: [C++ Patch] PR 52487

2012-03-20 Thread Paolo Carlini

... this simple patch also passes testing.

Paolo.


/cp
2012-03-20  Paolo Carlini  paolo.carl...@oracle.com

PR c++/52487
* class.c (check_field_decls): Call literal_type_p only
on complete types.

/testsuite
2012-03-20  Paolo Carlini  paolo.carl...@oracle.com

PR c++/52487
* g++.dg/cpp0x/lambda/lambda-ice7.C: New.
Index: testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C
===
--- testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C (revision 0)
+++ testsuite/g++.dg/cpp0x/lambda/lambda-ice7.C (revision 0)
@@ -0,0 +1,9 @@
+// PR c++/52487
+// { dg-options -std=c++0x }
+
+struct A; // { dg-error forward declaration }
+
+void foo(A a)
+{
+  [=](){a;};  // { dg-error invalid use of incomplete type }
+}
Index: cp/class.c
===
--- cp/class.c  (revision 185588)
+++ cp/class.c  (working copy)
@@ -3150,7 +3150,7 @@ check_field_decls (tree t, tree *access_decls,
 
   /* If at least one non-static data member is non-literal, the whole
  class becomes non-literal.  */
-  if (!literal_type_p (type))
+  if (COMPLETE_TYPE_P (type)  !literal_type_p (type))
 CLASSTYPE_LITERAL_P (t) = false;
 
   /* A standard-layout class is a class that:


[SH] PR 52642 - libstdc++ failures

2012-03-20 Thread Oleg Endo
Hi,

Attached is Kaz's patch from the PR.

Tested against rev 18 with 
make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a-single/-mb,
-m4-single/-ml,-m4-single/-mb,
-m4a-single/-ml,-m4a-single/-mb}

It fixes a few intermittently failing libstdc++ tests.
No new failures otherwise.

OK to apply?

Maybe this one should be backported to 4.7.x, too?

Cheers,
Oleg


ChangeLog:

PR target/52642
* config/sh/sh.c (sh_expand_prologue): Emit blockage at the end
of prologue for unwinder and profiler.

Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 185554)
+++ gcc/config/sh/sh.c	(working copy)
@@ -7239,6 +7239,13 @@
   emit_insn (gen_shcompact_incoming_args ());
 }
 
+  /* If we are profiling, make sure no instructions are scheduled before
+ the call to mcount.  Similarly if some call instructions are swapped
+ before frame related insns, it'll confuse the unwinder because
+ currently SH has no unwind info for function epilogues.  */
+  if (crtl-profile || flag_exceptions || flag_unwind_tables)
+emit_insn (gen_blockage ());
+
   if (flag_stack_usage_info)
 current_function_static_stack_size = stack_usage;
 }


Re: remove wrong code in immed_double_const

2012-03-20 Thread Mike Stump
On Mar 20, 2012, at 6:55 AM, Michael Matz wrote:
 Actually you did.  I've tried yesterday to come up with a text that would 
 do the same (because I agree with you that deleting the assert changes 
 the spec of the function,

The spec of the function is the text above the definition of the function, 
coupled with the information in the .texi file, would you agree?  If so, could 
you please quote the text of the spec which would be violated by removing the 
assert?  Could you please give a specific value with which we could talk about 
that shows a violation of the spec.

My position is simple, the spec is what is above the definition and the .texi 
files, and the stuff inside the definition are interesting implementation 
details of that spec, which _never_ modify the spec.  My position is that 0 is 
a value which the spec defines, and for which we assert.  Please quote the line 
from the spec that defines what we do in that case.  I've never seen anyone 
quote such a line.  To support your position, I will insist on a direct quote 
from the  spec.

 simply because the assert _is_ part of the spec of the function), and my 
 attempt was _much_ worse than yours, so I didn't send it :)

If you consider Eiffel, the pre and post condition on a function are indeed 
part of the spec of the function.  But, when they are wrong and need to be 
fixed, you can't argue that since the spec says if I is 42, abort, then 
trivially, we can't fix the spec because the spec says that if I is 42, we 
abort.  To back the position that spec must not be changed, you need to explain 
at least one thing for which the wrong thing will happen if the spec did 
change.  If you want to go down that path, you will need to furnish one example 
where badness happens with 0, not 2, not 3, but 0.  If you can't do that, you 
lose.  If you can, love to hear it.  Now, if you cite a buggy piece of 
software that does the wrong thing as support, I won't be swayed, I will 
concede the fact gcc has bugs and those bugs should be fixed, it always has, 
and always will.  See my other post for examples of existing bugs in gcc that 
are not protected by the assert.  I am sympathetic to preferring asserts over 
wrong code gen, so, I'd be willing to fix all the buggy routines or make them 
assert, before we lose the assert.  In that case, I'd really prefer a list of 
concrete places to fix.  An unbounded idea that the entire rest of the compiler 
needs fixing, well, doesn't bode well for incremental forward progress.  
Given just how buggy it is, personally, I don't see the problem in just 
declaring that OI is completely buggy, and move on.


[google][4.6][i386]Support autocloning for corei7 with -mvarch= option to remove LCP stalls in loops (issue5865043)

2012-03-20 Thread Sriraman Tallam
This patch adds support to version for corei7 with -mvarch option. The 
versioning supported is in the case where a loop generates an LCP stalling 
instruction in corei7. In such cases, on corei7, limiting the unroll factor to 
try to keep the unrolled loop body small enough to fit in the Corei7's loop 
stream detector can hide LCP stalls in loops. With mvarch, the function 
containing the loop is multi-versioned and one version is tagged with 
tune=corei7 so that the unroll factor can be limited on this version.

Please see: http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01230.html for 
discussion on mvarch option.
Please see: http://gcc.gnu.org/ml/gcc-patches/2011-12/msg00123.html for 
discussion  on LCP stalls in corei7.


The autocloning framework is only available in google/gcc-4_6 branch. I am 
working on porting this to trunk.

* config/i386/i386.c (find_himode_assigns): New function.
(mversionable_for_core2_p): Add new param version_number.
(mversionable_for_corei7_p): New function.
(ix86_mversion_function): Check for corei7 versioning.
* params.def (PARAM_MAX_FUNCTION_SIZE_FOR_AUTO_CLONING): Bump
allowed limit to 5000.
*  mversn-dispatch.c (do_auto_clone): Reverse fn_ver_addr_chain.

Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 185514)
+++ config/i386/i386.c  (working copy)
@@ -26507,6 +26507,132 @@ any_loops_vectorizable_with_load_store (void)
   return vectorizable_loop_found;
 }
 
+/* Returns true if this function finds a loop that contains a possible LCP
+   stalling instruction on corei7.   This is used to multiversion functions
+   for corei7.  
+
+   This function looks for instructions that store a constant into
+   HImode (16-bit) memory. These require a length-changing prefix and on
+   corei7 are prone to LCP stalls. These stalls can be avoided if the loop
+   is streamed from the loop stream detector.  */
+
+static bool
+find_himode_assigns (void)
+{
+  gimple_stmt_iterator gsi;
+  gimple stmt;
+  enum gimple_code code;
+  tree lhs/*, rhs*/;
+  enum machine_mode mode;
+  basic_block *body;
+  unsigned i;
+  loop_iterator li;
+  struct loop *loop;
+  bool found = false;
+  location_t locus = 0;
+  int stmt_count;
+  unsigned HOST_WIDE_INT n_unroll, max_unroll;
+
+  if (!flag_unroll_loops)
+return false;
+
+  loop_optimizer_init (LOOPS_NORMAL
+   | LOOPS_HAVE_RECORDED_EXITS);
+  if (number_of_loops ()  1)
+return false;
+
+  scev_initialize();
+
+  if (profile_status == PROFILE_READ)
+max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+  else
+max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+
+  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
+{
+  tree niter;
+
+  /* Will not peel/unroll cold areas.  */
+  if (optimize_loop_for_size_p (loop))
+continue;
+
+  /* Can the loop be manipulated?  */
+  if (!can_duplicate_loop_p (loop))
+continue;
+
+  niter = number_of_latch_executions (loop);
+  if (host_integerp (niter, 1))
+   {
+ n_unroll = tree_low_cst (niter, 1);
+ if (n_unroll = max_unroll)
+   continue;
+   }
+
+  body = get_loop_body (loop);
+  found = false;
+  stmt_count = 0;
+
+  for (i = 0; i  loop-num_nodes; i++)
+   {
+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (gsi))
+   {
+ stmt = gsi_stmt (gsi);
+ stmt_count++;
+ if (found)
+   continue;
+ code = gimple_code (stmt);
+ if (code != GIMPLE_ASSIGN)
+   continue;
+ lhs = gimple_assign_lhs (stmt);
+ if (TREE_CODE (lhs) != MEM_REF 
+ TREE_CODE (lhs) != COMPONENT_REF 
+ TREE_CODE (lhs) != ARRAY_REF)
+   continue;
+ if (gimple_assign_rhs_code(stmt) != INTEGER_CST)
+   continue;
+ mode = TYPE_MODE (TREE_TYPE (lhs));
+ if (mode == HImode)
+   {
+ locus = gimple_location (stmt);
+ found = true;
+   }
+  }
+   }
+  /* Don't worry about large loops that won't be unrolled anyway. In fact,
+   * don't worry about unrolling loops that are already over the size of 
the
+   * LSD (28 insts). Since instruction counts may be a little off at this
+   * point, due to downstream transformations, include loops a little 
bigger
+   * than the LSD size.
+   */
+  if (found  stmt_count  40)
+   {
+ n_unroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)/stmt_count;
+ /* Check for a simple peel candidate */
+ if (!(loop-header-count
+expected_loop_iterations (loop)  2 * n_unroll))
+   {
+ location_t locus2;
+ edge exit;
+ if ((exit = single_exit(loop)) != 

[patch] Split parts of cse_insn out to a few new functions

2012-03-20 Thread Steven Bosscher
Hello,

This patch splits a couple of pieces of cse_insn out to new functions.
There are no functional changes, and no code generation differences as
far as I could tell on x86_64 (-m64 and -m32).

The purpose of the patch is to hopefully make cse_insn easier
to understand. In a follow-up patch, I will make canonicalize_insn run
only once per insn (it currently, i.e. before and after this patch,
runs multiple times for CSE on extended basic blocks if a block is in
multiple extended basic blocks).

Bootstrapped  tested on x86_64-unknown-linux-gnu. OK for trunk?

Ciao!
Steven
* cse.c (invalidate_from_sets_and_clobbers, try_back_substitute_reg,
find_sets_in_insn, canonicalize_insn): Split out from ...
(cse_insn): ... here.
(invalidate_from_clobbers): Take an insn instead of the pattern.

Index: cse.c
===
--- cse.c   (revision 185515)
+++ cse.c   (working copy)
@@ -597,6 +597,7 @@ static void record_jump_cond (enum rtx_c
 static void cse_insn (rtx);
 static void cse_prescan_path (struct cse_basic_block_data *);
 static void invalidate_from_clobbers (rtx);
+static void invalidate_from_sets_and_clobbers (rtx);
 static rtx cse_process_notes (rtx, rtx, bool *);
 static void cse_extended_basic_block (struct cse_basic_block_data *);
 static void count_reg_usage (rtx, int *, rtx, int);
@@ -4089,10 +4090,22 @@ record_jump_cond (enum rtx_code code, en
 }
 
 /* CSE processing for one instruction.
-   First simplify sources and addresses of all assignments
-   in the instruction, using previously-computed equivalents values.
-   Then install the new sources and destinations in the table
-   of available values.  */
+
+   Most true common subexpressions are mostly optimized away in GIMPLE,
+   but the few that leak through are cleaned up by cse_insn, and complex
+   addressing modes are often formed here.
+
+   The main function is cse_insn, and between here and that function
+   a couple of helper functions is defined to keep the size of cse_insn
+   within reasonable proportions.
+   
+   Data is shared between the main and helper functions via STRUCT SET,
+   that contains all data related for every set in the instruction that
+   is being processed.
+   
+   Note that cse_main processes all sets in the instruction.  Most
+   passes in GCC only process simple SET insns or single_set insns, but
+   CSE processes insns with multiple sets as well.  */
 
 /* Data on one SET contained in the instruction.  */
 
@@ -4128,50 +4141,93 @@ struct set
   /* Table entry for the destination address.  */
   struct table_elt *dest_addr_elt;
 };
+
+/* Special handling for (set REG0 REG1) where REG0 is the
+   cheapest, cheaper than REG1.  After cse, REG1 will probably not
+   be used in the sequel, so (if easily done) change this insn to
+   (set REG1 REG0) and replace REG1 with REG0 in the previous insn
+   that computed their value.  Then REG1 will become a dead store
+   and won't cloud the situation for later optimizations.
+
+   Do not make this change if REG1 is a hard register, because it will
+   then be used in the sequel and we may be changing a two-operand insn
+   into a three-operand insn.
+   
+   This is the last transformation that cse_insn will try to do.  */
 
 static void
-cse_insn (rtx insn)
+try_back_substitute_reg (rtx set, rtx insn)
 {
-  rtx x = PATTERN (insn);
-  int i;
-  rtx tem;
-  int n_sets = 0;
+  rtx dest = SET_DEST (set);
+  rtx src = SET_SRC (set);
 
-  rtx src_eqv = 0;
-  struct table_elt *src_eqv_elt = 0;
-  int src_eqv_volatile = 0;
-  int src_eqv_in_memory = 0;
-  unsigned src_eqv_hash = 0;
+  if (REG_P (dest)
+   REG_P (src)  ! HARD_REGISTER_P (src)
+   REGNO_QTY_VALID_P (REGNO (src)))
+{
+  int src_q = REG_QTY (REGNO (src));
+  struct qty_table_elem *src_ent = qty_table[src_q];
 
-  struct set *sets = (struct set *) 0;
+  if (src_ent-first_reg == REGNO (dest))
+   {
+ /* Scan for the previous nonnote insn, but stop at a basic
+block boundary.  */
+ rtx prev = insn;
+ rtx bb_head = BB_HEAD (BLOCK_FOR_INSN (insn));
+ do
+   {
+ prev = PREV_INSN (prev);
+   }
+ while (prev != bb_head  (NOTE_P (prev) || DEBUG_INSN_P (prev)));
 
-  this_insn = insn;
-#ifdef HAVE_cc0
-  /* Records what this insn does to set CC0.  */
-  this_insn_cc0 = 0;
-  this_insn_cc0_mode = VOIDmode;
-#endif
+ /* Do not swap the registers around if the previous instruction
+attaches a REG_EQUIV note to REG1.
 
-  /* Find all the SETs and CLOBBERs in this instruction.
- Record all the SETs in the array `set' and count them.
- Also determine whether there is a CLOBBER that invalidates
- all memory references, or all references at varying addresses.  */
+??? It's not entirely clear whether we can transfer a REG_EQUIV
+from the pseudo that originally shadowed an 

[SH] PR 50751 - some test cases

2012-03-20 Thread Oleg Endo
Hi,

The attached patch adds some test cases for PR 50751 to check whether
mov.b insns are generated.

Tested on sh-sim with 
make check-gcc RUNTESTFLAGS=sh.exp=pr50751* --target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a-single/-mb,-m4-single/-ml,
-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}

to confirm that the tests pass as expected.

Cheers,
Oleg

testsuite/ChangeLog:

PR target/50751
* gcc.target/sh/pr50751-1.c: New.
* gcc.target/sh/pr50751-2.c: New.
* gcc.target/sh/pr50751-3.c: New.

Index: gcc/testsuite/gcc.target/sh/pr50751-1.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-1.c	(revision 0)
@@ -0,0 +1,30 @@
+/* Check that the mov.b displacement addressing insn is generated.
+   If the insn is generated as expected, there should be no address 
+   calculations outside the mov insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const char* ap, char* bp, char val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp, char val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-2.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-2.c	(revision 0)
@@ -0,0 +1,27 @@
+/* Check that the mov.b displacement addressing insn is generated and the 
+   base address is adjusted only once.  On SH2A this test is skipped because
+   there is a 4 byte mov.b insn that can handle larger displacements.  Thus
+   on SH2A the base address will not be adjusted in this case.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5* -m2a* } {  } } */
+/* { dg-final { scan-assembler-times add 2 } } */
+
+void
+testfunc_00 (const char* ap, char* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-3.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-3.c	(revision 0)
@@ -0,0 +1,26 @@
+/* Check that on SH2A the 4 byte mov.b displacement insn is generated to
+   handle larger displacements.  If it is generated correctly, there should
+   be no base address adjustments outside the mov.b insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { * } { -m2a* } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const char* ap, char* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+


Re: [google][4.6][i386]Support autocloning for corei7 with -mvarch= option to remove LCP stalls in loops (issue5865043)

2012-03-20 Thread Teresa Johnson
On Tue, Mar 20, 2012 at 2:04 PM, Sriraman Tallam tmsri...@google.com wrote:
 This patch adds support to version for corei7 with -mvarch option. The 
 versioning supported is in the case where a loop generates a LCP stalling 
 instruction in corei7. In such cases, on corei7, limiting the unroll factor 
 to try to keep the unrolled loop body small enough to fit in the Corei7's 
 loop stream detector can hide LCP stalls in loops. With mvarch, the function 
 containing the loop is multi-versioned and one version is tagged with 
 tune=corei7 so that the unroll factor can be limited on this version.

 Please see: http://gcc.gnu.org/ml/gcc-patches/2011-12/msg01230.html for 
 discussion on mvarch option.
 Please see: http://gcc.gnu.org/ml/gcc-patches/2011-12/msg00123.html for 
 discussion  on LCP stalls in corei7.


 The autocloning framework is only available in google/gcc-4_6 branch. I am 
 working on porting this to trunk.

        * config/i386/i386.c (find_himode_assigns): New function.
        (mversionable_for_core2_p): Add new param version_number.
        (mversionable_for_corei7_p): New function.
        (ix86_mversion_function): Check for corei7 versioning.
        * params.def (PARAM_MAX_FUNCTION_SIZE_FOR_AUTO_CLONING): Bump
        allowed limit to 5000.
        *  mversn-dispatch.c (do_auto_clone): Reverse fn_ver_addr_chain.

 Index: config/i386/i386.c
 ===
 --- config/i386/i386.c  (revision 185514)
 +++ config/i386/i386.c  (working copy)
 @@ -26507,6 +26507,132 @@ any_loops_vectorizable_with_load_store (void)
   return vectorizable_loop_found;
  }

 +/* Returns true if this function finds a loop that contains a possible LCP
 +   stalling instruction on corei7.   This is used to multiversion functions
 +   for corei7.
 +
 +   This function looks for instructions that store a constant into
 +   HImode (16-bit) memory. These require a length-changing prefix and on
 +   corei7 are prone to LCP stalls. These stalls can be avoided if the loop
 +   is streamed from the loop stream detector.  */
 +
 +static bool
 +find_himode_assigns (void)
 +{
 +  gimple_stmt_iterator gsi;
 +  gimple stmt;
 +  enum gimple_code code;
 +  tree lhs/*, rhs*/;

Can rhs be removed?

 +  enum machine_mode mode;
 +  basic_block *body;
 +  unsigned i;
 +  loop_iterator li;
 +  struct loop *loop;
 +  bool found = false;
 +  location_t locus = 0;

locus is dead (assigned but not read).

 +  int stmt_count;
 +  unsigned HOST_WIDE_INT n_unroll, max_unroll;
 +
 +  if (!flag_unroll_loops)
 +    return false;
 +
 +  loop_optimizer_init (LOOPS_NORMAL
 +                       | LOOPS_HAVE_RECORDED_EXITS);
 +  if (number_of_loops ()  1)
 +    return false;
 +
 +  scev_initialize();
 +
 +  if (profile_status == PROFILE_READ)
 +    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
 +  else
 +    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);

It might be clearer to rename max_unroll to max_peel_times or
something like that.

 +
 +  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 +    {
 +      tree niter;
 +
 +      /* Will not peel/unroll cold areas.  */
 +      if (optimize_loop_for_size_p (loop))
 +        continue;
 +
 +      /* Can the loop be manipulated?  */
 +      if (!can_duplicate_loop_p (loop))
 +        continue;
 +
 +      niter = number_of_latch_executions (loop);
 +      if (host_integerp (niter, 1))
 +       {
 +         n_unroll = tree_low_cst (niter, 1);
 +         if (n_unroll = max_unroll)
 +           continue;
 +       }
 +
 +      body = get_loop_body (loop);
 +      found = false;
 +      stmt_count = 0;
 +
 +      for (i = 0; i  loop-num_nodes; i++)
 +       {
 +         for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next 
 (gsi))
 +           {
 +             stmt = gsi_stmt (gsi);
 +             stmt_count++;
 +             if (found)
 +               continue;
 +             code = gimple_code (stmt);
 +             if (code != GIMPLE_ASSIGN)
 +               continue;
 +             lhs = gimple_assign_lhs (stmt);
 +             if (TREE_CODE (lhs) != MEM_REF 
 +                 TREE_CODE (lhs) != COMPONENT_REF 
 +                 TREE_CODE (lhs) != ARRAY_REF)
 +               continue;
 +             if (gimple_assign_rhs_code(stmt) != INTEGER_CST)
 +               continue;
 +             mode = TYPE_MODE (TREE_TYPE (lhs));
 +             if (mode == HImode)
 +               {
 +                 locus = gimple_location (stmt);
 +                 found = true;
 +               }
 +          }
 +       }
 +      /* Don't worry about large loops that won't be unrolled anyway. In 
 fact,
 +       * don't worry about unrolling loops that are already over the size of 
 the
 +       * LSD (28 insts). Since instruction counts may be a little off at this
 +       * point, due to downstream transformations, include loops a little 
 bigger
 +       * than the LSD size.
 +       */
 +      if (found  

[google/4.6] Fix DW_OP_GNU_addr_index problem with -gfission. (issue5866047)

2012-03-20 Thread Cary Coutant
For google/gcc-4_6 branch.

Fix bug where we were outputting a garbage value for the index operand
of DW_OP_GNU_addr_index.

Tested: incremental remake in GCC build directory and hand tested.


2012-03-20   Cary Coutant  ccout...@google.com

* dwarf2out.c (size_of_loc_descr): Use val_index instead of
v.val_unsigned.
(output_loc_operands): Likewise.


Index: dwarf2out.c
===
--- dwarf2out.c (revision 185585)
+++ dwarf2out.c (working copy)
@@ -4902,7 +4902,7 @@ size_of_loc_descr (dw_loc_descr_ref loc)
   size += DWARF2_ADDR_SIZE;
   break;
 case DW_OP_GNU_addr_index:
-  size += size_of_uleb128 (loc-dw_loc_oprnd1.v.val_unsigned);
+  size += size_of_uleb128 (loc-dw_loc_oprnd1.val_index);
   break;
 case DW_OP_const1u:
 case DW_OP_const1s:
@@ -5283,7 +5283,7 @@ output_loc_operands (dw_loc_descr_ref lo
   break;
 
 case DW_OP_GNU_addr_index:
-  dw2_asm_output_data_uleb128 (loc-dw_loc_oprnd1.v.val_unsigned,
+  dw2_asm_output_data_uleb128 (loc-dw_loc_oprnd1.val_index,
(address index));
   break;
 

--
This patch is available for review at http://codereview.appspot.com/5866047


Merge from 4.7 branch to gccgo branch

2012-03-20 Thread Ian Lance Taylor
I've merged revision 185588 of the 4.7 branch to the gccgo branch.

This is a switch from merging from mainline.

Ian


Re: [patch] Split parts of cse_insn out to a few new functions

2012-03-20 Thread Ian Lance Taylor
On Tue, Mar 20, 2012 at 2:06 PM, Steven Bosscher stevenb@gmail.com wrote:

 This patch splits a couple of pieces of cse_insn out to new functions.
 There are no functional changes, and no code generation differences as
 far as I could tell on x86_64 (-m64 and -m32).

 The purpose of the patch is to hopefully make cse_insn easier
 to understand. In a follow-up patch, I will make canonicalize_insn run
 only once per insn (it currently, i.e. before and after this patch,
 runs multiple times for CSE on extended basic blocks if a block is in
 multiple extended basic blocks).

This is OK.

Thanks.

Ian


Re: [google/4.6] Fix DW_OP_GNU_addr_index problem with -gfission. (issue 5866047)

2012-03-20 Thread dje

LGTM

http://codereview.appspot.com/5866047/


Re: remove wrong code in immed_double_const

2012-03-20 Thread Mike Stump
On Mar 20, 2012, at 5:26 AM, Richard Sandiford wrote:
 So what I was trying to say was that if we remove the assert
 altogether, and allow CONST_DOUBLEs to be wider than 2 HWIs,
 we need to define what the implicit high-order HWIs of a
 CONST_DOUBLE are, just like we already do for CONST_INT.

Now, since you expressed a preference for sign extending, and a worry that 
there might be new bugs exposed in the handling of CONST_DOUBLEs in the face of 
my change, I went through all the code again and tried my best to fix every 
other bug in the compiler at all related to this area that I could find; that 
patch is below.  In this one, I updated the spec for CONST_DOUBLE to be sign 
extending.

Curious, plus_constant is just terribly broken in this area, now fixed.  
mode_signbit_p is speced in English, so, I didn't want to misread or 
misunderstand it and swizzle it, so I left it alone for now.   Someone will 
have to describe what it does and I can try my hand at fixing it, if broken, I 
suspect it is.  As for simplify_const_unary_operation, I don't know what they 
were thinking, return 0 seems safer to me.

If there is any other code that I missed that people know about, I'd be happy 
to fix it, just let me know what code.  I did a pass on all the ports as well, 
and they seem reasonably clean about it.  The biggest problem is OImode -1, 
would come out as hex digits, and all the upper 0xf digits implied by sign 
extension would be missing.  A port that cared about OImode, trivially, would 
fix their output routine.  The debugging code has similar problems.

Is this closer to something you think is in the right direction?  If so, let's 
figure out the right solution for mode_signbit_p and proceed from there.


diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index de45a22..0c6dc45 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -1530,7 +1530,9 @@ Represents either a floating-point constant of mode 
@var{m} or an
 integer constant too large to fit into @code{HOST_BITS_PER_WIDE_INT}
 bits but small enough to fit within twice that number of bits (GCC
 does not provide a mechanism to represent even larger constants).  In
-the latter case, @var{m} will be @code{VOIDmode}.
+the latter case, @var{m} will be @code{VOIDmode}.  For integral values
+the value is a signed value, meaning the top bit of
+@code{CONST_DOUBLE_HIGH} is a sign bit.
 
 @findex CONST_DOUBLE_LOW
 If @var{m} is @code{VOIDmode}, the bits of the value are stored in
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 78ddfc3..c0b24e4 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -531,10 +531,9 @@ immed_double_const (HOST_WIDE_INT i0, HOST_WIDE_INT i1, 
enum machine_mode mode)
 
  1) If GET_MODE_BITSIZE (mode) = HOST_BITS_PER_WIDE_INT, then we use
gen_int_mode.
- 2) GET_MODE_BITSIZE (mode) == 2 * HOST_BITS_PER_WIDE_INT, but the value of
-   the integer fits into HOST_WIDE_INT anyway (i.e., i1 consists only
-   from copies of the sign bit, and sign of i0 and i1 are the same),  then
-   we return a CONST_INT for i0.
+ 2) If the value of the integer fits into HOST_WIDE_INT anyway
+(i.e., i1 consists only from copies of the sign bit, and sign
+   of i0 and i1 are the same), then we return a CONST_INT for i0.
  3) Otherwise, we create a CONST_DOUBLE for i0 and i1.  */
   if (mode != VOIDmode)
 {
diff --git a/gcc/explow.c b/gcc/explow.c
index 2fae1a1..6284d61 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -96,6 +96,9 @@ plus_constant (rtx x, HOST_WIDE_INT c)
   switch (code)
 {
 case CONST_INT:
+  if (GET_MODE_BITSIZE (mode)  HOST_WIDE_INT)
+   /* Punt for now.  */
+   goto overflow;
   return GEN_INT (INTVAL (x) + c);
 
 case CONST_DOUBLE:
@@ -103,10 +106,14 @@ plus_constant (rtx x, HOST_WIDE_INT c)
unsigned HOST_WIDE_INT l1 = CONST_DOUBLE_LOW (x);
HOST_WIDE_INT h1 = CONST_DOUBLE_HIGH (x);
unsigned HOST_WIDE_INT l2 = c;
-   HOST_WIDE_INT h2 = c  0 ? ~0 : 0;
+   HOST_WIDE_INT h2 = c  0 ? ~(HOST_WIDE_INT)0 : 0;
unsigned HOST_WIDE_INT lv;
HOST_WIDE_INT hv;
 
+   if (GET_MODE_BITSIZE (mode)  2*HOST_WIDE_INT)
+ /* Punt for now.  */
+ goto overflow;
+
add_double (l1, h1, l2, h2, lv, hv);
 
return immed_double_const (lv, hv, VOIDmode);
@@ -141,6 +148,9 @@ plus_constant (rtx x, HOST_WIDE_INT c)
   break;
 
 case PLUS:
+  if (GET_MODE_BITSIZE (mode)  HOST_WIDE_INT)
+   /* Punt for now.  */
+   goto overflow;
   /* The interesting case is adding the integer to a sum.
 Look for constant term in the sum and combine
 with C.  For an integer constant term, we make a combined
@@ -185,6 +195,7 @@ plus_constant (rtx x, HOST_WIDE_INT c)
   break;
 }
 
+ overflow:
   if (c != 0)
 x = gen_rtx_PLUS (mode, x, GEN_INT (c));
 
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index ce4eab4..37e46b1 100644
--- a/gcc/simplify-rtx.c
+++ 

Re: [SH] PR 52642 - libstdc++ failures

2012-03-20 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 It fixes a few sometimes failing libstdc++ failures.
 No new failures otherwise.
 
 OK to apply?

OK.

 Maybe this one should be backported to 4.7.x, too?

Sure.

Regards,
kaz


Re: [SH] PR 52479 - Remove fsca for DFmode

2012-03-20 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch removes the fsca instruction support for DFmode on
 SH4A when -ffast-math is enabled.
 
 Tested against rev 18 with 
 make -k check RUNTESTFLAGS=--target_board=sh-sim
 \{-m2/-ml,-m2/-mb,-m2a-single/-mb,
 -m4-single/-ml,-m4-single/-mb,
 -m4a-single/-ml,-m4a-single/-mb}
 
 and no new failures.

OK for trunk.

Regards,
kaz


Re: [SH] PR 50751 - some test cases

2012-03-20 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch adds some test cases for PR 50751 to check whether
 mov.b insns are generated.

OK.

Regards,
kaz


C++ PATCH to mangling of 'new auto'

2012-03-20 Thread Jason Merrill
GCC 4.7 adds mangling for new-expressions in a function signature, but I 
now notice it produces wrong mangling for 'new auto' in simple cases. 
This patch fixes it.


Tested x86_64-pc-linux-gnu, applying to trunk.

This also seems like it might be a candidate for 4.7.0.  What do you 
think, Jakub?
commit 96472bd7b10415d61a3d5e0d640825baf80eb576
Author: Jason Merrill ja...@redhat.com
Date:   Wed Mar 7 19:28:34 2012 -0500

gcc/cp/
	* mangle.c (write_type): Handle 'auto'.
	* init.c (build_new): Don't do auto deduction where it might
	affect template mangling.
libiberty/
	* cp-demangle.c (cplus_demangle_type): Handle 'auto'.

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 1b2a1ef..bcb5ab7 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2774,7 +2774,9 @@ build_new (VEC(tree,gc) **placement, tree type, tree nelts,
   if (type == error_mark_node)
 return error_mark_node;
 
-  if (nelts == NULL_TREE  VEC_length (tree, *init) == 1)
+  if (nelts == NULL_TREE  VEC_length (tree, *init) == 1
+  /* Don't do auto deduction where it might affect mangling.  */
+   (!processing_template_decl || at_function_scope_p ()))
 {
   tree auto_node = type_uses_auto (type);
   if (auto_node)
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index 5d6beb5..1536828 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -1933,6 +1933,13 @@ write_type (tree type)
 	  break;
 
 	case TEMPLATE_TYPE_PARM:
+	  if (is_auto (type))
+		{
+		  write_identifier (Da);
+		  ++is_builtin_type;
+		  break;
+		}
+	  /* else fall through.  */
 	case TEMPLATE_PARM_INDEX:
 	  write_template_param (type);
 	  break;
diff --git a/gcc/testsuite/g++.dg/cpp0x/auto32.C b/gcc/testsuite/g++.dg/cpp0x/auto32.C
new file mode 100644
index 000..2aad34e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/auto32.C
@@ -0,0 +1,9 @@
+// { dg-do compile { target c++11 } }
+
+// { dg-final { scan-assembler _Z1fIiEDTnw_Dapifp_EET_ } }
+template class T auto f(T t) - decltype (new auto(t));
+
+int main()
+{
+  f(1);
+}
diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index 2b3d182..d95b56c 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -2270,6 +2270,11 @@ cplus_demangle_type (struct d_info *di)
 			 cplus_demangle_type (di), NULL);
 	  can_subst = 1;
 	  break;
+
+	case 'a':
+	  /* auto */
+	  ret = d_make_name (di, auto, 4);
+	  break;
 	  
 	case 'f':
 	  /* 32-bit decimal floating point */
diff --git a/libiberty/testsuite/demangle-expected b/libiberty/testsuite/demangle-expected
index 036c481..d489692 100644
--- a/libiberty/testsuite/demangle-expected
+++ b/libiberty/testsuite/demangle-expected
@@ -4075,6 +4075,12 @@ decltype (new int{}) f1int(int)
 --format=gnu-v3
 _Zli2_wPKc
 operator _w(char const*)
+--format=gnu-v3
+_Z1fIiEDTnw_Dapifp_EET_
+decltype (new auto({parm#1})) fint(int)
+--format=gnu-v3
+_Z1fIiERDaRKT_S1_
+auto fint(int const, int)
 #
 # Ada (GNAT) tests.
 #


Re: [google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue 5851044)

2012-03-20 Thread tmsriram

Uploaded new patch.

On 2012/03/20 19:25:38, davidxl wrote:

It would be nice to add some unit/regression test cases of some sort.


Made the existing unit test case check the final layout.



David



http://codereview.appspot.com/5851044/diff/1/callgraph.c
File callgraph.c (right):



http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode309
callgraph.c:309: if (!is_prefix_of (_ZL, name))
How about static functions in namespace? How about functions in

anonymous

namespace?


Thanks for pointing this out. One solution is to add more plugin
interfaces to plugin-api.h to find the section flags and the section
group. This way, comdats can be detected. All other duplicates can be
treated as file static functions. I marked this as TODO for now.

I am also thinking of sending a patch to generate unique section names
for file static functions. This will help --section-ordering-file in
gold too.


http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode511
callgraph.c:511: .text. };
How are the sections ordered in the array?  Keep it in mind that it is

possible

to encode the actual profile count of the function in the section name

in the

future.


Right, for now this parsing will work. The parsing needs to be updated
once the section names change.



http://codereview.appspot.com/5851044/


Re: [google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue 5851044)

2012-03-20 Thread tmsriram

On 2012/03/20 19:26:26, eraman wrote:

http://codereview.appspot.com/5851044/diff/1/callgraph.c
File callgraph.c (right):



http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode513
callgraph.c:513: const int section_priority[] = {0, 3, 4, 2, 1};
Add a comment about section_priority


Done.



http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode571
callgraph.c:571: if (section_priority[kept-section_type]
Add an example that shows why we want to do that


I added a comment.



http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode655
callgraph.c:655: write_out_node (n_it-name, section_start[0],
section_end[0]);
In write_out_node, why take the function name and do a hash table

lookup to get

the section, instead of directly passing Section_id * in the caller.

In all

calls to write_out_node, you are in fact getting the name from the

Section_id *.

That is not true, I pass the Node * in the first two calls. What you
said holds for the other calls. So, I made this more efficient.



http://codereview.appspot.com/5851044/diff/1/callgraph.c#newcode674
callgraph.c:674: s_it-processed = 1;
setting processed to 1 is redundant as it is already done in

write_out_node.

Done.



http://codereview.appspot.com/5851044/diff/1/callgraph.h
File callgraph.h (right):



http://codereview.appspot.com/5851044/diff/1/callgraph.h#newcode31
callgraph.h:31: void push_mm_ptr (void *ptr);
push_allocated_ptr or save_allocated_ptr would be a better name.



Done.


http://codereview.appspot.com/5851044/diff/1/callgraph.h#newcode48
callgraph.h:48: push_mm_ptr (list);
It might be cleaner to create a wrapper around XNEW that calls

push_mm_ptr after

XNEW. Similar for malloc as well.


Done.

http://codereview.appspot.com/5851044/


Re: [google][4.6] Bug fixes to function reordering linker plugin to handle local and comdat functions. (issue 5851044)

2012-03-20 Thread davidxl

ok for google branches after checkin validation.

David

http://codereview.appspot.com/5851044/


PATCH to add -std=c++1y

2012-03-20 Thread Jason Merrill
I've been working on a proposal for return type deduction for normal 
functions for the next C++ standard, and so I'm adding -std=c++1y to 
control it.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit ef9ed182fec9249e396fdb0ed2bd0cc4d725e956
Author: Jason Merrill ja...@redhat.com
Date:   Sat Mar 3 10:04:22 2012 -0500

	* c-common.h (enum cxx_dialect): Add cxx1y.
	* c-common.c (c_common_nodes_and_builtins): Use = for cxx_dialect
	test.
	* c-cppbuiltin.c (c_cpp_builtins): Likewise.
	* c-opts.c (c_common_post_options): Likewise.
	(set_std_cxx1y): New.
	(c_common_handle_option): Call it.
	* c.opt (-std=c++1y, -std=gnu++1y): New flags.
cp/
	* lex.c (init_reswords): Use = for cxx_dialect test.
	* parser.c (cp_parser_exception_specification_opt): Likewise.
testsuite/
	* lib/target-supports.exp: Add { target c++1y }.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index b83f45b..fc83b04 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -4940,7 +4940,7 @@ c_common_nodes_and_builtins (void)
 {
   char16_type_node = make_unsigned_type (char16_type_size);
 
-  if (cxx_dialect == cxx0x)
+  if (cxx_dialect = cxx0x)
 	record_builtin_type (RID_CHAR16, char16_t, char16_type_node);
 }
 
@@ -4956,7 +4956,7 @@ c_common_nodes_and_builtins (void)
 {
   char32_type_node = make_unsigned_type (char32_type_size);
 
-  if (cxx_dialect == cxx0x)
+  if (cxx_dialect = cxx0x)
 	record_builtin_type (RID_CHAR32, char32_t, char32_type_node);
 }
 
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 835b13b..8552f0c 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -649,7 +649,9 @@ enum cxx_dialect {
   cxx03 = cxx98,
   /* C++11  */
   cxx0x,
-  cxx11 = cxx0x
+  cxx11 = cxx0x,
+  /* C++1y (C++17?) */
+  cxx1y
 };
 
 /* The C++ dialect being used. C++98 is the default.  */
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 40a0a62..49804f9 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -714,7 +714,7 @@ c_cpp_builtins (cpp_reader *pfile)
 	cpp_define (pfile, __DEPRECATED);
   if (flag_rtti)
 	cpp_define (pfile, __GXX_RTTI);
-  if (cxx_dialect == cxx0x)
+  if (cxx_dialect = cxx0x)
 cpp_define (pfile, __GXX_EXPERIMENTAL_CXX0X__);
 }
   /* Note that we define this for C as well, so that we know if
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index f2a7971..0ee4390 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -111,6 +111,7 @@ static size_t include_cursor;
 static void handle_OPT_d (const char *);
 static void set_std_cxx98 (int);
 static void set_std_cxx11 (int);
+static void set_std_cxx1y (int);
 static void set_std_c89 (int, int);
 static void set_std_c99 (int);
 static void set_std_c11 (int);
@@ -774,6 +775,12 @@ c_common_handle_option (size_t scode, const char *arg, int value,
 	set_std_cxx11 (code == OPT_std_c__11 /* ISO */);
   break;
 
+case OPT_std_c__1y:
+case OPT_std_gnu__1y:
+  if (!preprocessing_asm_p)
+	set_std_cxx1y (code == OPT_std_c__11 /* ISO */);
+  break;
+
 case OPT_std_c90:
 case OPT_std_iso9899_199409:
   if (!preprocessing_asm_p)
@@ -990,7 +997,7 @@ c_common_post_options (const char **pfilename)
   if (warn_implicit_function_declaration == -1)
 warn_implicit_function_declaration = flag_isoc99;
 
-  if (cxx_dialect == cxx0x)
+  if (cxx_dialect = cxx0x)
 {
   /* If we're allowing C++0x constructs, don't warn about C++98
 	 identifiers which are keywords in C++0x.  */
@@ -1522,6 +1529,20 @@ set_std_cxx11 (int iso)
   cxx_dialect = cxx11;
 }
 
+/* Set the C++ 201y draft standard (without GNU extensions if ISO).  */
+static void
+set_std_cxx1y (int iso)
+{
+  cpp_set_lang (parse_in, iso ? CLK_CXX11: CLK_GNUCXX11);
+  flag_no_gnu_keywords = iso;
+  flag_no_nonansi_builtin = iso;
+  flag_iso = iso;
+  /* C++11 includes the C99 standard library.  */
+  flag_isoc94 = 1;
+  flag_isoc99 = 1;
+  cxx_dialect = cxx1y;
+}
+
 /* Args to -d specify what to dump.  Silently ignore
unrecognized options; they may be aimed at toplev.c.  */
 static void
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 1ec5504..f785b60 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1215,6 +1215,10 @@ std=c++0x
 C++ ObjC++ Alias(std=c++11)
 Deprecated in favor of -std=c++11
 
+std=c++1y
+C++ ObjC++
+Conform to the ISO 201y(7?) C++ draft standard (experimental and incomplete support)
+
 std=c11
 C ObjC
 Conform to the ISO 2011 C standard (experimental and incomplete support)
@@ -1257,6 +1261,10 @@ std=gnu++0x
 C++ ObjC++ Alias(std=gnu++11)
 Deprecated in favor of -std=gnu++11
 
+std=gnu++1y
+C++ ObjC++
+Conform to the ISO 201y(7?) C++ draft standard with GNU extensions (experimental and incomplete support)
+
 std=gnu11
 C ObjC
 Conform to the ISO 2011 C standard with GNU extensions (experimental and incomplete