[PATCH] Update sparc-linux-gnu baselines

2012-03-23 Thread David Miller

I verified that this matches what other Linux platforms have added
recently, and I was careful not to accidentally include the TLS symbols.

In fact, this baseline file now matches powerpc-linux-gnu's precisely.

Ok for mainline and the 4.7 branch?

Thanks.

2012-03-23  David S. Miller  da...@davemloft.net

* config/abi/post/sparc-linux-gnu/baseline_symbols.txt: Update.

diff --git a/libstdc++-v3/config/abi/post/sparc-linux-gnu/baseline_symbols.txt 
b/libstdc++-v3/config/abi/post/sparc-linux-gnu/baseline_symbols.txt
index e025a2e..6c09022 100644
--- a/libstdc++-v3/config/abi/post/sparc-linux-gnu/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/sparc-linux-gnu/baseline_symbols.txt
@@ -43,6 +43,10 @@ 
FUNC:_ZN11__gnu_debug19_Safe_sequence_base13_M_detach_allEv@@GLIBCXX_3.4
 FUNC:_ZN11__gnu_debug19_Safe_sequence_base18_M_detach_singularEv@@GLIBCXX_3.4
 
FUNC:_ZN11__gnu_debug19_Safe_sequence_base22_M_revalidate_singularEv@@GLIBCXX_3.4
 FUNC:_ZN11__gnu_debug19_Safe_sequence_base7_M_swapERS0_@@GLIBCXX_3.4
+FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_attachEPNS_19_Safe_sequence_baseEb@@GLIBCXX_3.4.17
+FUNC:_ZN11__gnu_debug25_Safe_local_iterator_base9_M_detachEv@@GLIBCXX_3.4.17
+FUNC:_ZN11__gnu_debug30_Safe_unordered_container_base13_M_detach_allEv@@GLIBCXX_3.4.17
+FUNC:_ZN11__gnu_debug30_Safe_unordered_container_base7_M_swapERS0_@@GLIBCXX_3.4.17
 FUNC:_ZN14__gnu_parallel9_Settings3getEv@@GLIBCXX_3.4.10
 FUNC:_ZN14__gnu_parallel9_Settings3setERS0_@@GLIBCXX_3.4.10
 FUNC:_ZN9__gnu_cxx12__atomic_addEPVii@@GLIBCXX_3.4
@@ -877,6 +881,7 @@ FUNC:_ZNSaIwEC2ERKS_@@GLIBCXX_3.4
 FUNC:_ZNSaIwEC2Ev@@GLIBCXX_3.4
 FUNC:_ZNSaIwED1Ev@@GLIBCXX_3.4
 FUNC:_ZNSaIwED2Ev@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEE10_S_compareEjj@@GLIBCXX_3.4.16
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_Alloc_hiderC2EPwRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE12_M_leak_hardEv@@GLIBCXX_3.4
@@ -961,6 +966,7 @@ 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7replaceEjjRKS2_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7replaceEjjRKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7replaceEjjjw@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE7reserveEj@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEE8pop_backEv@@GLIBCXX_3.4.17
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_assignEPwjw@@GLIBCXX_3.4.5
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_assignEPwjw@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEE9_M_mutateEjjj@@GLIBCXX_3.4
@@ -1116,6 +1122,7 @@ FUNC:_ZNSolsEs@@GLIBCXX_3.4
 FUNC:_ZNSolsEt@@GLIBCXX_3.4
 FUNC:_ZNSolsEx@@GLIBCXX_3.4
 FUNC:_ZNSolsEy@@GLIBCXX_3.4
+FUNC:_ZNSs10_S_compareEjj@@GLIBCXX_3.4.16
 FUNC:_ZNSs12_Alloc_hiderC1EPcRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSs12_Alloc_hiderC2EPcRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSs12_M_leak_hardEv@@GLIBCXX_3.4
@@ -1200,6 +1207,7 @@ FUNC:_ZNSs7replaceEjjRKSs@@GLIBCXX_3.4
 FUNC:_ZNSs7replaceEjjRKSsjj@@GLIBCXX_3.4
 FUNC:_ZNSs7replaceEjjjc@@GLIBCXX_3.4
 FUNC:_ZNSs7reserveEj@@GLIBCXX_3.4
+FUNC:_ZNSs8pop_backEv@@GLIBCXX_3.4.17
 FUNC:_ZNSs9_M_assignEPcjc@@GLIBCXX_3.4.5
 FUNC:_ZNSs9_M_assignEPcjc@GLIBCXX_3.4
 FUNC:_ZNSs9_M_mutateEjjj@@GLIBCXX_3.4
@@ -1433,6 +1441,9 @@ 
FUNC:_ZNSt13__future_base12_Result_baseC2Ev@@GLIBCXX_3.4.15
 FUNC:_ZNSt13__future_base12_Result_baseD0Ev@@GLIBCXX_3.4.15
 FUNC:_ZNSt13__future_base12_Result_baseD1Ev@@GLIBCXX_3.4.15
 FUNC:_ZNSt13__future_base12_Result_baseD2Ev@@GLIBCXX_3.4.15
+FUNC:_ZNSt13__future_base19_Async_state_commonD0Ev@@GLIBCXX_3.4.17
+FUNC:_ZNSt13__future_base19_Async_state_commonD1Ev@@GLIBCXX_3.4.17
+FUNC:_ZNSt13__future_base19_Async_state_commonD2Ev@@GLIBCXX_3.4.17
 FUNC:_ZNSt13bad_exceptionD0Ev@@GLIBCXX_3.4
 FUNC:_ZNSt13bad_exceptionD1Ev@@GLIBCXX_3.4
 FUNC:_ZNSt13bad_exceptionD2Ev@@GLIBCXX_3.4
@@ -1741,6 +1752,8 @@ 
FUNC:_ZNSt15__exception_ptreqERKNS_13exception_ptrES2_@@CXXABI_1.3.3
 FUNC:_ZNSt15__exception_ptrneERKNS_13exception_ptrES2_@@CXXABI_1.3.3
 
FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekoffExSt12_Ios_SeekdirSt13_Ios_Openmode@@GLIBCXX_3.4
 
FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE10pubseekposESt4fposI11__mbstate_tESt13_Ios_Openmode@@GLIBCXX_3.4
+FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_gbumpEi@@GLIBCXX_3.4.16
+FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE12__safe_pbumpEi@@GLIBCXX_3.4.16
 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4setgEPcS3_S3_@@GLIBCXX_3.4
 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4setpEPcS3_@@GLIBCXX_3.4
 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEE4syncEv@@GLIBCXX_3.4
@@ -1780,6 +1793,8 @@ 
FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEED2Ev@@GLIBCXX_3.4
 FUNC:_ZNSt15basic_streambufIcSt11char_traitsIcEEaSERKS2_@@GLIBCXX_3.4
 
FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE10pubseekoffExSt12_Ios_SeekdirSt13_Ios_Openmode@@GLIBCXX_3.4
 
FUNC:_ZNSt15basic_streambufIwSt11char_traitsIwEE10pubseekposESt4fposI11__mbstate_tESt13_Ios_Openmode@@GLIBCXX_3.4

[PATCH] Fix PR52682

2012-03-23 Thread Richard Guenther

Committed as obvious.

Richard.

2012-03-23  William Bader  williamba...@hotmail.com

c-family/
* c-lex.c (c_lex_with_flags): Avoid declarations after stmts.

Index: gcc/c-family/c-lex.c
===
*** gcc/c-family/c-lex.c(revision 185724)
--- gcc/c-family/c-lex.c(working copy)
*** c_lex_with_flags (tree *value, location_
*** 342,347 
--- 342,349 
  
if (flags  CPP_N_USERDEF)
  {
+   char *str;
+   tree literal;
tree suffix_id = get_identifier (suffix);
int len = tok-val.str.len - strlen (suffix);
/* If this is going to be used as a C string to pass to a
*** c_lex_with_flags (tree *value, location_
*** 350,358 
(const char *) tok-val.str.text);
TREE_TYPE (num_string) = char_array_type_node;
num_string = fix_string_type (num_string);
!   char *str = CONST_CAST (char *, TREE_STRING_POINTER (num_string));
str[len] = '\0';
!   tree literal = build_userdef_literal (suffix_id, *value,
  num_string);
*value = literal;
  }
--- 352,360 
(const char *) tok-val.str.text);
TREE_TYPE (num_string) = char_array_type_node;
num_string = fix_string_type (num_string);
!   str = CONST_CAST (char *, TREE_STRING_POINTER (num_string));
str[len] = '\0';
!   literal = build_userdef_literal (suffix_id, *value,
  num_string);
*value = literal;
  }


[PATCH] Testcase for PR52638

2012-03-23 Thread Richard Guenther

Committed.

Richard.


[Patch, fortran] Remove unused GFC_MAX_LINE macro

2012-03-23 Thread Janne Blomqvist
Hi,

the maximum line length is nowadays decided dynamically at runtime as
specified by the chosen standard version, or overridden by the
-fline-length option. The patch below removes the GFC_MAX_LINE macro
which is no longer used. Committed to trunk as obvious.


Index: gfortran.h
===
--- gfortran.h  (revision 185726)
+++ gfortran.h  (working copy)
@@ -42,7 +42,6 @@ along with GCC; see the file COPYING3.
 /* Major control parameters.  */

 #define GFC_MAX_SYMBOL_LEN 63   /* Must be at least 63 for F2003.  */
-#define GFC_MAX_LINE 132   /* Characters beyond this are not seen.  */
 #define GFC_LETTERS 26 /* Number of letters in the alphabet.  */

 #define MAX_SUBRECORD_LENGTH 2147483639   /* 2**31-9 */
Index: ChangeLog
===
--- ChangeLog   (revision 185726)
+++ ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2012-03-23  Janne Blomqvist  j...@gcc.gnu.org
+
+   * gfortran.h (GFC_MAX_LINE): Remove unused macro.
+
 2012-03-19  Francois-Xavier Coudert  fxcoud...@gcc.gnu.org

PR fortran/52559


-- 
Janne Blomqvist


Re: [PATCH] Fix PRs 52080, 52097 and 48124, rewrite bitfield expansion, enable the C++ memory model wrt bitfields everywhere

2012-03-23 Thread Richard Guenther
On Thu, 22 Mar 2012, Richard Guenther wrote:

 On Thu, 22 Mar 2012, Eric Botcazou wrote:
 
   bitregion_start == 11 looks bogus.  The representative is starting at
  
 DECL_FIELD_BIT_OFFSET (repr)
   = size_binop (BIT_AND_EXPR,
 DECL_FIELD_BIT_OFFSET (field),
 bitsize_int (~(BITS_PER_UNIT - 1)));
  
   which looks ok
  
  It cannot be OK if you want it to be on a byte boundary, since the field 
  isn't 
  on a byte boundary itself and they have the same DECL_FIELD_BIT_OFFSET (0).
 
 Huh?  If they have DECL_FIELD_BIT_OFFSET of zero they are at a byte
 boundary, no?  Wait - the RECORD_TYPE itself is at non-zero
 DECL_FIELD_BIT_OFFSET and thus a zero DECL_FIELD_BIT_OFFSET for its
 fields does not mean anything?!  But how can DECL_OFFSET_ALIGN
 be still valid for such field?  Obviously if DECL_FIELD_OFFSET == 0,
 DECL_FIELD_BIT_OFFSET == 0 then the offset needs to be aligned
 to DECL_OFFSET_ALIGN.  Which then means DECL_OFFSET_ALIGN is a
 bit-alignment?
 
 Anyway, since we are trying to compute a nice mode to use for
 the bitfield representative we can give up in the second that
 we do not know how to reach BITS_PER_UNIT alignment.  Or we can
 simply only try to ensure MIN (BITS_PER_UNIT, DECL_OFFSET_ALIGN)
 alignment/size of the representative.  Of course the bitfield
 expansion code has to deal with non-byte-aligned representatives
 then, and we'd always have to use BLKmode for them.

Btw, now checking with gdb, DECL_OFFSET_ALIGN is always 128 for
all of the fields - that looks bogus.  DECL_ALIGN is 1, but that
doesn't mean DECL_OFFSET_ALIGN should not be 1 as well, no?

Thanks,
Richard.


Re: [PATCH] Update sparc-linux-gnu baselines

2012-03-23 Thread Jonathan Wakely
On 23 March 2012 07:47, David Miller wrote:

 I verified that this matches what other Linux platforms have added
 recently, and I was careful not to accidentally include the TLS symbols.

 In fact, this baseline file now matches powerpc-linux-gnu's precisely.

 Ok for mainline and the 4.7 branch?

OK, thanks.


Re: [PATCH, ARM] Don't force vget_lane returning a 64-bit result to transfer to core registers

2012-03-23 Thread Julian Brown
On Wed, 21 Mar 2012 11:20:13 +
Richard Earnshaw rearn...@arm.com wrote:

 Semantically the neon intrinsic vgetq_lane_[su]64 returns a 64 bit
 sub-object of a 128-bit vector; there's no real need for the intrinsic
 to map onto a specific machine instruction.
 
 Indeed, if we force a particular instruction that moves the result into a
 core register, but then want to use the result in the vector unit, we
 don't really want to have to move the result back to the other
 register bank.  However, that's what we do today.
 
 This patch changes the way we expand these operations so that we
 no longer force selection of the get-lane operation.
 
 A side effect of this change is that we now spit out the fmrrd
 mnemonic rather than the vmov equivalent.  As a consequence I've
 updated the testsuite to allow for this change.  The changes to the
 ML files are pretty mechanical, but I don't speak ML so it would be
 helpful if another pair of eyes could check that bit over and tell me
 if I've missed something subtle.

The Ocaml bits look fine to me (the compiler won't accept incorrect
programs, as I'm sure you've noticed ;-)).

 Tested on trunk and gcc-4.7, but only installed on trunk.

Don't forget to check big-endian mode too...

Cheers,

Julian


Re: remove wrong code in immed_double_const

2012-03-23 Thread Richard Sandiford
Mike Stump mikest...@comcast.net writes:
 Sorry, with this bit, I meant that the current svn code is correct
 for GET_MODE_BITSIZE (op_mode) == HOST_BITS_PER_WIDE_INT * 2.
 In that case, hv < 0 can just mean that we have a uint128_t
 (or whatever) whose high bit happens to be set.

(To be clear, I was using uint128_t as an example of a 2-HWI type,
assuming we're using 64-bit HWIs -- which I hope we are for targets
where this assert matters.)

 Well, according to the spec, one cannot use CONST_DOUBLE to represent
 a uint128 value with the high bit set.

We can!  And do now, even without your patch.  Because...

 The C frontend type plays this game, but they can, because they track
 the type with the constant and the values of the constant are
 interpreted exclusively in the context of the type.  Since we don't
 have the unsigned bit, we can't, so, either, they are speced to be
 values on their own, or values dependent upon some external notion.
 By changing the spec to say sign extending, we mean if the high bit is
 set, the value is negative.

...it doesn't mean that we interpret the value as a negative _rtx_.
As with all rtx calculations, things like signedness and saturation are
decided by the operation rather than the type (type == rtx mode).
For things like addition where signed vs. unsigned interpretation
doesn't matter, we have a single rtx op like PLUS.  For things like
multiplication where it does matter, we have separate signed and
unsigned variants.  There is nothing to distinguish a uint128_t
_register_ (i.e. TImode REG) that has the upper bit set from an
int128_t register that happens to be negative.  Instead the
interpretation is decided by the operation.  And the same principle
applies to constants.  There isn't, and doesn't need to be,
a separate CONST_DOUBLE representation for:

  - an unsigned 2-HWI value that has the upper bit set and
  - a signed 2-HWI value that is negative

The sign-extending thing is simply there to specify what happens when an
N>2 HWI value is represented as a 2-HWI rtx.  I.e. it's simply there to
say what the implicit N-2 HWIs are.  (That's why the definition only
matters now that we're allowing N>2 by removing the assert.)

In this context we're interpreting the value as unsigned because we have
an UNSIGNED_FLOAT operation.  So if the mode of the operand is exactly
2 HWIs in size, a negative high HWI simply indicates an unsigned value
that has the high bit set.

The same principle already applies to CONST_INT.  We have long defined
CONST_INT to be a sign-extending representation, in the sense that it
is allowed to represent 2-HWI modes in which the upper HWI happens
to be a sign extension of the lower HWI.  That doesn't mean the 2-HWI
constant itself is negative: it can just as easily be a high unsigned
value.  Whether it is signed, unsigned or neutral depends on the context
of the rtx operation.

All we're doing here is extending that principle to CONST_DOUBLE
and modes wider than 2 HWIs.

Sorry for the rather rambling explanation :-)  I still think the
version I suggested for this hunk is right though.

Richard


RFA: Document addition of Epiphany support

2012-03-23 Thread Joern Rennecke


2012-03-23  Joern Rennecke  joern.renne...@embecosm.com

* htdocs/gcc-4.7/changes.html: Document addition of Epiphany support.

Index: htdocs/gcc-4.7/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.7/changes.html,v
retrieving revision 1.102
diff -p -u -r1.102 changes.html
--- htdocs/gcc-4.7/changes.html 22 Mar 2012 09:40:43 -  1.102
+++ htdocs/gcc-4.7/changes.html 23 Mar 2012 10:51:26 -
@@ -705,6 +705,11 @@ int add_values (const __flash int *p, in
   architecture./li
   /ul
 
+<h3>Epiphany</h3>
+  <ul>
+    <li>Support has been added for Adapteva's Epiphany architecture.</li>
+  </ul>
+
 h3IA-32/x86-64/h3
   ul
 liSupport for Intel AVX2 intrinsics, built-in functions and code 
generation is


Re: RFA: Document addition of Epiphany support

2012-03-23 Thread Rainer Orth
Joern Rennecke amyl...@spamcop.net writes:

 2012-03-23  Joern Rennecke  joern.renne...@embecosm.com

   * htdocs/gcc-4.7/changes.html: Document addition of Epiphany support.

 Index: htdocs/gcc-4.7/changes.html
 ===
 RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.7/changes.html,v
 retrieving revision 1.102
 diff -p -u -r1.102 changes.html
 --- htdocs/gcc-4.7/changes.html   22 Mar 2012 09:40:43 -  1.102
 +++ htdocs/gcc-4.7/changes.html   23 Mar 2012 10:51:26 -
 @@ -705,6 +705,11 @@ int add_values (const __flash int *p, in
architecture./li
/ul
  
 +h3Epiphany/h3
 +  ul
 +liSupport has been added for Adapteva's Epiphany architecture./li
 +  /ul
 +

Shouldn't there be some link to a description?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] Fix PR52678

2012-03-23 Thread Richard Guenther

This patch fixes PR52678 - re-computing the evolution of a loop
PHI node after updating it during prologue loop peeling is fragile
(and I remember several cases in the past where we ICEd here and
adjusted the code).  So, instead of re-computing the evolution
this patch saves the evolution part at analysis time and re-uses
it at this place.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2012-03-23  Richard Guenther  rguent...@suse.de

PR tree-optimization/52678
* tree-vectorizer.h (struct _stmt_vec_info): Add
loop_phi_evolution_part member.
(STMT_VINFO_LOOP_PHI_EVOLUTION_PART): New define.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Initialize
STMT_VINFO_LOOP_PHI_EVOLUTION_PART.
* tree-vect-loop-manip.c (vect_update_ivs_after_vectorizer):
Use the cached evolution part and the PHI nodes value from
the loop preheader edge instead of re-analyzing the evolution.

* gfortran.dg/pr52678.f: New testcase.

Index: gcc/tree-vectorizer.h
===
*** gcc/tree-vectorizer.h   (revision 185724)
--- gcc/tree-vectorizer.h   (working copy)
*** typedef struct _stmt_vec_info {
*** 476,481 
--- 476,488 
tree dr_step;
tree dr_aligned_to;
  
+   /* For loop PHI nodes, the evolution part of it.  This makes sure
+  this information is still available in vect_update_ivs_after_vectorizer
+  where we may not be able to re-analyze the PHI nodes evolution as
+  peeling for the prologue loop can make it unanalyzable.  The evolution
+  part is still correct though.  */
+   tree loop_phi_evolution_part;
+ 
/* Used for various bookkeeping purposes, generally holding a pointer to
   some other stmt S that is in some way related to this stmt.
   Current use of this field is:
*** typedef struct _stmt_vec_info {
*** 572,577 
--- 579,585 
  #define STMT_VINFO_GROUP_SAME_DR_STMT(S)   (S)-same_dr_stmt
  #define STMT_VINFO_GROUP_READ_WRITE_DEPENDENCE(S)  (S)-read_write_dep
  #define STMT_VINFO_STRIDED_ACCESS(S)  ((S)-first_element != NULL  
(S)-data_ref_info)
+ #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)-loop_phi_evolution_part
  
  #define GROUP_FIRST_ELEMENT(S)  (S)-first_element
  #define GROUP_NEXT_ELEMENT(S)   (S)-next_element
Index: gcc/tree-vect-loop.c
===
*** gcc/tree-vect-loop.c(revision 185724)
--- gcc/tree-vect-loop.c(working copy)
*** vect_analyze_scalar_cycles_1 (loop_vec_i
*** 579,584 
--- 579,588 
  continue;
}
  
+   STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
+   = evolution_part_in_loop_num (access_fn, loop-num);
+   gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != 
NULL_TREE);
+ 
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, Detected induction.);
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
Index: gcc/tree-vect-loop-manip.c
===
*** gcc/tree-vect-loop-manip.c  (revision 185724)
--- gcc/tree-vect-loop-manip.c  (working copy)
*** vect_update_ivs_after_vectorizer (loop_v
*** 1797,1809 
 !gsi_end_p (gsi)  !gsi_end_p (gsi1);
 gsi_next (gsi), gsi_next (gsi1))
  {
-   tree access_fn = NULL;
-   tree evolution_part;
tree init_expr;
tree step_expr, off;
tree type;
tree var, ni, ni_name;
gimple_stmt_iterator last_gsi;
  
phi = gsi_stmt (gsi);
phi1 = gsi_stmt (gsi1);
--- 1797,1808 
 !gsi_end_p (gsi)  !gsi_end_p (gsi1);
 gsi_next (gsi), gsi_next (gsi1))
  {
tree init_expr;
tree step_expr, off;
tree type;
tree var, ni, ni_name;
gimple_stmt_iterator last_gsi;
+   stmt_vec_info stmt_info;
  
phi = gsi_stmt (gsi);
phi1 = gsi_stmt (gsi1);
*** vect_update_ivs_after_vectorizer (loop_v
*** 1822,1866 
}
  
/* Skip reduction phis.  */
!   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
  {
if (vect_print_dump_info (REPORT_DETAILS))
  fprintf (vect_dump, reduc phi. skip.);
continue;
  }
  
!   access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
!   gcc_assert (access_fn);
!   /* We can end up with an access_fn like
!(short int) {(short unsigned int) i_49, +, 1}_1
!for further analysis we need to strip the outer cast but we
!need to preserve the original type.  */
!   type = TREE_TYPE (access_fn);
!   STRIP_NOPS (access_fn);
!   evolution_part =
!unshare_expr (evolution_part_in_loop_num (access_fn, loop-num));
!   gcc_assert (evolution_part != 

Re: [PATCH] Replace a SRA FIXME with an assert

2012-03-23 Thread Martin Jambor
On Wed, Mar 21, 2012 at 08:46:49AM +0100, Richard Guenther wrote:
 On Tue, 20 Mar 2012, Martin Jambor wrote:
 
  Hi,
  
  On Tue, Mar 20, 2012 at 04:08:31PM +0100, Richard Guenther wrote:
   On Tue, 20 Mar 2012, Martin Jambor wrote:
   
Hi,

this patch which removes one of only two FIXMEs in tree-sra.c has been
sitting in my patch queue for over a year.  Yesterday I noticed it
there, bootstrapped and tested it on x86_64-linux and it passed.

I'd like to either commit it or just remove the comment, if there
likely still are size inconsistencies in assignments but we are not
planning to do anything with them in foreseeable future (and perhaps
add a note to the bug).

So, which should it be?
   
   Well.  Aggregate assignments can still be off I think, especially
   because of the disconnect between TYPE_SIZE and DECL_SIZE in
   some cases, considering *p = x; with typeof (x) == typeof (*p)
   (tail-padding re-use).
   
   The comments in PR40058 hint at that that issue might be fixed,
   but I also remember issues with Ada.
  
  The other FIXME in tree-sra.c suggests that Ada can produce
  VIEW_CONVERT_EXPRs with a different size than its argument, perhaps
  that is it (I'll try removing that one too).
 
 Yeah, it does that.
 
   
   GIMPLE verification ensures compatible types (but not a match
   of type_size / decl_size which will be exposed by get_ref_base_and_extent)
   
   But the real question is what do you want to guard against here?
   The assert at least looks like it is going to trigger at some point,
   but, would it be a problem if the sizes do not match?
   
  
  I really can't remember what exactly happened but I do remember it did
  lead to a bug (it's been already part of the check-in of new SRA so svn
  history does not help).  We copy access tree children across
  assignments and also change the type of the LHS access to a scalar if
  the RHS access is a scalar (assignments into a structure containing
  just one scalar) and both could lead to some access tree children
  covering larger part of the aggregate than the parent, making the
  children un-findable or even creating overlaps which are prohibited
  for SRA candidates.
  
  But as I wrote before, I'll be happy to just remove the FIXME comment.
 
 I'd just remove the comment then.
 

OK, I committed the following (after including it in a bootstrap of
another patch)

Thanks,

Martin


2012-03-23  Martin Jambor  mjam...@suse.cz

* tree-sra.c (build_accesses_from_assign): Remove FIXME comment.

Index: src/gcc/tree-sra.c
===
--- src.orig/gcc/tree-sra.c
+++ src/gcc/tree-sra.c
@@ -1175,8 +1175,6 @@ build_accesses_from_assign (gimple stmt)
!lacc-grp_unscalarizable_region
!racc-grp_unscalarizable_region
AGGREGATE_TYPE_P (TREE_TYPE (lhs))
-  /* FIXME: Turn the following line into an assert after PR 40058 is
-fixed.  */
lacc-size == racc-size
useless_type_conversion_p (lacc-type, racc-type))
 {


Re: [Ping][PATCH, libstdc++-v3] Enable to cross-test libstdc++ on simulator

2012-03-23 Thread Paolo Carlini

Hi,

On 03/07/2012 06:22 AM, Terry Guo wrote:

Hello,

Can anybody please review and approve the following simple patch? Thanks
very much.

http://gcc.gnu.org/ml/libstdc++/2011-08/msg00063.html

apparently somebody, somewhere, approved this patch, because I see:

2012-03-13  Terry Guo terry@arm.com

* testsuite/Makefile.in (TEST_GCC_EXEC_PREFIX): New.

but frankly the commit as-is didn't make much sense, because the *.in 
files are generated: a sensible patch should have touched the *.am file 
and then, upon approval, committed also the files changed by 'autoreconf'.


Anyway, the change doesn't exist anymore, because the next 'autoreconf' 
(of mine, as it happens) of course wiped it out.


Please clarify.

Thanks,
Paolo.


Re: [Ping][PATCH, libstdc++-v3] Enable to cross-test libstdc++ on simulator

2012-03-23 Thread Paolo Bonzini
Il 23/03/2012 13:40, Paolo Carlini ha scritto:
 Hi,
 
 On 03/07/2012 06:22 AM, Terry Guo wrote:
 Hello,

 Can anybody please review and approve the following simple patch? Thanks
 very much.

 http://gcc.gnu.org/ml/libstdc++/2011-08/msg00063.html
 apparently somebody, somewhere, approved this patch, because I see:
 
 2012-03-13  Terry Guo terry@arm.com
 
 * testsuite/Makefile.in (TEST_GCC_EXEC_PREFIX): New.
 
 but frankly the commit as-is didn't make much sense, because the *.in
 files are generated: a sensible patch should have touched the *.am file
 and then, upon approval, committed also the files changed by 'autoreconf'.

It was approved by Mike Stump who obviously confused the *.in with *.am,
or libstdc++-v3 with gcc.  It happens. :)

The same change done to Makefile.am is of course ok, but you don't need
my approval to install it.

(the other) Paolo



Re: [Ping][PATCH, libstdc++-v3] Enable to cross-test libstdc++ on simulator

2012-03-23 Thread Paolo Carlini

On 03/23/2012 02:02 PM, Paolo Bonzini wrote:

It was approved by Mike Stump who obviously confused the *.in with *.am,
or libstdc++-v3 with gcc.  It happens. :)
Good, good, thanks Paolo. The messages on the mailing list aren't fully 
threaded and I didn't see immediately the approval.


And of course it happens, no problem, but then, don't be surprised if 
the change is wiped out by the next 'autoreconf' ;)


Paolo.


[PATCH] Remove strict-alignment checks in SRA

2012-03-23 Thread Martin Jambor
Hi,

since we now should be able to expand misaligned MEM_REFs properly and
both SRA and IPA-SRA now tag the memory accesses with the appropriate
alignment information, we should now be able to get rid of the SRA
disabling in the face of potential strict-alignment expansion
problems.  The patch below does that.

When I applied it before applying the patches fixing misaligned
expansion, testcases gcc.dg/torture/pr52402.c and
gcc.dg/tree-ssa/pr49923.c failed on strict-alignment platforms.
However, when applied to the current trunk, they pass bootstrap and
testsuite on sparc64-linux, ia64-linux, x86_64-linux and i686-linux.
OK for trunk?

Thanks,

Martin


2012-03-20  Martin Jambor  mjam...@suse.cz

PR tree-optimization/50052
* tree-sra.c (tree_non_aligned_mem_p): Removed.
(tree_non_aligned_mem_for_access_p): Likewise.
(build_accesses_from_assign): Removed strict alignment requirements
checks.
(access_precludes_ipa_sra_p): Likewise.

* testsuite/gcc.dg/ipa/ipa-sra-2.c: Also run on strict-alignment
platforms.


Index: src/gcc/tree-sra.c
===
--- src.orig/gcc/tree-sra.c
+++ src/gcc/tree-sra.c
@@ -1081,53 +1081,6 @@ disqualify_ops_if_throwing_stmt (gimple
   return false;
 }
 
-/* Return true if EXP is a memory reference less aligned than ALIGN.  This is
-   invoked only on strict-alignment targets.  */
-
-static bool
-tree_non_aligned_mem_p (tree exp, unsigned int align)
-{
-  unsigned int exp_align;
-
-  if (TREE_CODE (exp) == VIEW_CONVERT_EXPR)
-exp = TREE_OPERAND (exp, 0);
-
-  if (TREE_CODE (exp) == SSA_NAME || is_gimple_min_invariant (exp))
-return false;
-
-  /* get_object_alignment will fall back to BITS_PER_UNIT if it cannot
- compute an explicit alignment.  Pretend that dereferenced pointers
- are always aligned on strict-alignment targets.  */
-  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == TARGET_MEM_REF)
-exp_align = get_object_or_type_alignment (exp);
-  else
-exp_align = get_object_alignment (exp);
-
-  if (exp_align  align)
-return true;
-
-  return false;
-}
-
-/* Return true if EXP is a memory reference less aligned than what the access
-   ACC would require.  This is invoked only on strict-alignment targets.  */
-
-static bool
-tree_non_aligned_mem_for_access_p (tree exp, struct access *acc)
-{
-  unsigned int acc_align;
-
-  /* The alignment of the access is that of its expression.  However, it may
- have been artificially increased, e.g. by a local alignment promotion,
- so we cap it to the alignment of the type of the base, on the grounds
- that valid sub-accesses cannot be more aligned than that.  */
-  acc_align = get_object_alignment (acc-expr);
-  if (acc-base  acc_align  TYPE_ALIGN (TREE_TYPE (acc-base)))
-acc_align = TYPE_ALIGN (TREE_TYPE (acc-base));
-
-  return tree_non_aligned_mem_p (exp, acc_align);
-}
-
 /* Scan expressions occuring in STMT, create access structures for all accesses
to candidates for scalarization and remove those candidates which occur in
statements or expressions that prevent them from being split apart.  Return
@@ -1154,11 +1107,7 @@ build_accesses_from_assign (gimple stmt)
   lacc = build_access_from_expr_1 (lhs, stmt, true);
 
   if (lacc)
-{
-  lacc-grp_assignment_write = 1;
-  if (STRICT_ALIGNMENT  tree_non_aligned_mem_for_access_p (rhs, lacc))
-lacc-grp_unscalarizable_region = 1;
-}
+lacc-grp_assignment_write = 1;
 
   if (racc)
 {
@@ -1166,8 +1115,6 @@ build_accesses_from_assign (gimple stmt)
   if (should_scalarize_away_bitmap  !gimple_has_volatile_ops (stmt)
   !is_gimple_reg_type (racc-type))
bitmap_set_bit (should_scalarize_away_bitmap, DECL_UID (racc-base));
-  if (STRICT_ALIGNMENT  tree_non_aligned_mem_for_access_p (lhs, racc))
-racc-grp_unscalarizable_region = 1;
 }
 
   if (lacc  racc
@@ -3835,10 +3782,6 @@ access_precludes_ipa_sra_p (struct acces
  || gimple_code (access-stmt) == GIMPLE_ASM))
 return true;
 
-  if (STRICT_ALIGNMENT
-   tree_non_aligned_mem_p (access-expr, TYPE_ALIGN (access-type)))
-return true;
-
   return false;
 }
 
Index: src/gcc/testsuite/gcc.dg/ipa/ipa-sra-2.c
===
--- src.orig/gcc/testsuite/gcc.dg/ipa/ipa-sra-2.c
+++ src/gcc/testsuite/gcc.dg/ipa/ipa-sra-2.c
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
 /* { dg-options -O2 -fipa-sra -fdump-tree-eipa_sra-details  } */
-/* { dg-require-effective-target non_strict_align } */
 
 struct bovid
 {


Re: [PATCH] Preserve loops from tree to RTL loop optimizers

2012-03-23 Thread Richard Guenther
On Wed, 21 Mar 2012, Richard Sandiford wrote:

 Richard Guenther rguent...@suse.de writes:
  This patch makes us preserve loop structures from the start of tree
  loop optimizers to the end of RTL loop optimizers.  It uses a new
  property, PROP_loops to indicate we want to preserve them and
  massages loop_optimizer_init/finalize to honor that.
 
  On the RTL side the CFG manipulation was not prepared to handle
  updating loops, so this patch fills in enough to pass bootstrap
  and regtesting.  We still do too much loop fixing from cleanup_cfg
  basically because callers do not tell cleanup_cfg if they have
  modified the CFG themselves (CSE for example does in some cases).
  It was suggested to use a new flag to cleanup_cfg to do that,
  other suggestions welcome.

The updated patch below does that now.

  Bootstrapped on x86_64-unknown-linux-gnu, testing shows some
  remaining libstdc++ errors, I am investigating them now but
  don't expect major issues.

As expected, this was a missed patch hunk that got lost during
some intermediate merging.

  Comments?  The patch is on top of the early RTL pass merge.
 
 Thanks for doing this (and for keeping the ~PROP_loops case around for
 passes after rtl_loop_done -- I have a patch that uses it for SMS).

It should even be possible to preserve loop information until SMS
(basically until IRA, IRA invalidates loop information
it computes in a weird way so verification between IRA / reload would 
fail).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

If there are no further comments I am inclined to commit this
patch early next week (possibly causing quite some fallout ...).

Thanks,
Richard.

2012-03-23  Richard Guenther  rguent...@suse.de

* loop-init.c (loop_optimizer_init): If loops are preserved
perform incremental initialization of required loop features.
(loop_optimizer_finalize): If loops are to be preserved only
clean up optional loop features.
(rtl_loop_done): Forcefully free loops here.
* cgraph.c (cgraph_release_function_body): Forcefully free
loops.
* cfgexpand.c (expand_gimple_cond): Properly add new basic-blocks
to existing loops.
(construct_init_block): Likewise.
(construct_exit_block): Likewise.
(gimple_expand_cfg): Clear LOOP_CLOSED_SSA loop state.  Cleanup
the CFG after expanding.
* cfgloop.c (verify_loop_structure): Calculate or verify
dominators.  If we needed to calculate them, free them afterwards.
* tree-pass.h (PROP_loops): New define.
* tree-ssa-loop.c (pass_tree_loop_init): Provide PROP_loops.
* basic-block.h (CLEANUP_CFG_CHANGED): New.
* cfgcleanup.c (merge_blocks_move): Protect loop latches.
(cleanup_cfg): If we did something and have loops around, fix
them up.
* cse.c (rest_of_handle_cse_after_global_opts): Call cleanup_cfg
with CLEANUP_CFG_CHANGED.
* cfghooks.c (merge_blocks): If we merge a loop header into
its predecessor, update the loop structure.
(duplicate_block): If we copy a loop latch, adjust loop state
to note we may have multiple latches.
(delete_basic_block): Mark loops for fixup if we remove a loop.
* cfganal.c (forwarder_block_p): Protect loop latches, headers
and preheaders.
* cfgrtl.c (rtl_can_merge_blocks): Protect loop latches.
(cfg_layout_can_merge_blocks_p): Likewise.
* cprop.c (bypass_block): If we create a loop with multiple
entries, mark it for removal.
* except.c (emit_to_new_bb_before): Add the new basic-block
to existing loops.
* tree-eh.c (lower_resx): Likewise.
* omp-low.c (finalize_task_copyfn): Do not copy PROP_loops.
(expand_omp_taskreg): Likewise.
* tree-inline.c (initialize_cfun): Likewise.
* tree-mudflap.c (add_bb_to_loop): Prototype.
(mf_build_check_statement_for): Properly add new basic-blocks
to existing loops.
* tree-ssa-threadupdate.c (thread_block): Mark loops for fixup
if we remove a loop.
(thread_through_loop_header): Likewise.
* trans-mem.c (tm_log_emit_save_or_restores): Properly add
new basic-blocks to existing loops.
(expand_transaction): Likewise.
* Makefile.in (except.o): Add $(CFGLOOP_H).
(expr.o): Likewise.
(cgraph.o): Likewise.
(cprop.o): Likewise.
(cfgexpand.o): Likewise.
(cfganal.o): Likewise.
(trans-mem.o): Likewise.
(tree-eh.o): Likewise.

Index: gcc/loop-init.c
===
*** gcc/loop-init.c.orig2012-03-22 13:10:50.0 +0100
--- gcc/loop-init.c 2012-03-23 11:59:20.0 +0100
*** along with GCC; see the file COPYING3.
*** 42,56 
  void
  loop_optimizer_init (unsigned flags)
  {
!   struct loops *loops;
  
!   gcc_assert 

[Patch, libfortran] Use calloc instead of malloc+memset

2012-03-23 Thread Janne Blomqvist
Hi,

in a few places in libgfortran we use malloc (or rather, our own
xmalloc-like wrapper, get_mem) followed by a memset to zero the
memory.  The attached patch replaces this usage with calloc().
Regtested on x86_64-unknown-linux-gnu, unless somebody objects I'll
commit it as obvious in a few days.

2012-03-23  Janne Blomqvist  j...@gcc.gnu.org

* runtime/memory.c (xcalloc): New function.
* libgfortran.h (xcalloc): New prototype.
* io/list_read.c (push_char): Use xcalloc instead of get_mem and
memset.
(l_push_char): Likewise.
* io/unit.c (insert_unit): Likewise.
(get_internal_unit): Likewise.
* io/unix.c (open_internal): Likewise.
(open_internal4): Likewise.
(fd_to_stream): Likewise.


-- 
Janne Blomqvist
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index 11a35c9..91b345c 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Andy Vaught
Namelist input contributed by Paul Thomas
@@ -75,9 +75,8 @@ push_char (st_parameter_dt *dtp, char c)
 
   if (dtp-u.p.saved_string == NULL)
 {
-  dtp-u.p.saved_string = get_mem (SCRATCH_SIZE);
-  // memset below should be commented out.
-  memset (dtp-u.p.saved_string, 0, SCRATCH_SIZE);
+  // Plain malloc should suffice here, zeroing not needed?
+  dtp-u.p.saved_string = xcalloc (SCRATCH_SIZE, 1);
   dtp-u.p.saved_length = SCRATCH_SIZE;
   dtp-u.p.saved_used = 0;
 }
@@ -622,10 +621,7 @@ static void
 l_push_char (st_parameter_dt *dtp, char c)
 {
   if (dtp-u.p.line_buffer == NULL)
-{
-  dtp-u.p.line_buffer = get_mem (SCRATCH_SIZE);
-  memset (dtp-u.p.line_buffer, 0, SCRATCH_SIZE);
-}
+dtp-u.p.line_buffer = xcalloc (SCRATCH_SIZE, 1);
 
   dtp-u.p.line_buffer[dtp-u.p.item_count++] = c;
 }
diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c
index 819d0e9..5fe9eb2 100644
--- a/libgfortran/io/unit.c
+++ b/libgfortran/io/unit.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010, 2011
+/* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Andy Vaught
F2003 I/O support contributed by Jerry DeLisle
@@ -188,8 +188,7 @@ insert (gfc_unit *new, gfc_unit *t)
 static gfc_unit *
 insert_unit (int n)
 {
-  gfc_unit *u = get_mem (sizeof (gfc_unit));
-  memset (u, '\0', sizeof (gfc_unit));
+  gfc_unit *u = xcalloc (1, sizeof (gfc_unit));
   u-unit_number = n;
 #ifdef __GTHREAD_MUTEX_INIT
   {
@@ -385,14 +384,8 @@ get_internal_unit (st_parameter_dt *dtp)
 
   /* Allocate memory for a unit structure.  */
 
-  iunit = get_mem (sizeof (gfc_unit));
-  if (iunit == NULL)
-{
-  generate_error (dtp-common, LIBERROR_INTERNAL_UNIT, NULL);
-  return NULL;
-}
+  iunit = xcalloc (1, sizeof (gfc_unit));
 
-  memset (iunit, '\0', sizeof (gfc_unit));
 #ifdef __GTHREAD_MUTEX_INIT
   {
 __gthread_mutex_t tmp = __GTHREAD_MUTEX_INIT;
diff --git a/libgfortran/io/unix.c b/libgfortran/io/unix.c
index 978c3ff..31fa005 100644
--- a/libgfortran/io/unix.c
+++ b/libgfortran/io/unix.c
@@ -1,5 +1,5 @@
 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-   2011
+   2011, 2012
Free Software Foundation, Inc.
Contributed by Andy Vaught
F2003 I/O support contributed by Jerry DeLisle
@@ -919,13 +919,11 @@ open_internal (char *base, int length, gfc_offset offset)
 {
   unix_stream *s;
 
-  s = get_mem (sizeof (unix_stream));
-  memset (s, '\0', sizeof (unix_stream));
+  s = xcalloc (1, sizeof (unix_stream));
 
   s-buffer = base;
   s-buffer_offset = offset;
 
-  s-logical_offset = 0;
   s-active = s-file_length = length;
 
   s-st.vptr = mem_vtable;
@@ -941,13 +939,11 @@ open_internal4 (char *base, int length, gfc_offset offset)
 {
   unix_stream *s;
 
-  s = get_mem (sizeof (unix_stream));
-  memset (s, '\0', sizeof (unix_stream));
+  s = xcalloc (1, sizeof (unix_stream));
 
   s-buffer = base;
   s-buffer_offset = offset;
 
-  s-logical_offset = 0;
   s-active = s-file_length = length;
 
   s-st.vptr = mem4_vtable;
@@ -965,13 +961,9 @@ fd_to_stream (int fd)
   struct stat statbuf;
   unix_stream *s;
 
-  s = get_mem (sizeof (unix_stream));
-  memset (s, '\0', sizeof (unix_stream));
+  s = xcalloc (1, sizeof (unix_stream));
 
   s-fd = fd;
-  s-buffer_offset = 0;
-  s-physical_offset = 0;
-  s-logical_offset = 0;
 
   /* Get the current length of the file. */
 
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index f2f6c36..186bfbe 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -759,6 +759,10 @@ internal_proto(get_mem);
 extern void *internal_malloc_size (size_t) __attribute__ ((malloc));
 internal_proto(internal_malloc_size);
 

[PATCH] PR bootstrap/52623 Disable libquadmath on AIX

2012-03-23 Thread David Edelsohn
The build process of libquadmath sometimes encounters problems on AIX
due to multilib and LD_LIBRARY_PATH interfering with GCC's own library
dependencies.  Libquadmath is not used on AIX, so this patch adds it
to noconfigdirs.

Thanks, David

* configure.ac: Add libquadmath to noconfigdirs for AIX.
* configure: Rebuild.

Index: configure.ac
===
--- configure.ac(revision 185711)
+++ configure.ac(working copy)
@@ -509,6 +509,13 @@
 fi
 fi

+# Disable libquadmath for some systems.
+case ${target} in
+  powerpc-*-aix* | rs6000-*-aix*)
+noconfigdirs=$noconfigdirs target-libquadmath
+;;
+esac
+
 # Disable libssp for some systems.
 case ${target} in
   avr-*-*)


Re: [C11-atomic] test invalid hoist across and acquire load

2012-03-23 Thread Aldy Hernandez

On 03/21/12 12:54, Andrew MacLeod wrote:

On 03/21/2012 01:35 PM, Aldy Hernandez wrote:

In the test below, we cannot cache either [x] or [y] neither before
the load of flag1 nor the load of flag2. This is because the
corresponding store/release can flush a different value of x or y:

+ if (__atomic_load_n (flag1, __ATOMIC_ACQUIRE))
+ i = x + y;
+
+ if (__atomic_load_n (flag2, __ATOMIC_ACQUIRE))
+ a = 10;
+ j = x + y;



Actually, does it need to be that complicated?

can't you simply have the other_thread process monotonically increase
x by 1 every cycle?

then if the load is hoisted and commoned, simulate_thread_final_verify()
can simply check that if i == j, it knows that x was loaded as a common
value and reused when calculating j. with the other thread increasing x
every cycle, they should never be the same value.


Hmmm, for this particular case I know CSA is commoning x + y, but what 
if another combination of passes hoists only y and leaves x alone.  It 
would be nice to test that y isn't hoisted independently of x.  Would it 
not, or do you only want to test this particular behavior?


Aldy


Re: [C11-atomic] test invalid hoist across and acquire load

2012-03-23 Thread Andrew MacLeod

On 03/23/2012 10:39 AM, Aldy Hernandez wrote:

On 03/21/12 12:54, Andrew MacLeod wrote:

On 03/21/2012 01:35 PM, Aldy Hernandez wrote:

In the test below, we cannot cache either [x] or [y] neither before
the load of flag1 nor the load of flag2. This is because the
corresponding store/release can flush a different value of x or y:

+ if (__atomic_load_n (flag1, __ATOMIC_ACQUIRE))
+ i = x + y;
+
+ if (__atomic_load_n (flag2, __ATOMIC_ACQUIRE))
+ a = 10;
+ j = x + y;



Actually, does it need to be that complicated?

can't you simply have the other_thread process monotonically increase
x by 1 every cycle?

then if the load is hoisted and commoned, simulate_thread_final_verify()
can simply check that if i == j, it knows that x was loaded as a common
value and reused when calculating j. with the other thread increasing x
every cycle, they should never be the same value.


Hmmm, for this particular case I know CSA is commoning x + y, but what 
if another combination of passes hoists only y and leaves x alone.  It 
would be nice to test that y isn't hoisted independently of x.  Would 
it not, or do you only want to test this particular behavior?


so enter it as 2 testcases, one increasing x and one increasing y, or 
better yet  set it up so that this function is called twice from 
simulate_main, with other_process() increasing x the first time and 
increasing y the second time...  or something like that.


Andrew


Re: [C11-atomic] test invalid hoist across and acquire load

2012-03-23 Thread Aldy Hernandez
After much pondering and talking with both you and Torvald, it has been 
determined that the test at hand is technically allowed to hoist the 
value of x/y because the standard guarantees that the code below is data 
race free:


if (__atomic_load_n (flag1, __ATOMIC_ACQUIRE))
i = x + y;
if (__atomic_load_n (flag2, __ATOMIC_ACQUIRE))
a = 10;
j = x + y;

So, since j=x+y is an unconditional load of x/y, we can assume there are 
no other threads writing to x/y.


However...

Depending on such undefined behaviors is liable to cause confusion and 
frustration with ourselves and our users, so it is best to limit 
these optimizations across atomics altogether.  It's best to err on the 
side of caution than overly optimize across confusing data races.  As we 
become better versed in optimizing across atomics, we can relax things 
and perhaps make optimizations more aggressive.  But for now, let's mark 
this as a must fix, and make sure hoists are not allowed across acquire 
operations.


I have detailed the problem in the testcase.

 so enter it as 2 testcases, one increasing x and one increasing y, or
 better yet set it up so that this function is called twice from
 simulate_main, with other_process() increasing x the first time and
 increasing y the second time... or something like that.

 Andrew

Two testcases?  Now you just want to see me work more :).

Implemented as loop.

OK for branch?
Index: atomic-hoist-1.c
===
--- atomic-hoist-1.c(revision 0)
+++ atomic-hoist-1.c(revision 0)
@@ -0,0 +1,89 @@
+/* { dg-do link } */
+/* { dg-require-effective-target sync_int_long } */
+/* { dg-final { simulate-thread } } */
+
+/* Test that a hoist is not performed across an acquire barrier.  */
+
+#include stdio.h
+#include simulate-thread.h
+
+int iteration = 0;
+int flag1=1, flag2=1;
+unsigned int x=1, y=2, i=0x1234, j=0x5678, a;
+
+
+/* At each instruction, get a new X or Y to later verify that we have
+   not reused a value incorrectly.  */
+void simulate_thread_other_threads ()
+{
+  if (iteration == 0)
+x++;
+  else
+y++;
+}
+
+/* Return true if error, otherwise 0.  */
+int verify_result ()
+{
+  /* [i] should not equal [j], because that would mean that we hoisted
+ [x] or [y] instead of loading them again.  */
+  int fail = i == j;
+  if (fail)
+printf(FAIL: i (%u) should not equal j (%u)\n, i, j);
+  return fail;
+}
+
+int simulate_thread_step_verify ()
+{
+  return verify_result ();
+}
+
+int simulate_thread_final_verify ()
+{
+  return verify_result ();
+}
+
+__attribute__((noinline))
+void simulate_thread_main()
+{
+  for (; iteration  2; ++iteration)
+{
+  /* The values of x or y should not be hoisted across reads of
+flag[12].
+
+For example, when the second load below synchronizes with
+another thread, the synchronization is with a release, and
+that release may cause a stored value of x/y to be flushed
+and become visible.  So, for this case, it is incorrect for
+CSE/CSA/and-others to hoist x or y above the load of
+flag2.  */
+  if (__atomic_load_n (flag1, __ATOMIC_ACQUIRE))
+   i = x + y;
+  if (__atomic_load_n (flag2, __ATOMIC_ACQUIRE))
+   a = 10;
+  /* NOTE: According to the standard we can assume that the
+testcase is data race free, so if there is an unconditional
+load of x+y here at j=x+y, there should not be any other
+thread writing to x or y if we are indeed data race free.
+
+This means that we are technically free to hoist x/y.
+However, since depending on these undefined behaviors is
+liable to get many confused, it is best to be conservative
+with optimizations on atomics, hence the current test.  As
+we become better versed in optimizations across atomics, we
+can relax the optimizations a bit.  */
+  j = x + y;
+
+  /* Since x or y have been changing at each instruction above, i
+and j should be different.  If they are the same, we have
+hoisted something incorrectly.  */
+}
+
+}
+
+main()
+{
+  simulate_thread_main ();
+  simulate_thread_done ();
+  return 0;
+}


[C11-atomic] new test: limit precomputing values across acquire barriers

2012-03-23 Thread Aldy Hernandez
This is a similar test to the previous acquire test.  Here we are 
incorrectly caching 'x' and failing to reload it after the __ATOMIC_ACQUIRE.


+  i = x + y;
+
+  if (__atomic_load_n (flag, __ATOMIC_ACQUIRE))
+{
+  /* x here should not be reused from above.  */
+  k = x;
+}

Note that there is technically a data race on the load of x+y.  See the 
explanation on my previous testcase.


OK for branch?
Index: testsuite/gcc.dg/simulate-thread/atomic-hoist-2.c
===
--- testsuite/gcc.dg/simulate-thread/atomic-hoist-2.c   (revision 0)
+++ testsuite/gcc.dg/simulate-thread/atomic-hoist-2.c   (revision 0)
@@ -0,0 +1,60 @@
+/* { dg-do link } */
+/* { dg-require-effective-target sync_int_long } */
+/* { dg-final { simulate-thread } } */
+
+/* Test that a load is not precomputed before an acquire.  */
+
+#include stdio.h
+#include simulate-thread.h
+
+int flag=0;
+int x = 0, y = 10, i = 0, k = -1;
+
+__attribute__((noinline))
+void simulate_thread_main()
+{
+  /* Test that the first load of x is not cached and reused in the second
+ load of x.  */
+
+  /* Note: Technically this first load of x/y is a data race.  See
+ note on atomic-hoist-1.c.  */
+  i = x + y;
+
+  if (__atomic_load_n (flag, __ATOMIC_ACQUIRE))
+{
+  /* x here should not be reused from above.  */
+  k = x;
+}
+}
+
+void simulate_thread_other_threads ()
+{
+  /* Once i has been calculated in thread 1, change the value of x.  */
+  if (i != 0)
+{
+  x = -1;
+  flag = 1;
+}
+}
+
+int simulate_thread_step_verify ()
+{
+  return 0;
+}
+
+int simulate_thread_final_verify ()
+{
+  if (k != -1)
+{
+  printf(FAIL: k != -1\n);
+  return 1;
+}
+  return 0;
+}
+
+main()
+{
+  simulate_thread_main ();
+  simulate_thread_done ();
+  return 0;
+}


[v3] remove duplicate cxxflags

2012-03-23 Thread Benjamin De Kosnik

Removes duplicates on command line when running test suite. CXXFLAG
info was being quoted twice.

tested x86/linux

-benjamin


Re: [v3] remove duplicate cxxflags

2012-03-23 Thread Benjamin De Kosnik

... here's the patch.

2012-03-22  Benjamin Kosnik  b...@redhat.com

	* scripts/testsuite_flags.in (--cxxflags): Let CXXFLAGS set -g -O2.
	* testsuite/lib/libstdc++.exp: Same.


diff --git a/libstdc++-v3/scripts/testsuite_flags.in b/libstdc++-v3/scripts/testsuite_flags.in
index 457adaf..f77784b 100755
--- a/libstdc++-v3/scripts/testsuite_flags.in
+++ b/libstdc++-v3/scripts/testsuite_flags.in
@@ -54,7 +54,7 @@ case ${query} in
   echo ${CC}
   ;;
 --cxxflags)
-  CXXFLAGS_default=-g -O2 -D_GLIBCXX_ASSERT -fmessage-length=0
+  CXXFLAGS_default=-D_GLIBCXX_ASSERT -fmessage-length=0
   CXXFLAGS_config=@SECTION_FLAGS@ @CXXFLAGS@ @EXTRA_CXX_FLAGS@
   echo ${CXXFLAGS_default} ${CXXFLAGS_config}
   ;;
diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp
index f24d877..c1ccfa5 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -212,7 +212,7 @@ proc libstdc++_init { testfile } {
 
 # Default settings.
 set cxx [transform g++]
-set cxxflags -g -O2 -D_GLIBCXX_ASSERT -fmessage-length=0
+set cxxflags -D_GLIBCXX_ASSERT -fmessage-length=0
 set cxxpchflags 
 set cxxldflags 
 set cc [transform gcc]


AW: [PATCH] eh_personality.cc: unwinding on ARM

2012-03-23 Thread EXTERNAL Waechtler Peter (Fa. TCP, CM-AI/PJ-CF31)
 -Ursprüngliche Nachricht-
 Von: Andrew Stubbs [mailto:a...@codesourcery.com]
 Gesendet: Montag, 19. März 2012 17:12
 An: EXTERNAL Waechtler Peter (Fa. TCP, CM-AI/PJ-CF31)
 Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org;
 p...@codesourcery.com; pwaecht...@mac.com; d...@false.org
 Betreff: Re: [PATCH] eh_personality.cc: unwinding on ARM

 On 16/03/12 13:29, EXTERNAL Waechtler Peter (Fa. TCP,
 CM-AI/PJ-CF31) wrote:
  The CodeSourcery toolchain contains a fix like the following,
  please consider for adding it.

 Here's the full original patch with ChangeLog.

 I don't know why Dan never submitted this one. Perhaps it's
 not suitable
 for upstream or not considered the correct fix?

 Anyway, as far as copyright goes, I don't believe
 CodeSourcery has any
 problem with this being committed.



And here is a stub for a test case.
I don't know how to run the testsuite, just put in include and VERIFY-thingie



/* Stub test case for unwinding through std::terminate on ARM.

   tst_eh01() triggers an uncaught std::out_of_range; the resulting
   abort() raises SIGABRT, and abort_handler() prints a backtrace so the
   depth of the unwound stack can be inspected.  */

#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <execinfo.h>

#include <iostream>
#include <vector>
using namespace std;


static void abort_handler(int n_signal, siginfo_t *siginfo, void *ptr);


/* SIGABRT handler: dump the current backtrace and report "failed" when
   the backtrace filled the whole buffer, "passed" otherwise.  The three
   parameters are required by the SA_SIGINFO handler signature and are
   not used.  */
static void abort_handler(int n_signal, siginfo_t *siginfo, void *ptr)
{
    void *address[20];
    int depth;

    depth = backtrace(address, sizeof(address)/sizeof(void*));

    /* NOTE(review): the symbols are written to file descriptor 0, as in
       the original posting -- confirm whether STDERR_FILENO was meant.  */
    backtrace_symbols_fd(address, depth, 0);
    /* this is a dumb check, better look for main */
    if (depth == sizeof(address)/sizeof(void*))
        cerr << "failed" << endl;
    else
        cerr << "passed" << endl;
}

/* Provoke an uncaught exception: index 42 is out of range for a
   10-element vector, so at() throws std::out_of_range.  */
int tst_eh01(void)
{
    int rc = 0;

    std::vector<int> v(10);
    rc = v.at(42);

    return rc;
}

/* Install abort_handler for SIGABRT, then run the throwing test.  */
int main(int argc, char *argv[])
{
    int c;
    struct sigaction sa;

    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = abort_handler;
    sa.sa_flags = SA_SIGINFO;

    sigaction(SIGABRT, &sa, NULL);

    c = tst_eh01();
    return c;
}


With a fixed CodeSourcery version:

cs-minimal-sysroot/usr/lib/bin/sysroot-qemu src/bt/tst-eh01
terminate called after throwing an instance of 'std::out_of_range'
  what():  vector::_M_range_check
src/bt/tst-eh01[0x9654]
cs-minimal-sysroot/usr/lib/bin/../../../lib/libc.so.6(__default_rt_sa_restorer_v1+0x0)[0x40a06ce0]
cs-minimal-sysroot/usr/lib/bin/../../../lib/libc.so.6(gsignal+0x40)[0x40a059bc]
cs-minimal-sysroot/usr/lib/bin/../../../lib/libc.so.6(abort+0x1d4)[0x40a0acec]
cs-minimal-sysroot/usr/lib/bin/../../../usr/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x110)[0x408e5f4c]
cs-minimal-sysroot/usr/lib/bin/../../../usr/lib/libstdc++.so.6(+0xa707c)[0x408e407c]
cs-minimal-sysroot/usr/lib/bin/../../../usr/lib/libstdc++.so.6(_ZSt9terminatev+0x1c)[0x408e40a4]
cs-minimal-sysroot/usr/lib/bin/../../../usr/lib/libstdc++.so.6(__cxa_throw+0x9c)[0x408e4220]
cs-minimal-sysroot/usr/lib/bin/../../../usr/lib/libstdc++.so.6(_ZSt20__throw_out_of_rangePKc+0x64)[0x4088dc04]
src/bt/tst-eh01(_ZNKSt6vectorIiSaIiEE14_M_range_checkEj+0x44)[0x9c24]
src/bt/tst-eh01(_ZNSt6vectorIiSaIiEE2atEj+0x20)[0x99a8]
src/bt/tst-eh01(_Z8tst_eh01v+0x5c)[0x972c]
src/bt/tst-eh01(main+0x50)[0x97c8]
cs-minimal-sysroot/usr/lib/bin/../../../lib/libc.so.6(__libc_start_main+0x114)[0x409ee754]
passed
qemu: uncaught target signal 6 (Aborted) - core dumped
Aborted



with an unfixed version:

$ ./tst-eh01
terminate called after throwing an instance of 'std::out_of_range'
  what():  vector::_M_range_check
./tst-eh01[0x9580]
/lib/libc.so.6(__default_rt_sa_restorer_v2+0x0)[0x4c883770]
/lib/libc.so.6(gsignal+0x40)[0x4c88241c]
/lib/libc.so.6(abort+0x1c0)[0x4c88680c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
/lib/libstdc++.so.6(_ZN9__gnu_cxx27__verbose_terminate_handlerEv+0x134)[0x4cb2ca0c]
failed
Aborted


Propagate profile counts after switch case expansion (issue5896043)

2012-03-23 Thread Easwaran Raman
This patch propagates execution count of the case labels of a
switch-case statement after its expansion. Bootstraps and all
tests pass. OK for trunk?

2012-03-23   Easwaran Raman  era...@google.com

* cfgbuild.c (non_zero_profile_counts): New function.
(compute_outgoing_frequencies): If at least one successor of a
BB has non-zero profile count, retain the counts.
* expr.c (do_tablejump): Add a REG_BR_PROB note on the
jump to default label.
(try_tablejump): Add a parameter to specify the probability
of jumping to the default label.
* expr.h (try_tablejump): Add a new parameter.
* stmt.c (case_node): Add new fields COUNT and SUBTREE_COUNT.
(add_case_node): Pass execution count of the case node and use
it to initialize COUNT field.
(case_probability): New macro.
(expand_case): Propagate execution counts to generated
branches using REG_BR_PROB notes.
(emit_case_nodes): Likewise.
(do_jump_if_equal): Pass probability for REG_BR_PROB note.
(compute_subtree_counts): New function to compute
SUBTREE_COUNT fields of case nodes.
(add_prob_note_to_last_insn): Add a REG_BR_PROB note with the
given probability to the last generated instruction.

gcc/testsuite/ChangeLog:
2012-03-23   Easwaran Raman  era...@google.com
* gcc.dg/tree-prof/switch-case-1.c: New test case.
* gcc.dg/tree-prof/switch-case-2.c: New test case.

diff --git a/gcc/cfgbuild.c b/gcc/cfgbuild.c
index 692fea8..d75fbda 100644
--- a/gcc/cfgbuild.c
+++ b/gcc/cfgbuild.c
@@ -534,6 +534,21 @@ find_bb_boundaries (basic_block bb)
 purge_dead_tablejump_edges (bb, table);
 }
 
+/* Check if there is at least one edge in EDGES with a non-zero count
+   field.  */
+
+static bool
+non_zero_profile_counts ( VEC(edge,gc) *edges) {
+  edge e;
+  edge_iterator ei;
+  FOR_EACH_EDGE(e, ei, edges)
+{
+  if (e-count  0)
+return true;
+}
+  return false;
+}
+
 /*  Assume that frequency of basic block B is known.  Compute frequencies
 and probabilities of outgoing edges.  */
 
@@ -569,6 +584,10 @@ compute_outgoing_frequencies (basic_block b)
   e-count = b-count;
   return;
 }
+  else if (non_zero_profile_counts (b-succs)){
+/*Profile counts already set, but REG_NOTE missing. Retain the counts.  */
+return;
+  }
   guess_outgoing_edge_probabilities (b);
   if (b-count)
 FOR_EACH_EDGE (e, ei, b-succs)
diff --git a/gcc/expr.c b/gcc/expr.c
index f9de908..fb8eef9 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -156,7 +156,7 @@ static rtx do_store_flag (sepops, rtx, enum machine_mode);
 #ifdef PUSH_ROUNDING
 static void emit_single_push_insn (enum machine_mode, rtx, tree);
 #endif
-static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
+static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);
 
@@ -10694,11 +10694,13 @@ try_casesi (tree index_type, tree index_expr, tree 
minval, tree range,
TABLE_LABEL is a CODE_LABEL rtx for the table itself.
 
DEFAULT_LABEL is a CODE_LABEL rtx to jump to if the
-   index value is out of range.  */
+   index value is out of range.
+   DEFAULT_PROBABILITY is the probability of jumping to the
+   DEFAULT_LABEL.  */
 
 static void
 do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
- rtx default_label)
+ rtx default_label, int default_probability)
 {
   rtx temp, vector;
 
@@ -10714,8 +10716,16 @@ do_tablejump (rtx index, enum machine_mode mode, rtx 
range, rtx table_label,
  the maximum value of the range.  */
 
   if (default_label)
-emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
-default_label);
+{
+  emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
+  default_label);
+  if (default_probability != -1)
+{
+  rtx jump_insn = get_last_insn();
+  add_reg_note (jump_insn, REG_BR_PROB, GEN_INT (default_probability));
+}
+}
+
 
   /* If index is in range, it must fit in Pmode.
  Convert to Pmode so we can index with it.  */
@@ -10758,7 +10768,7 @@ do_tablejump (rtx index, enum machine_mode mode, rtx 
range, rtx table_label,
 
 int
 try_tablejump (tree index_type, tree index_expr, tree minval, tree range,
-  rtx table_label, rtx default_label)
+  rtx table_label, rtx default_label, int default_probability)
 {
   rtx index;
 
@@ -10776,7 +10786,7 @@ try_tablejump (tree index_type, tree index_expr, tree 
minval, tree range,
   TYPE_MODE (TREE_TYPE (range)),
   expand_normal (range),
   TYPE_UNSIGNED (TREE_TYPE (range))),
-   table_label, default_label);
+   table_label, default_label, 

libgo patch committed: Fix errno handling in syscall.Syscall

2012-03-23 Thread Ian Lance Taylor
This patch to libgo fixes the handling of errno in syscall.Syscall and
friends.  Previously it would return a non-zero errno value even when
the system call succeeded.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline and 4.7 branch.

Ian

diff -r b626c77ab40e libgo/go/syscall/syscall_unix.go
--- a/libgo/go/syscall/syscall_unix.go	Tue Mar 13 15:57:41 2012 -0700
+++ b/libgo/go/syscall/syscall_unix.go	Fri Mar 23 11:35:53 2012 -0700
@@ -31,6 +31,7 @@
 // expects a 32-bit one.
 func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
 	Entersyscall()
+	SetErrno(0)
 	var r uintptr
 	if unsafe.Sizeof(r) == 4 {
 		r1 := c_syscall32(int32(trap), int32(a1), int32(a2), int32(a3), 0, 0, 0)
@@ -46,6 +47,7 @@
 
 func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) {
 	Entersyscall()
+	SetErrno(0)
 	var r uintptr
 	if unsafe.Sizeof(r) == 4 {
 		r1 := c_syscall32(int32(trap), int32(a1), int32(a2), int32(a3),
@@ -63,6 +65,7 @@
 
 func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
 	var r uintptr
+	SetErrno(0)
 	if unsafe.Sizeof(r) == 4 {
 		r1 := c_syscall32(int32(trap), int32(a1), int32(a2), int32(a3), 0, 0, 0)
 		r = uintptr(r1)
@@ -76,6 +79,7 @@
 
 func RawSyscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) {
 	var r uintptr
+	SetErrno(0)
 	if unsafe.Sizeof(r) == 4 {
 		r1 := c_syscall32(int32(trap), int32(a1), int32(a2), int32(a3),
 			int32(a4), int32(a5), int32(a6))


[PATCH] AIX malloc.h fixincludes

2012-03-23 Thread David Edelsohn
Some early releases of AIX 6 include a malloc.h header with an
incorrect use of builtin keyword:

#ifdef __cplusplus
extern C {
   extern builtin char *__alloca (size_t);
#  define alloca __alloca
}
#endif /* def __cplusplus */

GCC is not too happy about this.  Now that GCC mainline and 4.7 use
C++ for bootstrap, this breaks bootstrap on a narrow range of AIX 6
systems.  The following patch applies the same fix to the header file
that IBM applied in its own update.

Okay for trunk, 4.7 and 4.6?

Thanks, David


* inclhack.def (aix_malloc): New.
* fixincl.x: Regenerate.
* tests/base/malloc.h [AIX_MALLOC_CHECK]: New.

Index: inclhack.def
===
--- inclhack.def(revision 185739)
+++ inclhack.def(working copy)
@@ -370,6 +370,19 @@
 };

 /*
+ *  malloc.h on AIX6 uses XLC++ specific builtin syntax
+ */
+fix = {
+hackname  = aix_malloc;
+mach  = *-*-aix*;
+files = malloc.h;
+select= #ifdef __cplusplus;
+c_fix = format;
+c_fix_arg = #if (defined(__cplusplus)  defined(__IBMCPP__));
+test_text = #ifdef __cplusplus;
+};
+
+/*
  * net/if_arp.h defines a variable fc_softc instead of adding a
  * typedef for the struct on AIX 5.2, 5.3, 6.1 and 7.1
  */


Re: [PATCH] AIX malloc.h fixincludes

2012-03-23 Thread Bruce Korb
On Fri, Mar 23, 2012 at 11:47 AM, David Edelsohn dje@gmail.com wrote:
 Some early releases of AIX 6 include a malloc.h header with an
 incorrect use of builtin keyword:

 #ifdef __cplusplus
 extern "C" {
   extern "builtin" char *__alloca (size_t);
 #  define alloca __alloca
 }
 #endif /* def __cplusplus */

 GCC is not too happy about this.

clearly.

 Okay for trunk, 4.7 and 4.6?
OK for any open branch you care to apply it to.


Re: Propagate profile counts after switch case expansion (issue5896043)

2012-03-23 Thread Easwaran Raman
Some more background on this patch: Right now, while the execution
counts of different case labels of a switch statement are obtained
during profile collection, they are not propagated to RTL. Instead,
counts are regenerated at the RTL level using static heuristics that
tend to weigh branches equally which can cause poor optimization of
hot code. This patch ensures that the counts collected during profile
collection are correctly propagated allowing hot code to be better
optimized by RTL optimizations.  Patch tested on x86_64.

- Easwaran

On Fri, Mar 23, 2012 at 10:43 AM, Easwaran Raman era...@google.com wrote:
 This patch propagates execution count of the case labels of a
 switch-case statement after its expansion. Bootstraps and all
 tests pass. OK for trunk?

 2012-03-23   Easwaran Raman  era...@google.com

        * cfgbuild.c (non_zero_profile_counts): New function.
        (compute_outgoing_frequencies): If at least one successor of a
        BB has non-zero profile count, retain the counts.
        * expr.c (do_tablejump): Add a REG_BR_PROB note on the
        jump to default label.
        (try_tablejump): Add a parameter to specify the probability
        of jumping to the default label.
        * expr.h (try_tablejump): Add a new parameter.
        * stmt.c (case_node): Add new fields COUNT and SUBTREE_COUNT.
        (add_case_node): Pass execution count of the case node and use
        it to initialize COUNT field.
        (case_probability): New macro.
        (expand_case): Propagate execution counts to generated
        branches using REG_BR_PROB notes.
        (emit_case_nodes): Likewise.
        (do_jump_if_equal): Pass probability for REG_BR_PROB note.
        (compute_subtree_counts): New function to compute
        SUBTREE_COUNT fields of case nodes.
        (add_prob_note_to_last_insn): Add a REG_BR_PROB note with the
        given probability to the last generated instruction.

 gcc/testsuite/ChangeLog:
 2012-03-23   Easwaran Raman  era...@google.com
        * gcc.dg/tree-prof/switch-case-1.c: New test case.
        * gcc.dg/tree-prof/switch-case-2.c: New test case.

 diff --git a/gcc/cfgbuild.c b/gcc/cfgbuild.c
 index 692fea8..d75fbda 100644
 --- a/gcc/cfgbuild.c
 +++ b/gcc/cfgbuild.c
 @@ -534,6 +534,21 @@ find_bb_boundaries (basic_block bb)
     purge_dead_tablejump_edges (bb, table);
  }

 +/* Check if there is at least one edge in EDGES with a non-zero count
 +   field.  */
 +
 +static bool
 +non_zero_profile_counts ( VEC(edge,gc) *edges) {
 +  edge e;
 +  edge_iterator ei;
 +  FOR_EACH_EDGE(e, ei, edges)
 +    {
 +      if (e-count  0)
 +        return true;
 +    }
 +  return false;
 +}
 +
  /*  Assume that frequency of basic block B is known.  Compute frequencies
     and probabilities of outgoing edges.  */

 @@ -569,6 +584,10 @@ compute_outgoing_frequencies (basic_block b)
       e-count = b-count;
       return;
     }
 +  else if (non_zero_profile_counts (b-succs)){
 +    /*Profile counts already set, but REG_NOTE missing. Retain the counts.  
 */
 +    return;
 +  }
   guess_outgoing_edge_probabilities (b);
   if (b-count)
     FOR_EACH_EDGE (e, ei, b-succs)
 diff --git a/gcc/expr.c b/gcc/expr.c
 index f9de908..fb8eef9 100644
 --- a/gcc/expr.c
 +++ b/gcc/expr.c
 @@ -156,7 +156,7 @@ static rtx do_store_flag (sepops, rtx, enum machine_mode);
  #ifdef PUSH_ROUNDING
  static void emit_single_push_insn (enum machine_mode, rtx, tree);
  #endif
 -static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx);
 +static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
  static rtx const_vector_from_tree (tree);
  static void write_complex_part (rtx, rtx, bool);

 @@ -10694,11 +10694,13 @@ try_casesi (tree index_type, tree index_expr, tree 
 minval, tree range,
    TABLE_LABEL is a CODE_LABEL rtx for the table itself.

    DEFAULT_LABEL is a CODE_LABEL rtx to jump to if the
 -   index value is out of range.  */
 +   index value is out of range.
 +   DEFAULT_PROBABILITY is the probability of jumping to the
 +   DEFAULT_LABEL.  */

  static void
  do_tablejump (rtx index, enum machine_mode mode, rtx range, rtx table_label,
 -             rtx default_label)
 +             rtx default_label, int default_probability)
  {
   rtx temp, vector;

 @@ -10714,8 +10716,16 @@ do_tablejump (rtx index, enum machine_mode mode, rtx 
 range, rtx table_label,
      the maximum value of the range.  */

   if (default_label)
 -    emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
 -                            default_label);
 +    {
 +      emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, mode, 1,
 +                              default_label);
 +      if (default_probability != -1)
 +        {
 +          rtx jump_insn = get_last_insn();
 +          add_reg_note (jump_insn, REG_BR_PROB, GEN_INT 
 (default_probability));
 +        }
 +    }
 +

   /* If index is in range, it must fit in Pmode.
      Convert to Pmode so we can index with it.  */
 @@ -10758,7 +10768,7 

[pph] Fix bindings for using declarations (issue5900043)

2012-03-23 Thread Diego Novillo
Fix bindings for using declarations.

When an identifier is declared in a using declaration, we were
registering the USING_DECL object in the corresponding binding.

This was wrong, and at the time I hacked around it by simply ignoring
USING_DECLs when setting bindings.  That was wrong too.  What we
actually need to do is discover what the USING_DECL is pointing to and
register that.

I traced the regular parser, and I'm mimicking the lookup of the
associated TYPE_DECL by calling do_nonmember_using_decl.

The only issue I have with this is that I'm not sure where to get the
scope from. In the testcase I'm fixing, the USING_DECL's type has the
namespace_decl where the search should be done. 

I'll test in the internal codebase and see if this is always the case.


2012-03-23   Diego Novillo  dnovi...@google.com

cp/ChangeLog.pph
* name-lookup.c (pph_set_identifier_binding): If DECL is a USING_DECL,
register the TYPE_DECL it is referring to.
(pph_set_namespace_decl_binding): Remove previous hack that skipped
USING_DECLs.

testsuite/ChangeLog.pph
* g++.dg/pph/x1mbstate_t.h: Mark fixed.

diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index 1b33ce3..26d4f86 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -6197,15 +6197,27 @@ pph_set_identifier_binding (tree id, tree decl,
cp_binding_level *bl, int flags)
 {
   tree old_value;
+  cxx_binding *b;
 
   /* FIXME pph: This code plagarizes from push_overloaded_decl_1 and
  binding_for_name.  It may be incomplete.  */
-
-  cxx_binding *b = cp_binding_level_find_binding_for_name (bl, id);
+  b = cp_binding_level_find_binding_for_name (bl, id);
   if (!b)
 {
   b = cxx_binding_make_for_name (bl, id);
-  b-value = decl;
+  if (TREE_CODE (decl) == USING_DECL)
+   {
+ /* USING_DECLs cannot be registered into the binding.  Instead, we
+look up the TYPE_DECL it is pointing to by calling
+do_nonmember_using_decl.  */
+ tree new_value, new_type;
+ do_nonmember_using_decl (TREE_TYPE (decl), id, b-value, b-type,
+  new_value, new_type);
+ b-value = new_value;
+ b-type = new_type;
+   }
+  else
+   b-value = decl;
   pph_debug_binding_action (new bind, decl);
   return;
 }
@@ -6271,11 +6283,7 @@ pph_set_namespace_decl_binding (tree decl, 
cp_binding_level *bl, int flags)
 {
   /* Set the namespace identifier binding for a single decl.  */
   tree id = DECL_NAME (decl);
-  /* FIXME pph.  USING_DECLs do not seem to be used in bindings by
- the parser. This was causing the SEGV in
- testsuite/g++.dg/pph/x1mbstate_t.h.  It's unclear whether this is
- the right fix.  */
-  if (id  TREE_CODE (decl) != USING_DECL)
+  if (id)
 pph_set_identifier_binding (id, decl, bl, flags);
 }
 
diff --git a/gcc/testsuite/g++.dg/pph/x1mbstate_t.h 
b/gcc/testsuite/g++.dg/pph/x1mbstate_t.h
index 69323d1..4d473e4 100644
--- a/gcc/testsuite/g++.dg/pph/x1mbstate_t.h
+++ b/gcc/testsuite/g++.dg/pph/x1mbstate_t.h
@@ -1,10 +1,8 @@
-// { xfail-if  { *-*-* } { -fpph-map=pph.map } }
-
 #ifndef _X1_MBSTATE_H
 #define _X1_MBSTATE_H
 #include x0mbstate_t.h
 // Name lookup for std::mbstate_t was failingfails here.  Instead of returning
 // the global type_decl for mbstate_t, it was returning the
 // usings ::mbstate_t declaration.
-typedef std::mbstate_t state_type;  // { dg-bogus 'mbstate_t' in namespace 
'std' does not name a type  { xfail *-*-* } }
+typedef std::mbstate_t state_type;
 #endif

--
This patch is available for review at http://codereview.appspot.com/5900043


Re: Propagate profile counts after switch case expansion (issue5896043)

2012-03-23 Thread Andi Kleen
Easwaran Raman era...@google.com writes:

 Some more background on this patch: Right now, while the execution
 counts of different case labels of a switch statement are obtained
 during profile collection, they are not propagated to RTL. Instead,
 counts are regenerated at the RTL level using static heuristics that
 tend to weigh branches equally which can cause poor optimization of
 hot code. This patch ensures that the counts collected during profile
 collection are correctly propagated allowing hot code to be better
 optimized by RTL optimizations.  Patch tested on x86_64.

I think your patch doesn't use the probability to weight the decision
tree for non tablejump, right? I looked at this some time ago,
but the patch always had problems.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only


Re: Propagate profile counts after switch case expansion (issue5896043)

2012-03-23 Thread Easwaran Raman
On Fri, Mar 23, 2012 at 3:29 PM, Andi Kleen a...@firstfloor.org wrote:
 Easwaran Raman era...@google.com writes:

 Some more background on this patch: Right now, while the execution
 counts of different case labels of a switch statement are obtained
 during profile collection, they are not propagated to RTL. Instead,
 counts are regenerated at the RTL level using static heuristics that
 tend to weigh branches equally which can cause poor optimization of
 hot code. This patch ensures that the counts collected during profile
 collection are correctly propagated allowing hot code to be better
 optimized by RTL optimizations.  Patch tested on x86_64.

 I think your patch doesn't use the probability to weight the decision
 tree for non tablejump, right? I looked at this some time ago,
 but the patch always had problems.

Do you mean use the weights to decide the shape of the binary tree
(similar to COST_TABLE heuristic)? I am planning to send a separate
patch for that. This one just makes sure that the profile counts are
propagated correctly. So you will still have a situation where a
branch corresponding to an infrequently executed case dominates a
frequently executed case, but the BB of the cases gets the right
profile weight.

- Easwaran

 -Andi

 --
 a...@linux.intel.com -- Speaking for myself only


Re: Propagate profile counts after switch case expansion (issue5896043)

2012-03-23 Thread Andi Kleen
 Do you mean use the weights to decide the shape of the binary tree

Yes.

 (similar to COST_TABLE heuristic)?

COST_TABLE should die I hope.

 I am planning to send a separate patch for that. 

Great.

-Andi


[google/4.6] For -gfission, remove address table entry when removing location list entry (issue5900045)

2012-03-23 Thread Cary Coutant
2012-03-23   Cary Coutant  ccout...@google.com

* dwarf2out.c (resolve_addr): Remove address table entry for symbol
when removing location list entry.


Index: dwarf2out.c
===
--- dwarf2out.c (revision 185594)
+++ dwarf2out.c (working copy)
@@ -23803,11 +23803,15 @@ resolve_addr (dw_die_ref die)
if (!resolve_addr_in_expr ((*curr)-expr))
  {
dw_loc_list_ref next = (*curr)-dw_loc_next;
+   dw_loc_descr_ref l = (*curr)-expr;
+
if (next  (*curr)-ll_symbol)
  {
gcc_assert (!next-ll_symbol);
next-ll_symbol = (*curr)-ll_symbol;
  }
+   if (l-dw_loc_oprnd1.val_index != -1U)
+ remove_addr_table_entry (l-dw_loc_oprnd1.val_index);
*curr = next;
  }
else

--
This patch is available for review at http://codereview.appspot.com/5900045


Re: [google/4.6] For -gfission, remove address table entry when removing location list entry (issue5900045)

2012-03-23 Thread Sterling Augustine
On Fri, Mar 23, 2012 at 3:40 PM, Cary Coutant ccout...@google.com wrote:

 2012-03-23   Cary Coutant  ccout...@google.com

        * dwarf2out.c (resolve_addr): Remove address table entry for symbol
        when removing location list entry.


 Index: dwarf2out.c
 ===
 --- dwarf2out.c (revision 185594)
 +++ dwarf2out.c (working copy)
 @@ -23803,11 +23803,15 @@ resolve_addr (dw_die_ref die)
                if (!resolve_addr_in_expr ((*curr)-expr))
                  {
                    dw_loc_list_ref next = (*curr)-dw_loc_next;
 +                   dw_loc_descr_ref l = (*curr)-expr;
 +
                    if (next  (*curr)-ll_symbol)
                      {
                        gcc_assert (!next-ll_symbol);
                        next-ll_symbol = (*curr)-ll_symbol;
                      }
 +                   if (l-dw_loc_oprnd1.val_index != -1U)
 +                     remove_addr_table_entry
 (l-dw_loc_oprnd1.val_index);
                    *curr = next;
                  }
                else

 --
 This patch is available for review at
 http://codereview.appspot.com/5900045

This patch is OK for Google 4.6.

Sterling