[patch] Fix PR tree-optimization/50039

2011-08-10 Thread Ira Rosen
Hi,

This patch adds a check in vect_operation_fits_smaller_type ()  that a
widening statement has a stmt_vec_info, i.e., that it is a loop
statement.

Bootstrapped and tested on powerpc64-suse-linux.
Committed.

Ira

ChangeLog:

PR tree-optimization/50039
* tree-vect-patterns.c (vect_operation_fits_smaller_type): Check
that DEF_STMT has a stmt_vec_info.

testsuite/ChangeLog:

PR tree-optimization/50039
* gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre.
* gcc.dg/vect/no-tree-fre-pr50039.c: New test.

Index: testsuite/gcc.dg/vect/vect.exp
===
--- testsuite/gcc.dg/vect/vect.exp  (revision 177646)
+++ testsuite/gcc.dg/vect/vect.exp  (working copy)
@@ -257,6 +257,12 @@ lappend VECT_SLP_CFLAGS "-fno-tree-reassoc"
 dg-runtest [lsort [glob -nocomplain
$srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]]  \
 "" $VECT_SLP_CFLAGS

+# -fno-tree-fre
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]]  \
+"" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}

Index: testsuite/gcc.dg/vect/no-tree-fre-pr50039.c
===
--- testsuite/gcc.dg/vect/no-tree-fre-pr50039.c (revision 0)
+++ testsuite/gcc.dg/vect/no-tree-fre-pr50039.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+extern unsigned char g_5;
+extern int g_31, g_76;
+int main(void) {
+ int i, j;
+for (j=0; j < 2; ++j) {
+g_31 = -3;
+for (i=0; i < 2; ++i)
+  g_76 = (g_31 ? g_31+1 : 0) ^ g_5;
+}
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
Index: tree-vect-patterns.c
===
--- tree-vect-patterns.c(revision 177646)
+++ tree-vect-patterns.c(working copy)
@@ -897,7 +897,8 @@ vect_operation_fits_smaller_type (gimple stmt, tre
   else
 {
   first = true;
-  if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false))
+  if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
+  || !vinfo_for_stmt (def_stmt))
 return false;
 }


[pph] Re-write streamer to use new generic tree streaming (issue4868041)

2011-08-10 Thread Diego Novillo

This patch applies on top of http://codereview.appspot.com/4863041/
It re-implements the PPH streamer to use the new tree streaming API.
This allows us to handle the pickle cache directly in PPH, on top of
which I am implementing the external referencing for decls.

The big changes are in pph_write_tree and pph_read_tree.  Instead of
being callbacks to handle parts of a tree node, they are in charge of
handling the whole tree.

Both pph_write_tree and pph_read_tree use pph_*_start_record() to put
every tree node in the pickle cache.  All the previous callbacks are
now embedded inside the routines dealing with the header and the body
of the tree.

The changes in the testsuite are needed because the ICEs in some
template test cases are now in a different location.

Tested on x86_64.  Committed to branch.


Diego.

* pph-streamer-in.c (pph_unpack_value_fields): Make static.
Move down in the file.
(pph_alloc_tree): Remove.
(pph_read_tree_body): Factor out of pph_read_tree.
Call lto_input_tree_pointers.
(pph_read_tree_header): New.
(pph_read_tree): Call pph_read_tree_header and
pph_read_tree_body.
* pph-streamer-out.c (pph_pack_value_fields): Make static.
Move down in the file.
(pph_out_tree_header): Remove.
(pph_write_tree_body): Factor out of pph_write_tree.
Call lto_output_tree_pointers.
(pph_write_tree_header): New.
(pph_write_tree): Call lto_output_builtin_tree,
pph_write_tree_header and pph_write_tree_body.
* pph-streamer.c (pph_is_streamable): Remove.
(pph_indexable_with_decls_p): Remove.
(pph_cache_preload): Rename from pph_preload_common_nodes.
Re-implement using pickle cache in struct pph_stream.
(pph_hooks_init): Remove setting for all removed hooks.
(pph_stream_open): Call pph_cache_preload.
(pph_cache_insert_at): Allow the same pointer to be at two
different cache locations.
(pph_cache_add): If IX_P is NULL, do not try to set it.
* pph-streamer.h (pph_pack_value_fields): Remove.
(pph_out_tree_header): Remove.
(pph_unpack_value_fields): Remove.
(pph_alloc_tree): Remove.
(pph_read_tree): Update prototype.
(pph_out_tree): Call pph_write_tree instead of lto_output_tree.
(pph_out_tree_array): Likewise.
(pph_out_tree_or_ref_1): Likewise.
(pph_out_tree_VEC): Likewise.
(pph_in_tree): Call pph_read_tree instead of lto_input_tree.
(pph_in_tree_array): Likewise.
(pph_in_tree_VEC): Likewise.

testsuite/ChangeLog.pph
* g++.dg/pph/x1tmplclass2.cc: Update expected ICE message.
* g++.dg/pph/x4tmplclass2.cc: Likewise.
* g++.dg/pph/z4tmplclass2.cc: Likewise.
---
 gcc/cp/pph-streamer-in.c |  205 --
 gcc/cp/pph-streamer-out.c|  175 +-
 gcc/cp/pph-streamer.c|   81 
 gcc/cp/pph-streamer.h|   21 +--
 gcc/testsuite/g++.dg/pph/x1tmplclass2.cc |2 +-
 gcc/testsuite/g++.dg/pph/x4tmplclass2.cc |2 +-
 gcc/testsuite/g++.dg/pph/z4tmplclass2.cc |2 +-
 9 files changed, 336 insertions(+), 198 deletions(-)

diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index fcf1c12..92bfac0 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -72,44 +72,6 @@ static VEC(char_p,heap) *string_tables = NULL;
which every streamed in token must add to it's serialized source_location. 
*/
 static int pph_loc_offset;
 
-/* Callback for unpacking value fields in ASTs.  BP is the bitpack 
-   we are unpacking from.  EXPR is the tree to unpack.  */
-
-void
-pph_unpack_value_fields (struct bitpack_d *bp, tree expr)
-{
-  if (TYPE_P (expr))
-{
-  TYPE_LANG_FLAG_0 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_1 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_2 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_3 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_4 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_5 (expr) = bp_unpack_value (bp, 1);
-  TYPE_LANG_FLAG_6 (expr) = bp_unpack_value (bp, 1);
-}
-  else if (DECL_P (expr))
-{
-  DECL_LANG_FLAG_0 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_1 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_2 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_3 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_4 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_5 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_6 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_7 (expr) = bp_unpack_value (bp, 1);
-  DECL_LANG_FLAG_8 (expr) = bp_unpack_value (bp, 1);
-}
-
-  TREE_LANG_FLAG_0 (expr) = bp_unpack_value (bp, 1);
-  TREE_LANG_FLAG_1 (expr) = bp_unpack_value (bp, 1);
-  TREE_LANG_FLAG_2 (expr) = bp_unpack_value (bp,

[pph] Template test upgrade. (issue4864041)

2011-08-10 Thread Lawrence Crowl
This patch adds new well-factored PPH template tests.  In the process,
some old template tests get removed.

Enable the e and z tests for PPH files with unsharable headers.
Add xdiff markers where appropriate.

Compile tests with -fno-dwarf2-cfi-asm to reduce environmental differences.

Tests run on x64.


Index: gcc/testsuite/ChangeLog.pph

2011-08-10  Lawrence Crowl  

* lib/dg-pph.exp: Compile with -fno-dwarf2-cfi-asm to reduce
environmental differences.
* g++.dg/pph/z0expinstnin1.h: Remove in favor of new tests.
* g++.dg/pph/x0tmplfuncninl3.h: New.
* g++.dg/pph/x0tmplclass12.h: New.
* g++.dg/pph/a0tmplfuncninl_u.h: New.
* g++.dg/pph/x4tmplfuncninl.cc: New.
* g++.dg/pph/z4tmplfuncninl.cc: New.
* g++.dg/pph/x0tmplclass2.h: Remove in favor of new tests.
* g++.dg/pph/x1tmplfunc.cc: Remove in favor of new tests.
* g++.dg/pph/x0tmplfunc.h: Remove in favor of new tests.
* g++.dg/pph/x0tmplfuncinln3.h: New.
* g++.dg/pph/x0template2.h: Remove in favor of new tests.
* g++.dg/pph/z4expinstinl.cc: Remove in favor of new tests.
* g++.dg/pph/x0tmplclass21.h: New.
* g++.dg/pph/a0tmplfuncinln_u.h: New.
* g++.dg/pph/x4tmplfuncinln.cc: New.
* g++.dg/pph/z4tmplfuncinln.cc: New.
* g++.dg/pph/z0expinstnin2.h: Remove in favor of new tests.
* g++.dg/pph/a0template.h: Remove in favor of new tests.
* g++.dg/pph/pph.exp: Enable e and z tests.
* g++.dg/pph/z4tmplclass2.cc: New.
* g++.dg/pph/x0tmplfuncninl4.h: New.
* g++.dg/pph/a0tmplclass2_g.h: New.
* g++.dg/pph/x0tmplclass13.h: New.
* g++.dg/pph/x1template.cc: Remove in favor of new tests.
* g++.dg/pph/a0tmplclass2_s.h: New.
* g++.dg/pph/a0tmplclass1_u.h: New.
* g++.dg/pph/x4tmplclass1.cc: New.
* g++.dg/pph/x0tmplfuncinln4.h: New.
* g++.dg/pph/x1tmplclass.cc: Remove in favor of new tests.
* g++.dg/pph/a0expinstinl.h: Remove in favor of new tests.
* g++.dg/pph/x0tmplclass22.h: New.
* g++.dg/pph/x1tmplfuncninl.cc: New.
* g++.dg/pph/a0tmplclass.h: Remove in favor of new tests.
* g++.dg/pph/x4template.cc: Remove in favor of new tests.
* g++.dg/pph/x1tmplclass1.cc: New.
* g++.dg/pph/z4nontrivinit.cc: Add xdiff.
* g++.dg/pph/e4variables.cc: Add xdiff.
* g++.dg/pph/a0tmplfuncninl_g.h: New.
* g++.dg/pph/x0tmplfuncninl1.h: New.
* g++.dg/pph/z4expinstnin.cc: Remove in favor of new tests.
* g++.dg/pph/a0tmplfuncninl_s.h: New.
* g++.dg/pph/x0tmplclass14.h: New.
* g++.dg/pph/a0expinstnin.h: Remove in favor of new tests.
* g++.dg/pph/x1tmplfuncinln.cc: New.
* g++.dg/pph/z0expinstinl1.h: Remove in favor of new tests.
* g++.dg/pph/a0tmplfuncinln_g.h: New.
* g++.dg/pph/x0tmplfuncinln1.h: New.
* g++.dg/pph/a0tmplfuncinln_s.h: New.
* g++.dg/pph/x0tmplclass23.h: New.
* g++.dg/pph/x4tmplclass2.cc: New.
* g++.dg/pph/x0tmplfuncninl2.h: New.
* g++.dg/pph/a0tmplclass1_g.h: New.
* g++.dg/pph/x0tmplclass11.h: New.
* g++.dg/pph/e4noninline.cc: Remove xdiff.
* g++.dg/pph/a0tmplclass1_s.h: New.
* g++.dg/pph/z0expinstinl2.h: Remove in favor of new tests.
* g++.dg/pph/a0tmplclass2_u.h: New.
* g++.dg/pph/x0tmplclass1.h: Remove in favor of new tests.
* g++.dg/pph/x1tmplclass2.cc: New.
* g++.dg/pph/z4tmplclass1.cc: New.
* g++.dg/pph/x0tmplfuncinln2.h: New.
* g++.dg/pph/x0template1.h: Remove in favor of new tests.
* g++.dg/pph/x0tmplclass24.h: New.


Index: gcc/testsuite/lib/dg-pph.exp
===
--- gcc/testsuite/lib/dg-pph.exp(revision 177632)
+++ gcc/testsuite/lib/dg-pph.exp(working copy)
@@ -54,7 +54,7 @@ proc dg-pph-neg { subdir test options ma
 verbose -log "\nTesting $nshort, $options"
 
 set dg-do-what-default compile
-dg-test -keep-output $test "$options $mapflag -I." ""
+dg-test -keep-output $test "-fno-dwarf2-cfi-asm $options $mapflag -I." ""
 
 if { [file_on_host exists "$bname.s"] } {
file_on_host delete "$bname.s"
@@ -76,7 +76,7 @@ proc dg-pph-pos { subdir test options ma
 
 # Compile the file the first time for a base case.
 set dg-do-what-default compile
-dg-test -keep-output $test "$options -I." ""
+dg-test -keep-output $test "-fno-dwarf2-cfi-asm $options -I." ""
 
 # Determine whether this is an assembly comparison test
 set is_exec [llength [grep $test "dg-do run"]]
@@ -102,7 +102,7 @@ proc dg-pph-pos { subdir test options ma
 verbose -log ""
 
 # Compile a second time using the pph files.
-dg-test -keep-output $test "$options $mapflag -I." ""
+dg-test -keep-output $test "-fno-dwarf2-cfi-asm $options $mapflag -I." ""
 
 if { !$is_asm } {

Re: [PATCH] Work around PR 50031, sphinx3 slowdown in powerpc on GCC 4.6 and GCC 4.7

2011-08-10 Thread Michael Meissner
On Wed, Aug 10, 2011 at 10:08:54AM +0200, Richard Guenther wrote:
> Are the arrays all well-aligned in practice?  Thus, would versioning the loop
> for all-good-alignment help?

I suspect yes on 64-bit, but no on 32-bit, due to malloc not returning 128-bit
aligned memory in 32-bit.  It only returns memory that is aligned to double the
alignment of size_t.  Long doubles in powerpc are 128 bits, as are the vector
types.

I did a test, eliminating the vec_realign stuff under switch control.  This has
the effect of versioning the loop into a vector loop that is run when all are
aligned, and a scalar loop that is run when they aren't all aligned.  I ran
spec 2006 in 32-bit, and I see the following differences (eliminating the ones
that are close enough).

Benchmark  % of baseline
=  =
400.perlbench96.09%
429.mcf 104.50%
456.hmmer95.85%
458.sjeng   104.23%
464.h264ref 112.18%
483.xalancbmk   102.35%
410.bwaves  107.02%
416.gamess   96.01%
433.milc 98.90%
434.zeusmp   94.92%
435.gromacs 105.55%
450.soplex  108.58%
453.povray  103.71%
454.calculix 97.54%
459.GemsFDTD 97.35%
465.tonto97.79%
470.lbm  98.56%
481.wrf  87.11%
482.sphinx3 110.33%

I was hoping that doing the versioning for an aligned loop and unaligned loop
would eliminate the percentages under 100%.

Note, the powerpc VSX memory instructions for V4SF/V4SI types can run if the
pointer is not aligned to a 128-bit boundary, but there is a slowdown if they
get pointers that aren't aligned to a 64-bit boundary.  I'm doing a run right
now, with movmisalign enabled for V4SF/V4SI, and I am seeing some regressions
in the run.

> If we have 4 permutes and then 8 further ones - can we combine for example
> an unaligned load permute and the following permute for the sf->df conversion?

I don't think so.

The unaligned stuff is to load up a 128-bit value in a register using a left
half and a right half, and a mask.  The Altivec instruction set has an
instruction (lvsl) that computes the mask based on the address, and the loads
and stores ignore the bottom 4 bits.

The unaligned loop looks something like:

left = vector_load (addr & -16)
mask = lvsl (addr)
for (...) {
addr += 16;
right = vector_load (addr & -16)
value = permute (left, right, mask);
  /* ... */
left = right;
}

The two permutes for the conversion, get the values in the correct place for
the conversion instruction, ie if you have a vector with the parts:

+++++
|  A |  B |  C |  D |
+++++

The first permute (xxmrghw) in the conversion would create a vector:

+++++
|  A |  A |  B |  B |
+++++

and the second (xxmrglw) would create:

+++++
|  C |  C |  D |  D |
+++++

Note, the values are doubled, because the instruction takes 2 registers as
input, and we just give the same register for both inputs.

The xvcvspdp instruction then takes a vector of the form (ignoring the 2nd and
4th fields):

+++++
|  X | ?? |  Y | ?? |
+++++

and converts it to double precision:

+=+=+
|   X |   Y |
+=+=+

> Does ppc have a VSX tuned cost-model and is it applied correctly in this case?
> Maybe we need more fine-grained costs?

The ppc has a cost model, but as I said in 50031, I think it needs to be
improved.

-- 
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899


Re: [graphite] Move to cloog.org interface

2011-08-10 Thread Sebastian Pop
Hi Tobi,

The patch looks good modulo some formatting changes:

+free(c->free_name);
+  int length = strlen(e->name);
[...]

Please follow the GNU style: there should be a space between the
function name and the open parenthesis.

-  dom = new_Cloog_Domain_from_ppl_Pointset_Powerset (PBB_DOMAIN (pbb),
+  domain = new_Cloog_Domain_from_ppl_Pointset_Powerset (PBB_DOMAIN (pbb),
  scop_nb_params (scop),
  cloog_state);

You would still need proper indentation on the last two lines.

+  /* Dump a .cloog input file, if requested.  This feature is only
+   * enabled in the Graphite branch.  */

Please remove the * from the beginning of the new line in this comment.

Ok with these changes and after the other patches are committed.

Thanks,
Sebastian


Re: [PATCH 3/3] Remove code that supported legacy CLooG.

2011-08-10 Thread Tobias Grosser

On 08/02/2011 04:47 PM, Sebastian Pop wrote:

Ping.

Could one of the configure maintainers review these changes?

Thanks,
Sebastian


Ping II.

This is needed for move graphite to the official cloog.org library 
interface [1].


Thanks a lot for your time
Tobi

[1] http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00976.html




On Thu, Jul 21, 2011 at 18:00, Tobias Grosser  wrote:

2011-07-21  Tobias Grosser

* configure: Regenerated.
* config/cloog.m4: Do not define CLOOG_ORG

and in gcc/

2011-07-21  Tobias Grosser

* Makefile.in (graphite-clast-to-gimple.o, graphite-cloog-util.o):
Remove graphite-cloog-util.h.
* graphite-clast-to-gimple.c (gcc_type_for_iv_of_clast_loop,
build_iv_mapping, translate_clast_user, translate_clast,
free_scattering, initialize_cloog_names, build_cloog_prog,
create_params_index): Do not use old compatibility functions.
(clast_name_to_index, set_cloog_options): Remove code for legacy cloog.
* graphite-cloog-util.c (openscop_print_cloog_matrix): Do not use old
compatibility functions.
(new_Cloog_Scattering_from_ppl_Polyhedron): Remove code for legacy
cloog.
* graphite-cloog-util.h: Remove include of graphite-cloog-util.h.
* graphite.c (graphite.c): Do not call outdated cloog_initialize() and
cloog_finalize().
* graphite-cloog-compat.h: Remove.
---
  ChangeLog  |5 +
  config/cloog.m4|2 +-
  configure  |2 +-
  gcc/ChangeLog  |   18 +++
  gcc/Makefile.in|4 +-
  gcc/graphite-clast-to-gimple.c |   93 ++
  gcc/graphite-cloog-compat.h|  275 
  gcc/graphite-cloog-util.c  |   15 +--
  gcc/graphite-cloog-util.h  |1 -
  gcc/graphite.c |2 -
  10 files changed, 72 insertions(+), 345 deletions(-)
  delete mode 100644 gcc/graphite-cloog-compat.h

diff --git a/ChangeLog b/ChangeLog
index 3d83bd2..9499da4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,11 @@
  2011-07-21  Tobias Grosser

* configure: Regenerated.
+   * config/cloog.m4: Do not define CLOOG_ORGt
+
+2011-07-21  Tobias Grosser
+
+   * configure: Regenerated.
* configure.ac: Require cloog isl 0.16.3

  2011-07-21  Tobias Grosser
diff --git a/config/cloog.m4 b/config/cloog.m4
index 8662acd..9c42445 100644
--- a/config/cloog.m4
+++ b/config/cloog.m4
@@ -109,7 +109,7 @@ AC_DEFUN([CLOOG_FIND_FLAGS],
   _cloog_saved_LDFLAGS=$LDFLAGS
   _cloog_saved_LIBS=$LIBS

-  _cloogorginc="-DCLOOG_INT_GMP -DCLOOG_ORG"
+  _cloogorginc="-DCLOOG_INT_GMP"

   dnl clooglibs&  clooginc may have been initialized by CLOOG_INIT_FLAGS.
   CFLAGS="${CFLAGS} ${clooginc} ${gmpinc}"
diff --git a/configure b/configure
index 57f099b..8de7bc69 100755
--- a/configure
+++ b/configure
@@ -5771,7 +5771,7 @@ if test "x$with_cloog" != "xno"; then
   _cloog_saved_LDFLAGS=$LDFLAGS
   _cloog_saved_LIBS=$LIBS

-  _cloogorginc="-DCLOOG_INT_GMP -DCLOOG_ORG"
+  _cloogorginc="-DCLOOG_INT_GMP"

 CFLAGS="${CFLAGS} ${clooginc} ${gmpinc}"
   CPPFLAGS="${CPPFLAGS} ${_cloogorginc}"
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b9d95fa..b6009b7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2011-07-21  Tobias Grosser
+
+   * Makefile.in (graphite-clast-to-gimple.o, graphite-cloog-util.o):
+   Remove graphite-cloog-util.h.
+   * graphite-clast-to-gimple.c (gcc_type_for_iv_of_clast_loop,
+   build_iv_mapping, translate_clast_user, translate_clast,
+   free_scattering, initialize_cloog_names, build_cloog_prog,
+   create_params_index): Do not use old compatibility functions.
+   (clast_name_to_index, set_cloog_options): Remove code for legacy cloog.
+   * graphite-cloog-util.c (openscop_print_cloog_matrix): Do not use old
+   compatibility functions.
+   (new_Cloog_Scattering_from_ppl_Polyhedron): Remove code for legacy
+   cloog.
+   * graphite-cloog-util.h: Remove include of graphite-cloog-util.h.
+   * graphite.c (graphite.c): Do not call outdated cloog_initialize() and
+   cloog_finalize().
+   * graphite-cloog-compat.h: Remove.
+
  2011-07-21  Georg-Johann Lay

* config/avr/avr.c (final_prescan_insn): Fix printing of rtx_costs.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d924fb6..c5a2f7f 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2690,9 +2690,9 @@ graphite-clast-to-gimple.o : graphite-clast-to-gimple.c 
$(CONFIG_H) \
$(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) \
$(CFGLOOP_H) $(TREE_DATA_REF_H) sese.h graphite-cloog-util.h \
graphite-ppl.h graphite-poly.h graphite-clast-to-gimple.h \
-   graphite-dependences.h graphite-cloog-compat.h
+   graphite-dependences.h
  graphite-cloog-util.o : graphite-cloog-util.c $(CONFIG_H) $(SYSTEM_H) \
-   coretypes.h graphite-cloog-util.h graphite-cloog-comp

Re: [PATCH 2/3] Require cloog 0.16.3

2011-08-10 Thread Tobias Grosser

On 08/02/2011 04:47 PM, Sebastian Pop wrote:

Ping.

Could one of the configure maintainers review these changes?


Ping II.

This is needed for move to the official cloog.org library interface [1].

Thanks a lot for your time
Tobi

[1] http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00976.html


Thanks,
Sebastian


On Thu, Jul 21, 2011 at 18:00, Tobias Grosser  wrote:

2011-07-21  Tobias Grosser

* configure: Regenerated.
* configure.ac: Require cloog isl 0.16.3
---
  ChangeLog|5 +
  configure|6 +++---
  configure.ac |2 +-
  3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a08a780..3d83bd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,11 @@
  2011-07-21  Tobias Grosser

* configure: Regenerated.
+   * configure.ac: Require cloog isl 0.16.3
+
+2011-07-21  Tobias Grosser
+
+   * configure: Regenerated.
* config/cloog.m4: Remove support for CLooG-ppl and CLooG-parma,
both cloog.org and legacy versions. The only supported version will
be CLooG with the isl backend.
diff --git a/configure b/configure
index 6608b86..57f099b 100755
--- a/configure
+++ b/configure
@@ -5834,8 +5834,8 @@ $as_echo "$gcc_cv_cloog_type">&6; }
 CFLAGS="${_cloog_saved_CFLAGS} ${clooginc} ${pplinc} ${gmpinc}"
 LDFLAGS="${_cloog_saved_LDFLAGS} ${clooglibs} ${ppllibs}"

-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for version 0.16.1 of 
CLooG">&5
-$as_echo_n "checking for version 0.16.1 of CLooG... ">&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for version 0.16.3 of 
CLooG">&5
+$as_echo_n "checking for version 0.16.3 of CLooG... ">&6; }
  if test "${gcc_cv_cloog+set}" = set; then :
   $as_echo_n "(cached) ">&6
  else
@@ -5847,7 +5847,7 @@ main ()
  {
  #if CLOOG_VERSION_MAJOR != 0 \
 || CLOOG_VERSION_MINOR != 16 \
-|| CLOOG_VERSION_REVISION<  1
+|| CLOOG_VERSION_REVISION<  3
 choke me
#endif
   ;
diff --git a/configure.ac b/configure.ac
index e64e577..00325a1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1588,7 +1588,7 @@ if test "x$with_cloog" != "xno"; then
   dnl
   dnl If we use CLooG-Legacy, the provided version information is
   dnl ignored.
-  CLOOG_CHECK_VERSION(0,16,1)
+  CLOOG_CHECK_VERSION(0,16,3)

   dnl Only execute fail-action, if CLooG has been requested.
   CLOOG_IF_FAILED([
--
1.7.4.1






Re: [PATCH 1/3] Make CLooG isl the only supported CLooG version.

2011-08-10 Thread Tobias Grosser

On 08/02/2011 04:46 PM, Sebastian Pop wrote:

Ping.

Could one of the configure maintainers review these changes?


Ping II.

Would anyone be so kind to review this?

The missing documentation changes are here
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02309.html

Cheers
Tobi



Thanks,
Sebastian

On Thu, Jul 21, 2011 at 18:00, Tobias Grosser  wrote:

2011-07-21  Tobias Grosser

* configure: Regenerated.
* config/cloog.m4: Remove support for CLooG-ppl and CLooG-parma,
both cloog.org and legacy versions. The only supported version will
be CLooG with the isl backend.
---
  ChangeLog   |7 ++
  config/cloog.m4 |  107 +++---
  configure   |  170 +++
  3 files changed, 26 insertions(+), 258 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6a27fb7..a08a780 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2011-07-21  Tobias Grosser
+
+   * configure: Regenerated.
+   * config/cloog.m4: Remove support for CLooG-ppl and CLooG-parma,
+   both cloog.org and legacy versions. The only supported version will
+   be CLooG with the isl backend.
+
  2011-07-21  Joseph Myers

* MAINTAINERS (Global Reviewers): Add self.
diff --git a/config/cloog.m4 b/config/cloog.m4
index e95b98d..8662acd 100644
--- a/config/cloog.m4
+++ b/config/cloog.m4
@@ -37,17 +37,6 @@ AC_DEFUN([CLOOG_INIT_FLAGS],
   [--with-cloog-lib=PATH],
   [Specify the directory for the installed CLooG library])])

-  AC_ARG_ENABLE(cloog-backend,
-[AS_HELP_STRING(
-  [--enable-cloog-backend[[=BACKEND]]],
-  [set the CLooG BACKEND used to either isl, ppl or ppl-legacy 
(default)])],
-[ if   test "x${enableval}" = "xisl"; then
-   cloog_backend=isl
-  elif test "x${enableval}" = "xppl"; then
-   cloog_backend=ppl
-  else
-   cloog_backend=ppl-legacy
-  fi], cloog_backend=ppl-legacy)
   AC_ARG_ENABLE(cloog-version-check,
 [AS_HELP_STRING(
   [--disable-cloog-version-check],
@@ -107,23 +96,6 @@ m4_define([_CLOOG_ORG_PROG_ISL],[AC_LANG_PROGRAM(
   [#include "cloog/cloog.h" ],
   [cloog_version ()])])

-# _CLOOG_ORG_PROG_PPL ()
-# --
-# Helper for detecting CLooG.org's PPL backend.
-m4_define([_CLOOG_ORG_PROG_PPL],[AC_LANG_PROGRAM(
-  [#include "cloog/cloog.h"
-   #include "cloog/ppl/cloog.h"],
-  [cloog_version ()])])
-
-# _CLOOG_PPL_LEGACY_PROG ()
-# -
-# Helper for detecting CLooG-Legacy (CLooG-PPL).
-m4_define([_CLOOG_PPL_LEGACY_PROG], [AC_LANG_PROGRAM(
-  [#include "cloog/cloog.h"],
-  [#ifndef CLOOG_PPL_BACKEND
-choke me
-   #endif ])])
-
  # CLOOG_FIND_FLAGS ()
  # --
  # Detect the used CLooG-backend and set clooginc/clooglibs/cloog_org.
@@ -144,49 +116,17 @@ AC_DEFUN([CLOOG_FIND_FLAGS],
   CPPFLAGS="${CPPFLAGS} ${_cloogorginc}"
   LDFLAGS="${LDFLAGS} ${clooglibs}"

-  case $cloog_backend in
-"ppl-legacy")
-CFLAGS="${CFLAGS} ${pplinc}"
-LDFLAGS="${LDFLAGS} ${ppllibs}"
-AC_CACHE_CHECK([for installed CLooG PPL Legacy], [gcc_cv_cloog_type],
-  [LIBS="-lcloog ${_cloog_saved_LIBS}"
-  AC_LINK_IFELSE([_CLOOG_PPL_LEGACY_PROG], [gcc_cv_cloog_type="PPL 
Legacy"],
-[gcc_cv_cloog_type=no])])
-;;
-"isl")
-AC_CACHE_CHECK([for installed CLooG ISL], [gcc_cv_cloog_type],
-  [LIBS="-lcloog-isl ${_cloog_saved_LIBS}"
-  AC_LINK_IFELSE([_CLOOG_ORG_PROG_ISL], [gcc_cv_cloog_type="ISL"],
-[gcc_cv_cloog_type=no])])
-;;
-"ppl")
-CFLAGS="${CFLAGS} ${pplinc}"
-LDFLAGS="${LDFLAGS} ${ppllibs}"
-AC_CACHE_CHECK([for installed CLooG PPL], [gcc_cv_cloog_type],
-  [LIBS="-lcloog-ppl ${_cloog_saved_LIBS}"
-  AC_LINK_IFELSE([_CLOOG_ORG_PROG_PPL], [gcc_cv_cloog_type="PPL"],
-[gcc_cv_cloog_type=no])])
-;;
-*)
-  gcc_cv_cloog_type=""
-  esac
+  AC_CACHE_CHECK([for installed CLooG ISL], [gcc_cv_cloog_type],
+[LIBS="-lcloog-isl ${_cloog_saved_LIBS}"
+AC_LINK_IFELSE([_CLOOG_ORG_PROG_ISL], [gcc_cv_cloog_type="ISL"],
+  [gcc_cv_cloog_type=no])])

   case $gcc_cv_cloog_type in
-"PPL Legacy")
-  clooginc="${clooginc}"
-  clooglibs="${clooglibs} -lcloog"
-  cloog_org=no
-  ;;
 "ISL")
   clooginc="${clooginc} ${_cloogorginc}"
   clooglibs="${clooglibs} -lcloog-isl -lisl"
   cloog_org=yes
   ;;
-"PPL")
-  clooginc="${clooginc} ${_cloogorginc}"
-  clooglibs="${clooglibs} -lcloog-ppl"
-  cloog_org=yes
-  ;;
 *)
   clooglibs=
   clooginc=
@@ -212,25 +152,10 @@ m4_define([_CLOOG_CHECK_CT_PROG],[AC_LANG_PROGRAM(
 choke me
#endif])])

-# _CLOOG_CHECK_RT_PROG ()
-# ---
-# Helper for verifying that CLooG's compile time version
-# matches the run time version.
-m4_define([_CLOOG_CHECK_RT_PROG],[AC_LANG_PROGRAM(
-  [#include "cloog/cloog.h"],
-  [if ((cloog_version_major () != CLOOG_

[graphite] Move to cloog.org interface

2011-08-10 Thread Tobias Grosser

Hi,

this patch moves the graphite code to the documented cloog library 
interface and removes the use of old/deprecated interfaces. It ensures 
that graphite will also compile flawless with future cloog.org versions.


  * graphite-clast-to-gimple.c (new_clast_name_index): Store a copy
  of the string, no just a reference.
  (clast_name_index): Add a new field, that specifies if we need to 
free the

  name.
  (free_clast_name_index): If necessary, free the name string.
  (clast_name_index_elt_info): Calculate the hash based on the string 
content,

  not the memory location it is stored in.
  (clast_name_to_level): Specify that we do not need to free the name.
  (clast_name_to_index): Dito.
  (clast_name_to_lb_ub): Dito.
  (eq_clast_name_indexes): Compare the strings, not their base pointers.
  (free_scattering): Removed.
  (initialize_cloog_names): Renamed to add_names_to_union_domain().
  (add_names_to_union_domain): Changed to work on a union_domain, 
instead of a

  CloogNames structure.
  (build_cloog_prog): Removed.
  (build_cloog_union_domain): New.
  (generate_cloog_input): New.
  (scop_to_clast): Use CloogInput instead of CloogProgram.
  (print_generated_program): Adapt to new scop_to_clast() and do not
  print the CloogProgram any more.
  (create_params_index): Removed, functionality integrated in
  add_names_to_union_domain().
  (gloog): Adapt to new scop_to_clast().
  * graphite-clast-to-gimple.h (scop_to_clast): Remove.

This patch depends on my previous patches [1], that remove supports for 
cloog-ppl (which does not implement the new interface).


Bootstrapped and 'make check RUNTESTFLAGS=graphite.exp' tested on Linux 
amd64. OK to commit, after support for cloog-ppl was removed?


Cheers
Tobi

[1] http://gcc.gnu.org/ml/gcc-patches/2011-07/msg01892.html
>From f07adb059a8793378073a2f4b2a6d1702c56771b Mon Sep 17 00:00:00 2001
From: Tobias Grosser 
Date: Tue, 9 Aug 2011 18:26:28 +0100
Subject: [PATCH] Move to new Cloog interface.

  * graphite-clast-to-gimple.c (new_clast_name_index): Store a copy
  of the string, no just a reference.
  (clast_name_index): Add a new field, that specifies if we need to free the
  name.
  (free_clast_name_index): If necessary, free the name string.
  (clast_name_index_elt_info): Calculate the hash based on the string content,
  not the memory location it is stored in.
  (clast_name_to_level): Specify that we do not need to free the name.
  (clast_name_to_index): Dito.
  (clast_name_to_lb_ub): Dito.
  (eq_clast_name_indexes): Compare the strings, not their base pointers.
  (free_scattering): Removed.
  (initialize_cloog_names): Renamed to add_names_to_union_domain().
  (add_names_to_union_domain): Changed to work on a union_domain, instead of a
  CloogNames structure.
  (build_cloog_prog): Removed.
  (build_cloog_union_domain): New.
  (generate_cloog_input): New.
  (scop_to_clast): Use CloogInput instead of CloogProgram.
  (print_generated_program): Adapt to new scop_to_clast() and do not
  print the CloogProgram any more.
  (create_params_index): Removed, functionality integrated in
  add_names_to_union_domain().
  (gloog): Adapt to new scop_to_clast().
  * graphite-clast-to-gimple.h (scop_to_clast): Remove.
---
 gcc/graphite-clast-to-gimple.c |  321 +++-
 gcc/graphite-clast-to-gimple.h |1 -
 2 files changed, 123 insertions(+), 199 deletions(-)

diff --git a/gcc/graphite-clast-to-gimple.c b/gcc/graphite-clast-to-gimple.c
index c8c5024..7a8e89d 100644
--- a/gcc/graphite-clast-to-gimple.c
+++ b/gcc/graphite-clast-to-gimple.c
@@ -70,6 +70,9 @@ typedef struct clast_name_index {
   int level;
   mpz_t bound_one, bound_two;
   const char *name;
+  /* If free_name is set, the content of name was allocated by us and needs
+ to be freed.  */
+  char *free_name;
 } *clast_name_index_p;
 
 /* Returns a pointer to a new element of type clast_name_index_p built
@@ -80,8 +83,11 @@ new_clast_name_index (const char *name, int index, int level,
 		  mpz_t bound_one, mpz_t bound_two)
 {
   clast_name_index_p res = XNEW (struct clast_name_index);
+  char *new_name = XNEWVEC(char, strlen(name) + 1);
+  strcpy(new_name, name);
 
-  res->name = name;
+  res->name = new_name;
+  res->free_name = new_name;
   res->level = level;
   res->index = index;
   mpz_init (res->bound_one);
@@ -97,6 +103,8 @@ static void
 free_clast_name_index (void *ptr)
 {
   struct clast_name_index *c = (struct clast_name_index *) ptr;
+  if (c->free_name)
+free(c->free_name);
   mpz_clear (c->bound_one);
   mpz_clear (c->bound_two);
   free (ptr);
@@ -115,6 +123,7 @@ clast_name_to_level (clast_name_p name, htab_t index_table)
 
   gcc_assert (name->type == clast_expr_name);
   tmp.name = ((const struct clast_name *) name)->name;
+  tmp.free_name = NULL;
 
   slot = htab_find_slot (index_table, &tmp, NO_INSERT);
 
@@ -136,6 +145,7 @@ clast_name_to_index (clast_name_p name, htab_t index_table)
 
   gcc_assert (name->type == clast_expr_name);
   tmp.name = (

[RFC PATCH] Make devirtualization use BINFO_VTABLE instead of BINFO_VIRTUALS

2011-08-10 Thread Martin Jambor
Hi,

the patch below changes devirtualization to use BINFO_VTABLE instead
of BINFO_VIRTUALS.  This is in fact slightly more cumbersome to use in
almost any respect but has one huge advantage, it uses _much_ less
memory in LTO, because the virtual tables are unified just like all
other VAR_DECLs and there are already elaborate rules in place to tell
whether it needs to be output at all from a given TU, whereas only
streaming all the different lists of virtual functions from different
TUs out and then back in consumes a lot of both memory and time and
unifying them (in BASE_BINFOS) would also be very complex.

With this change, the memory peak of WPA of 483.xalancbmk dropped from
554909 kB to 437129 kB (by 21%) and WPA of Mozilla Firefox the memory
peak went from 4599 MB to 2912 MB (decrease by 36%).  The number of
virtual calls with discovered targets during IPA-CP and during
inlining remains exactly the same (total 171 occurrences in xalancbmk
but sadly only 8 in Firefox at the moment).  All of the above is on
x86_64-linux.

There are drawbacks.  Most importantly, simple thunks adjusting this
by a constant delta are not converted to a this adjustment at the call
site and a call to the real function which currently prevents
inlining.  I had to XFAIL two testcases ivinline-7.C and ivinline-9.C
exactly for this reason.  However, I believe we do want to be able to
process thunks at IPA level in general and so this is acceptable (also
because our current rate of devirtualization is not really
staggering).  At least we have testcases for Honza to ponder about :-)

Since we no longer de-thunkize, all the various delta parameters and
the thunk_adjust field in cgraph_node could go so I removed them.
This is what makes the patch so big even though the real change in
gimple_get_virt_method_for_binfo is smallish.  When I thought I would
be removing infrastructure that might still be useful, I stopped,
which resulted into two empty functions and some ATTRIBUTE_UNUSEDs in
streaming code.  Function gimple_adjust_this_by_delta is now probably
unused too but I kept it because it can be useful for inlining.  I can
nuke more stuff if requested, of course.

I certainly expect there will be comments and I will be asked to
change some details but the main reason why this is still only RFC
material is that I suspect the code will break horribly on
architectures defining TARGET_VTABLE_USES_DESCRIPTORS.  I have not yet
had a look on what the tables look like there and whether we can do
something along these lines for them at all.

Otherwise, this passes bootstrap and testing on x86_64-linux and I
also tested it by LTOing Mozilla Firefox and xalancbmk with the
results indicated above.  I certainly believe we should do something
like this for 4.7 because of the memory improvements.

Thanks,

Martin


2011-08-09  Martin Jambor  

* cgraph.h (cgraph_indirect_call_info): Removed field thunk_delta.
* gimple-fold.c (gimple_get_virt_method_for_binfo): Rewritten to use
BINFO_VTABLE.  Parameter delta removed, all callers updated.
* tree.c (free_lang_data_in_binfo): Clear BINFO_VIRTUALs instead
BINFO_VTABLE.
* cgraph.c (cgraph_make_edge_direct): Removed parameter delta, updated
all calls.
* cgraphunit.c (cgraph_redirect_edge_call_stmt_to_callee): Removed
handling of thunk_delta.
* ipa-cp.c (get_indirect_edge_target): Removed parameter delta.
(devirtualization_time_bonus): Do not handle thunk deltas.
(ipcp_discover_new_direct_edges): Likewise.
* ipa-prop.c (ipa_make_edge_direct_to_target): Likewise.
(try_make_edge_direct_simple_call): Likewise.
(try_make_edge_direct_virtual_call): Likewise.
* lto-cgraph.c (output_cgraph_opt_summary_p): Likewise.  Mark
parameter set as unused.
(output_edge_opt_summary): Likewise.  Mark both parameters as unused.
* lto-cgraph.c (output_cgraph_opt_summary_p): Likewise.  Mark
parameter set as unused.
(output_edge_opt_summary): Likewise.  Mark both parameters as unused.
(input_edge_opt_summary): Likewise.
* lto-streamer-out.c (lto_output_ts_binfo_tree_pointers): Do not stream
BINFO_VIRTUALS at all.
* lto-streamer-in.c (lto_input_ts_binfo_tree_pointers): Likewise.

* testsuite/g++.dg/ipa/ivinline-7.C: Added a test for direct call
discovery, xfailed test for inlining.
* testsuite/g++.dg/ipa/ivinline-9.C: Likewise.

Index: src/gcc/gimple-fold.c
===
--- src.orig/gcc/gimple-fold.c
+++ src/gcc/gimple-fold.c
@@ -1381,51 +1381,6 @@ gimple_fold_builtin (gimple stmt)
   return result;
 }
 
-/* Return a declaration of a function which an OBJ_TYPE_REF references. TOKEN
-   is integer form of OBJ_TYPE_REF_TOKEN of the reference expression.
-   KNOWN_BINFO carries the binfo describing the true type of
-   OBJ_TYPE_REF_OBJECT(REF).  If a call to 

[PATCH, LTO] Re-assign BINFOs when unifying types

2011-08-10 Thread Martin Jambor
Hi,

the following patch sets BINFOs of prevailing but non-main-variant
types to what the main variant has, so that for a type t with a binfo
the following is always true:

TYPE_BINFO (t) == TYPE_BINFO (TYPE_MAIN_VARIANT (t))

as I I believe in non-LTO world it always is.  Moreover, the BINFO
trees of non-prevailing type nodes now always get garbage collected
(at least as far as my experiments with 483.xalancbmk go).  The memory
savings are negligible, but still I think we should do it.

Bootstrapped and tested on x86_64-linux, also tested by LTOing Mozilla
Firefox and 483.xalancbmk.

OK for trunk?

Thanks,

Martin


2011-08-03  Martin Jambor  

* lto.c (uniquify_nodes): Use main variant's BINFO too.

Index: src/gcc/lto/lto.c
===
--- src.orig/gcc/lto/lto.c
+++ src/gcc/lto/lto.c
@@ -720,6 +720,8 @@ uniquify_nodes (struct data_in *data_in,
{
  TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv);
  TYPE_NEXT_VARIANT (mv) = t;
+ if (RECORD_OR_UNION_TYPE_P (t))
+   TYPE_BINFO (t) = TYPE_BINFO (mv);
}
 
  /* Finally adjust our main variant and fix it up.  */


Re: [PATCH 8/8] Only merge deps status for true dependencies

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Sergey Grechanik

This patch avoids changing speculative bits of a register use when it is moved
up above a speculation check where that register is used as the address
register.  We should only call ds_full_merge when the producer (speculation
check) writes to a register.

(A similar check was in place when selective scheduler was in active
development, but was dropped before it was merged, probably by accident)

2011-08-04  Sergey Grechanik

* sel-sched-ir.c (has_dependence_note_reg_use): Call ds_full_merge
only if producer writes to the register given by regno.

diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index 91f9dd9..745fcc1 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -3227,7 +3227,10 @@ has_dependence_note_reg_use (int regno)
  pro_spec_checked_ds = INSN_SPEC_CHECKED_DS (has_dependence_data.pro);
  pro_spec_checked_ds = ds_get_max_dep_weak (pro_spec_checked_ds);

- if (pro_spec_checked_ds != 0)
+ if (pro_spec_checked_ds != 0
+ /* FIXME: if mode for REGNO was available here, we could use
+register_unavailable_p that tests all hard regs for mode.  */
I don't think you need the mode here.  For pseudo register it is always 
1. For hard registers note_reg_use is always called for every hard 
register containing a value.


So the patch is ok, if you remove the comment or write what I mentioned 
as the comment.


Thanks.

+   &&  bitmap_bit_p (INSN_REG_SETS (has_dependence_data.pro), regno))
/* Merge BE_IN_SPEC bits into *DSP.  */
*dsp = ds_full_merge (*dsp, pro_spec_checked_ds,
  NULL_RTX, NULL_RTX);




Re: [PATCH 7/8] Factor out caching logic for INSN_COND

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Sergey Grechanik

Sometimes we need to be able to call sched_get_condition_with_rev from
selective scheduler for an instruction with zero luid (i.e. before h_d_i_d
had been extended).  On such occasion, we need to bypass the caching and use
"old", uncached lookup.

The patch factors out caching logic to a separate function and amends it to
skip cached lookup for instructions with zero luid.  It also renames INSN_COND
to INSN_CACHED_COND to avoid clash with the same macro in predication patch
for sel-sched.

2011-08-04  Sergey Grechanik

* sched-deps.c (sched_get_condition_with_rev): Rename to ...
(sched_get_condition_with_rev_uncached): ... this.  Factor out
condition caching logic into ...
(sched_get_condition_with_rev): ... this.  Reimplement.  Do not
attempt to use cache for instructions with zero luid.
(sched_analyze_insn): Use INSN_CACHED_COND instead of INSN_COND.
* sched-int.h (INSN_COND): Rename to INSN_CACHED_COND.

Ok, thanks.



Re: [PATCH 6/8] Try successors to find seqno

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Sergey Grechanik

This patch fixes a problem when new jumps created in
sel_redirect_edge_and_branch_force could not get correct seqnos.

get_seqno_of_a_pred is renamed to get_seqno_for_a_jump.
Implementation-wise, it supports looking at multiple predecessors, and, if
that fails, also at successors.  get_seqno_by_succs is implemented similar to
get_seqno_by_preds.

2011-08-04  Sergey Grechanik

* sel-sched-ir.c (get_seqno_of_a_pred): Rename to
get_seqno_for_a_jump.  Update the caller.
(get_seqno_by_succs): New.  Use it ...
(get_seqno_for_a_jump): ... here to find a seqno if looking at
predecessors was not sufficient.
(get_seqno_by_preds): Include head in iteration range, exclude insn.

Ok, thanks.


Re: [PATCH 5/8] Drop an incorrect assert

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Dmitry Melnik

This fixes a bug caused by trying to initialize BB_AV_SET of a newly generated
jump.  It assumes that jump is only generated in new bb, while it can be
replaced in same BB by try_redirect_by_replacing_jump, if jump was conditional
and was pointing to the same BB by its both edges (this may happen due to
predication, when moving all insns up from a "diamond").

2011-08-04  Dmitry Melnik

* sel-sched-ir.c (invalidate_av_set): Remove the assert.


Ok, thanks.


Re: [PATCH 4/8] Properly loop over all hard regs for mode

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Sergey Grechanik

There are several places where bitmap_bit_p function is used to test if some
register is in the regset.  We need to take into account the fact that
depending on mode, we need to test multiple hard regs.

2011-08-04  Sergey Grechanik

* sel-sched-ir.h (register_unavailable_p): Declare.
* sel-sched-ir.c (register_unavailable_p): New.  Use it...
(set_unavailable_target_for_expr): ... here to properly test
availability of a register.
(speculate_expr): Ditto.
* sel-sched.c (substitute_reg_in_expr): Ditto.
(av_set_could_be_blocked_by_bookkeeping_p): Ditto.

Ok.


Re: [PATCH 3/8] Fix usage of hard_regno_nregs before reload

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Sergey Grechanik

This fixes one place where hard_regno_nregs is incorrectly guarded by
reload_completed (as if before reload all regs are pseudos).

2011-08-04  Sergey Grechanik

* sel-sched.c (verify_target_availability): Fix usage of
hard_regno_nregs.


Ok, thanks.



Re: [PATCH 2/8] Make more insns unique

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Dmitry Melnik

This patch prevents duplicating (as bookkeeping code) instructions that are
either volatile or recognized by cannot_copy_insn_p target hook (in addition
to already present restrictions).  This avoids generating incorrect assembler
with duplicate labels on ARM.

2011-08-04  Dmitry Melnik

* sel-sched-ir.c (init_global_and_expr_for_insn): Forbid copying of
recognized by cannot_copy_insn_p hook and volatile instructions.

OK, thanks.



Re: [PATCH 1/8] Take maximum spec when merging exprs

2011-08-10 Thread Vladimir Makarov

On 08/03/2011 11:30 AM, Alexander Monakov wrote:

From: Dmitry Melnik

EXPR_SPEC is an indicator of the speculativeness of an expression (an
instruction or just an rhs), as it is incremented each time the expression is
moved up across a conditional branch.  When merging expr attributes for
similar exprs available from two destinations of a branch, sel-sched assigns
the minimum of EXPR_SPEC's to the result, effectively making the resulting
expr non-speculative if only one of those exprs was non-speculative.  However,
since we are relying on EXPR_SPEC being a correct indication of expr's
speculativeness when deciding whether it would need a bookkeeping copy, we
really want to avoid that.

The patch changes minimum to maximum, making the code match what was
originally intended.

2011-08-04  Dmitry Melnik

* sel-sched-ir.c (merge_expr_data): Take maximum spec.

OK, thanks.



Re: [PATCH, testsuite, i386] AVX2 support for GCC

2011-08-10 Thread Uros Bizjak
On Wed, Aug 10, 2011 at 9:39 PM, Uros Bizjak  wrote:

> diff --git a/gcc/common/config/i386/i386-common.c
> b/gcc/common/config/i386/i386-common.c
> index 1fd33bd..1e0ca5e 100644
> --- a/gcc/common/config/i386/i386-common.c
> +++ b/gcc/common/config/i386/i386-common.c
> @@ -52,6 +52,8 @@ along with GCC; see the file COPYING3.  If not see
>   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
>  #define OPTION_MASK_ISA_FMA_SET \
>   (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
> +#define OPTION_MASK_ISA_AVX2_SET \
> +  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX_SET)
>
>  /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
>    as -msse4.2.  */
> @@ -114,8 +116,10 @@ along with GCC; see the file COPYING3.  If not see
>   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
>  #define OPTION_MASK_ISA_AVX_UNSET \
>   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
> -   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
> +   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
> +   | OPTION_MASK_ISA_AVX)
>
> OPTION_MASK_ISA_AVX2

Hrm, OPTION_MASK_ISA_AVX2_UNSET.

Uros.


Re: [PATCH, testsuite, i386] AVX2 support for GCC

2011-08-10 Thread Uros Bizjak
On Tue, Aug 9, 2011 at 2:42 PM, Kirill Yukhin  wrote:

> Here is second stage patch.
> It introduces AVX2 option, define etc.
>
> ChangeLog entry:
>
> 2011-08-09  Kirill Yukhin  
>
>        * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX2_SET): New.
>        (OPTION_MASK_ISA_FMA_UNSET): Update.

Where?

>        (OPTION_MASK_ISA_AVX2_UNSET): New.
>        (ix86_handle_option): Handle OPT_mavx2 case.
>        * config/i386/cpuid.h (bit_AVX2): New.
>        * config/i386/driver-i386.c (host_detect_local_cpu): Detect
>        AVX2 feature.
>        * config/i386/i386-c.c (ix86_target_macros_internal): Define
>        __AVX2_ if needed.

Conditionally define __AVX2__.

>        * config/i386/i386.c (ix86_option_override_internal): Handle
>        AVX2 option, define new processor alias - "core-avx2".

Define PTA_AVX2.  Define "core-avx2" processor alias.  Handle avx2 option.

>        (ix86_valid_target_attribute_inner_p): Likewise.

Handle avx2 option.

>        * config/i386/i386.h (TARGET_AVX2): New.
>        * config/i386/i386.opt (mavx2): New.
>        * doc/invoke.texi: Document -mavx2.


diff --git a/gcc/common/config/i386/i386-common.c
b/gcc/common/config/i386/i386-common.c
index 1fd33bd..1e0ca5e 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -52,6 +52,8 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
 #define OPTION_MASK_ISA_FMA_SET \
   (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_AVX2_SET \
+  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX_SET)

 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2.  */
@@ -114,8 +116,10 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
 #define OPTION_MASK_ISA_AVX_UNSET \
   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
-   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
+   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
+   | OPTION_MASK_ISA_AVX)

OPTION_MASK_ISA_AVX2

> Is it OK for trunk, after int64 is comitted?

OK with the change above.

Thanks,
Uros.


Re: Linemap force location and remove LINEMAP_POSITION_FOR_COLUMN (issue4801090)

2011-08-10 Thread Gabriel Charette
Tested with bootstrap and full regression testing on x64.

On Wed, Aug 10, 2011 at 11:22 AM, Gabriel Charette  wrote:
> There was a bug where c_finish_options would create some builtins and assign 
> them source_locations in the linemap other than BUILTINS_LOCATION == 1.
>
> Thus, when calling DECL_IS_BUILTIN to know if a decl is a builtin, some of 
> them would return false as they had a source_location other than 
> BUILTINS_LOCATION within the line_map entry that was incorrectly created in 
> c_finish_options.
>
> The problem to fix this is that _cpp_lex_direct just gets the location of the 
> currently lexed token by calling linemap_position_for_column. It has no 
> notion of whether this token is a builtin or whatever else it may be.
>
> My solution is to add a forced_location field to the line_table which when 
> set is returned by default by linemap_position_for_column instead of getting 
> a new loc from the line_table.
>
> Furthermore, this mechanism will allow us to inject locations for a similar 
> problem we have in pph when lexing replayed pre-processor definitions (which 
> as it is now are getting assigned new source_locations which is creating 
> problems for us).
>
> I'm open to other solutions if anyone sees another way to do it. We could 
> also leave linemap_position_for_column as is and have a separate inline 
> function, say linemap_pos_for_col_or_forced, which makes the forced_location 
> check and simply calls linemap_position_for_column when no forced_location.
>
> I also removed LINEMAP_POSITION_FOR_COLUMN, it did the EXACT same thing as 
> linemap_position_for_column, so maintaining both in parallel seems like 
> overkill to me. The only thing I can think of is that it's more optimal as 
> it's inlined (but if that's really needed we can always make 
> linemap_position_for_column an inline function).
>
> Gabriel
>
> 2011-08-10  Gabriel Charette  
>
>        * c-opts.c (c_finish_options): Don't create built-in line_table entry;
>        instead force BUILTINS_LOCATION when creating builtins.
>
>        * include/line-map.h (struct line_maps): Add field forced_location.
>        (LINEMAP_POSITION_FOR_COLUMN): Remove.
>        * line-map.c (linemap_init): Init forced_location to 0.
>        (linemap_position_for_column): Return forced_location by default if set
>
> diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
> index 3227f7b..1af8e7b 100644
> --- a/gcc/c-family/c-opts.c
> +++ b/gcc/c-family/c-opts.c
> @@ -1306,13 +1306,15 @@ c_finish_options (void)
>     {
>       size_t i;
>
> -      cb_file_change (parse_in,
> -                     linemap_add (line_table, LC_RENAME, 0,
> -                                  _(""), 0));
> +      /* Make sure all of the builtins about to be declared have
> +         BUILTINS_LOCATION has their source_location.  */
> +      line_table->forced_location = BUILTINS_LOCATION;
>
>       cpp_init_builtins (parse_in, flag_hosted);
>       c_cpp_builtins (parse_in);
>
> +      line_table->forced_location = 0;
> +
>       /* We're about to send user input to cpplib, so make it warn for
>         things that we previously (when we sent it internal definitions)
>         told it to not warn.
> diff --git a/gcc/go/gofrontend/lex.cc b/gcc/go/gofrontend/lex.cc
> index 9f26911..167c7dd 100644
> --- a/gcc/go/gofrontend/lex.cc
> +++ b/gcc/go/gofrontend/lex.cc
> @@ -518,9 +518,7 @@ Lex::require_line()
>  source_location
>  Lex::location() const
>  {
> -  source_location location;
> -  LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1);
> -  return location;
> +  return linemap_position_for_column (line_table, this->lineoff_ + 1);
>  }
>
>  // Get a location slightly before the current one.  This is used for
> @@ -529,9 +527,7 @@ Lex::location() const
>  source_location
>  Lex::earlier_location(int chars) const
>  {
> -  source_location location;
> -  LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1 - 
> chars);
> -  return location;
> +  return linemap_position_for_column (line_table, this->lineoff_ + 1 - 
> chars);
>  }
>
>  // Get the next token.
> diff --git a/libcpp/directives-only.c b/libcpp/directives-only.c
> index e19f806..c6772af 100644
> --- a/libcpp/directives-only.c
> +++ b/libcpp/directives-only.c
> @@ -142,7 +142,7 @@ _cpp_preprocess_dir_only (cpp_reader *pfile,
>            flags |= DO_LINE_COMMENT;
>          else if (!(flags & DO_SPECIAL))
>            /* Mark the position for possible error reporting. */
> -           LINEMAP_POSITION_FOR_COLUMN (loc, pfile->line_table, col);
> +           loc = linemap_position_for_column (pfile->line_table, col);
>
>          break;
>
> diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
> index f1d5bee..d14528e 100644
> --- a/libcpp/include/line-map.h
> +++ b/libcpp/include/line-map.h
> @@ -95,6 +95,11 @@ struct GTY(()) line_maps {
>   /* If non-null, the allocator to use when resizing 'maps'.  If null,
>      xrealloc is used.  */
>   line_map_rea

Linemap force location and remove LINEMAP_POSITION_FOR_COLUMN (issue4801090)

2011-08-10 Thread Gabriel Charette
There was a bug where c_finish_options would create some builtins and assign 
them source_locations in the linemap other than BUILTINS_LOCATION == 1.

Thus, when calling DECL_IS_BUILTIN to know if a decl is a builtin, some of them 
would return false as they had a source_location other than BUILTINS_LOCATION 
within the line_map entry that was incorrectly created in c_finish_options.

The problem to fix this is that _cpp_lex_direct just gets the location of the 
currently lexed token by calling linemap_position_for_column. It has no notion 
of whether this token is a builtin or whatever else it may be.

My solution is to add a forced_location field to the line_table which when set 
is returned by default by linemap_position_for_column instead of getting a new 
loc from the line_table.

Furthermore, this mechanism will allow us to inject locations for a similar 
problem we have in pph when lexing replayed pre-processor definitions (which as 
it is now are getting assigned new source_locations which is creating problems 
for us).

I'm open to other solutions if anyone sees another way to do it. We could also 
leave linemap_position_for_column as is and have a separate inline function, 
say linemap_pos_for_col_or_forced, which makes the forced_location check and 
simply calls linemap_position_for_column when no forced_location.

I also removed LINEMAP_POSITION_FOR_COLUMN, it did the EXACT same thing as 
linemap_position_for_column, so maintaining both in parallel seems like 
overkill to me. The only thing I can think of is that it's more optimal as it's 
inlined (but if that's really needed we can always make 
linemap_position_for_column an inline function).

Gabriel

2011-08-10  Gabriel Charette  

* c-opts.c (c_finish_options): Don't create built-in line_table entry;
instead force BUILTINS_LOCATION when creating builtins.

* include/line-map.h (struct line_maps): Add field forced_location.
(LINEMAP_POSITION_FOR_COLUMN): Remove.
* line-map.c (linemap_init): Init forced_location to 0.
(linemap_position_for_column): Return forced_location by default if set

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 3227f7b..1af8e7b 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -1306,13 +1306,15 @@ c_finish_options (void)
 {
   size_t i;
 
-  cb_file_change (parse_in,
- linemap_add (line_table, LC_RENAME, 0,
-  _(""), 0));
+  /* Make sure all of the builtins about to be declared have
+ BUILTINS_LOCATION has their source_location.  */
+  line_table->forced_location = BUILTINS_LOCATION;
 
   cpp_init_builtins (parse_in, flag_hosted);
   c_cpp_builtins (parse_in);
 
+  line_table->forced_location = 0;
+
   /* We're about to send user input to cpplib, so make it warn for
 things that we previously (when we sent it internal definitions)
 told it to not warn.
diff --git a/gcc/go/gofrontend/lex.cc b/gcc/go/gofrontend/lex.cc
index 9f26911..167c7dd 100644
--- a/gcc/go/gofrontend/lex.cc
+++ b/gcc/go/gofrontend/lex.cc
@@ -518,9 +518,7 @@ Lex::require_line()
 source_location
 Lex::location() const
 {
-  source_location location;
-  LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1);
-  return location;
+  return linemap_position_for_column (line_table, this->lineoff_ + 1);
 }
 
 // Get a location slightly before the current one.  This is used for
@@ -529,9 +527,7 @@ Lex::location() const
 source_location
 Lex::earlier_location(int chars) const
 {
-  source_location location;
-  LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1 - 
chars);
-  return location;
+  return linemap_position_for_column (line_table, this->lineoff_ + 1 - chars);
 }
 
 // Get the next token.
diff --git a/libcpp/directives-only.c b/libcpp/directives-only.c
index e19f806..c6772af 100644
--- a/libcpp/directives-only.c
+++ b/libcpp/directives-only.c
@@ -142,7 +142,7 @@ _cpp_preprocess_dir_only (cpp_reader *pfile,
flags |= DO_LINE_COMMENT;
  else if (!(flags & DO_SPECIAL))
/* Mark the position for possible error reporting. */
-   LINEMAP_POSITION_FOR_COLUMN (loc, pfile->line_table, col);
+   loc = linemap_position_for_column (pfile->line_table, col);
 
  break;
 
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index f1d5bee..d14528e 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -95,6 +95,11 @@ struct GTY(()) line_maps {
   /* If non-null, the allocator to use when resizing 'maps'.  If null,
  xrealloc is used.  */
   line_map_realloc reallocator;
+
+  /* If non-zero, linemap_position_for_column automatically returns
+ the value stored at this memory location, instead of caclulating
+ a new source_location.  */
+  source_location forced_location;
 };
 
 /* Initialize a line map set.  */
@@ -165,23 +170,6 @@ extern const str

Re: RFA: Do not use cmpstrnsi to implement builtin memcmp

2011-08-10 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 08/10/11 09:49, Nick Clifton wrote:
> Hi Guys,
> 
> This is a resend of a previously submitted patch:
> 
> http://gcc.gnu.org/ml/gcc-patches/2009-03/msg00363.html
> 
> This version of the patch has been adjusted to apply to today's
> sources, but otherwise it remains the same.
> 
> The point of the patch is that the cmpstrnsi machine pattern should
> not be used to implement the memcmp builtin function.  This is
> because a string comparison will terminate if two zero bytes are read
> whereas a memory comparison should continue.
> 
> Tested without regressions on i686-pc-linux-gnu and rx-elf
> toolchains.
> 
> OK to apply ?
> 
> Cheers Nick
> 
> gcc/ChangeLog 2011-08-17  Nick Clifton  
> 
> * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern. *
> doc/md.texi (cmpstrn): Note that the comparison stops if both fetched
> bytes are zero. (cmpstr): Likewise. (cmpmem): Note that the
> comparison does not stop if both of the fetched bytes are zero.
OK.  Kindof surprised this wasn't dealt with before.  This sounds soo
much like something I've fixed in the past...

Jeff

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJOQsWZAAoJEBRtltQi2kC7tK0H/1bCmSvyrnDU+pmPo1blJzNo
MGcfo5pHiWnmEDO96HfR0OikLDj63Gu/Fks+c6NuZ+8dFvYCAldIDIprb7+27soi
VIxnGDkXFDJnqOLl7Nri6TW6CWazEPjQtALIcK2a4D+D20ZPlqLao6MIZSzdGdlk
5+giDVZ2sed7XmJYH413R53uTin/TxVCMijYP6smmBydXkdTgjRntNiq/kNBsnEo
v050MH5A5Wiyg4m22FB1vnmL3S4i8Vhq/lZANy/tMzdyO6eyoY36RvBUmBJ9ShAN
QYXrllnqwpzlsAdou5L1TmeBRk0iI9xxn1NkqDkDHo2er5oZTAzqqit0bht0zEw=
=paRW
-END PGP SIGNATURE-


Re: [c++] Keep tm, div_t, ldiv_t, lconv mangling on Solaris (PR libstdc++-v3/1773)

2011-08-10 Thread Paolo Carlini

Hi,
This seems like a problem with the Solaris headers that should be 
handled by fixincludes.


The goal of the set of patches, apart from defining 
__cplusplus=199711L, was to start using the Solaris headers properly, 
without setting weird macros to ignore half of them or fixincluding 
half of their content out. Fixincluding this seems like a shame. 
Solaris headers declare std::tm, which is great. It just happens to 
break binary compatibility, so until the next great g++ ABI break, we 
need some kind of workaround. Telling the compiler that std::tm should 
be mangled as if it was ::tm looked like a simple enough solution.


We could of course surround the 4 struct definitions with:
#if __cplusplus >= 199711L
}
#endif

and:
#if __cplusplus >= 199711L
namespace std {
using ::thing;
#endif

Or the switch to __cplusplus=199711L will have to wait until the next 
ABI break...
All in all it seems to me that we are pretty close to be able to fix 
this old issue now and we are even in Stage 1, thus we can afford to 
take a bit of risk and handle possible fallout, thus I would recommend 
we do for now the above preprocessor dance (we are talking only about 4 
instances) but controlled by a macro in os_defines.h, as Rainer 
correctly did already elsewhere. Rainer, can you test such change?


Thanks,
Paolo.


Re: [pph] Version 2: Clear test commit conflicts (issue4844060)

2011-08-10 Thread Diego Novillo
On Wed, Aug 10, 2011 at 13:17, Gabriel Charette  wrote:

> 2011-08-10  Gabriel Charette  
>
>        * g++.dg/pph/x5dynarray7.h: Remove 2 bogus errors.
>        * g++.dg/pph/x6dynarray6.h: Remove 2 bogus errors.
>        Add 2 bogus errors.
>        * g++.dg/pph/x7dynarray5.cc: Remove 2 bogus errors.
>        * g++.dg/pph/x7dynarray6.cc: Remove 2 bogus errors.
>        * g++.dg/pph/x7dynarray7.cc: Remove 2 bogus errors.

OK.


Diego.


Re: [libcpp] Correctly define __cplusplus (PR libstdc++-v3/1773)

2011-08-10 Thread Gabriel Dos Reis
On Wed, Aug 10, 2011 at 7:12 AM, Rainer Orth
 wrote:
> Jason Merrill  writes:
>
>> On 08/09/2011 09:14 AM, Marc Glisse wrote:
>>> I don't think we should define the C++ 2011 value yet. In my opinion, we
>>> should wait until:
>>> 1) the standard is official
>>> 2) gcc implements most of it: people will want to use __cplusplus as a
>>> test to know if they can use C++0X features, not if the compiler does
>>> some effort to implement half of them.
>>
>> I'm of two minds about this, but I see that clang and edg still use 199711L
>> in C++0x mode, so let's stick with that for now.
>
> Ok, I just wanted to bring it up.  I'll resubmit with the 201103L change
> removed once the other patches (tm etc. mangling and libstdc++) are
> resolved.

After the experience with C++98 implementation, I have also become two-minded
about this.  In the end I agree with Jason that we should just keep
the C++03 value
for the moment.


Re: [pph] small multi-pph tests (issue4810074)

2011-08-10 Thread Gabriel Charette
It didn't fix everything.

Potentially another commit conflict with Diego's trunk merge from yesterday?

I just sent out another patch that fixes everything that was left on
my end. Let me know if it works on your end.

Cheers,
Gab

On Tue, Aug 9, 2011 at 7:26 PM, Lawrence Crowl  wrote:
> Let me know if my recent push has not solved this problem.
>
> On 8/5/11, Gabriel Charette  wrote:
>> I now get the following test failure output after pulling this patch
>> (potentially from the almost concurrent checkin of my linetable
>> patch?)
>>
>> I can send you my diff's if you need to compare.
>>
>> FAIL: g++.dg/pph/c4inline.cc  (assembly comparison, sums 46031=>36250)
>> FAIL: g++.dg/pph/x1keyed.cc  (assembly comparison, sums 17458=>63070)
>> FAIL: g++.dg/pph/x1keyno.cc  (assembly comparison, sums 20949=>46318)
>> XPASS: g++.dg/pph/x4keyed.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x4keyed.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> FAIL: g++.dg/pph/x4keyno.cc  (assembly comparison, sums 64958=>17472)
>> FAIL: g++.dg/pph/x4template.cc  (assembly comparison, sums 23306=>52012)
>> XPASS: g++.dg/pph/x6rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x6rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> XPASS: g++.dg/pph/x7rtti.cc  -fpph-map=pph.map -I.  (test for bogus
>> messages, line )
>> # of expected passes          277
>> # of unexpected failures      5
>> # of unexpected successes     45
>> # of expected failures                42
>>
>> Gab
>>
>> On Fri, Aug 5, 2011 at 11:15 AM, Lawrence Crowl  wrote:
>>> This patch ads a bunch of small tests for multi-pph includes.
>>>
>>> Tested on x64.
>>>
>>>
>>> Index: gcc/testsuite/ChangeLog.pph
>>>
>>> 2011-08-04  Lawrence Crowl  
>>>
>>>        * g++.dg/pph/README: Add new file types.
>>>        * g++.dg/pph/a0expinstinl.h: New.
>>>        * g++.dg/pph/a0expinstnin.h: New.
>>>        * g++.dg/pph/a0inline.h: New.
>>>        * g++.dg/pph/a0keyed.h: New.
>>>        * g++.dg/pph/a0keyno.h: New.
>>>        * g++.dg/pph/a0noninline.h: New.
>>>        * g++.dg/pph/a0nontrivinit.h: New.
>>>        * g++.dg/pph/a0rawstruct.h: New.
>>>        * g++.dg/pph/a0rtti.h: New.
>>>        * g++.dg/pph/a0template.h: New.
>>>        * g++.dg/pph/a0tmplclass.h: New.
>>>        * g++.dg/pph/a0typedef.h: New.
>>>        * g++.dg/pph/a0variables1.h: New.
>>>        * g++.dg/pph/a0variables2.h: New.
>>>        * g++.dg/pph/c0inline1.h: New.
>>>        * g++.dg/pph/c0inline2.h: New.
>>>        * g++.dg/pph/c0rawstruct1.h: New.
>>>        * g++.dg/pph/c0rawstruct2.h: New.
>>>        * g++.dg/pph/c0typedef1.h: New.
>>>        * g++.dg/pph/c0typedef2.h: New.
>>>        * g++.dg/pph/c0variables.h: Contents to a0variables.h.
>>>        Renamed to c0variables1.h.
>>>        * g++.dg/pph/c0variables1.h: New.
>>>        * g++.dg/pph/c0variables2.h: New.
>>>        * g++.dg/pph/c0variables3.h: New.
>>>        * g++.dg/pph/c0variables4.h: New.
>>>        * g++.dg/pph/c1variables.cc: Handle renaming.
>>>        * g++.dg/pph/c3rawstruct.cc: New.
>>>        * g++.dg/pph/c3rawstruct.s: New.
>>>        * g++.dg/pph/c3typedef.cc: New.
>>>        * g++.dg/pph/c3variables.cc: New.
>>>        * g++.dg/pph/c4inline.cc: New.
>>>        * g++.dg/pph/e0noninline1.h: New.
>>>        * g++.dg/pph/e0noninline2.h: New.
>>>        * g++.dg/pph/e4noninline.cc: New.
>>>        * g++.dg/pph/e4variables.cc: New.
>>>        * g++.dg/pph/pph.exp: Add FIXME.
>>>        * g++.dg/pph/x0keyed1.h: New.
>>>        * g++.dg/pph/x0keyed2.h: New.
>>>        * g++.dg/pph/x0keyno1.h: New.
>>>        * g++.dg/pph/x0keyno2.h: New.
>>>        * g++.dg/pph/x0nontrivinit.h: Contents to a0nontrivinit.h.
>>>        Renamed to x0nontrivinit1.h.
>>>        * g++.dg/pph/x0nontrivinit1.h: Renamed from x0nontrivinit.h.
>>>        * g++.dg/pph/x0nontrivinit2.h: New.
>>>        * g++.dg/pph/x0template.h: Contents to a0template.h.
>>>        Renamed to x0template1.h.
>>>        * g++.dg/pph/x0template1.h: Renamed from x0template.
>>>     

[pph] Version 2: Clear test commit conflicts (issue4844060)

2011-08-10 Thread Gabriel Charette
Here is what it takes, on top of Lawrence's first patch, to clear new test 
errors on my end.

Not sure what fixed the common removal of
// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
maybe the trunk merge??

Seems like x6dynarray6 now has the same errors x6dynarray5 already had, don't 
know why this only shows up now...?

Also, since the trunk merge, `make check-c++ RUNTESTFLAGS=pph.exp` ran from 
bld/ now also runs c++0x tests it seems.
Diego said we don't care about those yet, so I changed my test run to be `make 
check-g++ RUNTESTFLAGS=pph.exp` ran from bld/gcc/

Gab

2011-08-10  Gabriel Charette  

* g++.dg/pph/x5dynarray7.h: Remove 2 bogus errors.
* g++.dg/pph/x6dynarray6.h: Remove 2 bogus errors.
Add 2 bogus errors.
* g++.dg/pph/x7dynarray5.cc: Remove 2 bogus errors.
* g++.dg/pph/x7dynarray6.cc: Remove 2 bogus errors.
* g++.dg/pph/x7dynarray7.cc: Remove 2 bogus errors.

diff --git a/gcc/testsuite/g++.dg/pph/x5dynarray7.h 
b/gcc/testsuite/g++.dg/pph/x5dynarray7.h
index 7aae396..5ee5d8c 100644
--- a/gcc/testsuite/g++.dg/pph/x5dynarray7.h
+++ b/gcc/testsuite/g++.dg/pph/x5dynarray7.h
@@ -1,7 +1,5 @@
 // { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
 // { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H" "" { xfail *-*-* } 0 }
-// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
-// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
 
 #ifndef X5DYNARRAY7_H
 #define X5DYNARRAY7_H
diff --git a/gcc/testsuite/g++.dg/pph/x6dynarray6.h 
b/gcc/testsuite/g++.dg/pph/x6dynarray6.h
index a8e48c1..497eb46 100644
--- a/gcc/testsuite/g++.dg/pph/x6dynarray6.h
+++ b/gcc/testsuite/g++.dg/pph/x6dynarray6.h
@@ -1,7 +1,7 @@
 // { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
 // { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H" "" { xfail *-*-* } 0 }
-// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
-// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
+// { dg-bogus "a0dynarray-dfn1b.hi:3:19: error: there are no arguments to 
.alloc. that depend on a template parameter, so a declaration of .alloc. must 
be available" "" { xfail *-*-* } 0 }
+// { dg-bogus "a0dynarray-dfn3c.hi:2:36: error: no .void 
tst::dynarray::check.tst::dynarray::size_type.. member function declared 
in class .tst::dynarray." "" { xfail *-*-* } 0 }
 
 #ifndef X6DYNARRAY6_H
 #define X6DYNARRAY6_H
diff --git a/gcc/testsuite/g++.dg/pph/x7dynarray5.cc 
b/gcc/testsuite/g++.dg/pph/x7dynarray5.cc
index f512bad..d7b17a3 100644
--- a/gcc/testsuite/g++.dg/pph/x7dynarray5.cc
+++ b/gcc/testsuite/g++.dg/pph/x7dynarray5.cc
@@ -1,7 +1,5 @@
 // { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
 // { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H" "" { xfail *-*-* } 0 }
-// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
-// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
 
 #include "x0dynarray4.h"
 #include "x6dynarray5.h"
diff --git a/gcc/testsuite/g++.dg/pph/x7dynarray6.cc 
b/gcc/testsuite/g++.dg/pph/x7dynarray6.cc
index 1585be0..0292890 100644
--- a/gcc/testsuite/g++.dg/pph/x7dynarray6.cc
+++ b/gcc/testsuite/g++.dg/pph/x7dynarray6.cc
@@ -1,7 +1,5 @@
 // { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
 // { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H" "" { xfail *-*-* } 0 }
-// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
-// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
 
 #include 
 #include 
diff --git a/gcc/testsuite/g++.dg/pph/x7dynarray7.cc 
b/gcc/testsuite/g++.dg/pph/x7dynarray7.cc
index bf0a047..08398be 100644
--- a/gcc/testsuite/g++.dg/pph/x7dynarray7.cc
+++ b/gcc/testsuite/g++.dg/pph/x7dynarray7.cc
@@ -1,7 +1,5 @@
 // { dg-xfail-if "BOGUS" { "*-*-*" } { "-fpph-map=pph.map" } }
 // { dg-bogus "wchar.h:1:0: error: PPH file stdio.pph fails macro validation, 
_WCHAR_H" "" { xfail *-*-* } 0 }
-// { dg-bogus "unistd.h:1144:34: error: declaration of .* ctermid.* has a 
different exception specifier" "" { xfail *-*-* } 0 }
-// { dg-bogus "stdio.h:858:14: error: from previous declaration .* ctermid.*" 
"" { xfail *-*-* } 0 }
 
 #include 
 #include 

--
This patch is available for review at http://codereview.appspot.com/4844060


[PATCH] [JAVA] patch for Java on RTEMS

2011-08-10 Thread Jie Liu
Hi,

For the previous analysis "libjava patches for RTEMS"[1], there are 6
cases need to pay more attention. PR18699, TLtest, Thread_Interrupt,
Thread_Sleep_2, Throw_2 and bclink.

After add patch to bdwgc for RTEMS pthread support[2]:
PR18699PASS after Modify   [A bug in boehm-gc on
RTEMS, not related to libjava]
TLtestPASS
Thread_Interrupt  PASS

For the other 3 cases:
Thread_Sleep_2  PASS   [After add patch to rtems]
Throw_2 PASS   [After add patch to
libjava, still need some work][3]
bclinkUnSupport
[-findirect-dispatch do not support in this case]

I think it's time to send out the patch for review, because it may
need much time to modify. The patch is attached.

How to mark those unsupported cases as expected failures on *-*-rtems* ?

[1]http://gcc.gnu.org/ml/java-patches/2011-q3/msg00016.html
[2]http://gcc.gnu.org/ml/java-patches/2011-q3/msg00042.html
[3]http://gcc.gnu.org/ml/java-patches/2011-q3/msg00037.html

Thanks,
Jie


libjava.patch
Description: Binary data


Re: [patch, ia64] Fix unaligned accesses on IA64 from dwarf2out.c

2011-08-10 Thread Richard Henderson
On 08/10/2011 09:52 AM, Steve Ellcey wrote:
> * md5.c (md5_read_ctx): Handle mis-aligned resbuf pointer.

Ok.


r~


Re: [patch, ia64] Fix unaligned accesses on IA64 from dwarf2out.c

2011-08-10 Thread Steve Ellcey
On Wed, 2011-08-10 at 11:19 +0200, Richard Guenther wrote:
> On Wed, Aug 10, 2011 at 10:48 AM, Pedro Alves  wrote:
> >
> > which makes me wonder if the right fix isn't to change
> > libiberty internally to not rely on the alignment.
> 
> I think that would be the best fix.  It hardly can be a performance
> critical part - libiberty could use a local aligned buffer for
> compute and do a copy on return.
> 
> Richard.

I like this idea.  How about this patch.  I am still testing it but it
should work.

Steve Ellcey
s...@cup.hp.com


2011-08-10  Steve Ellcey  

* md5.c (md5_read_ctx): Handle mis-aligned resbuf pointer.


Index: md5.c
===
--- md5.c   (revision 177411)
+++ md5.c   (working copy)
@@ -76,15 +76,19 @@ md5_init_ctx (struct md5_ctx *ctx)
 /* Put result from CTX in first 16 bytes following RESBUF.  The result
must be in little endian byte order.
 
-   IMPORTANT: On some systems it is required that RESBUF is correctly
-   aligned for a 32 bits value.  */
+   IMPORTANT: RESBUF may not be aligned as strongly as MD5_UNIT32 so we
+   put things in a local (aligned) buffer first, then memcpy into RESBUF.  */
 void *
 md5_read_ctx (const struct md5_ctx *ctx, void *resbuf)
 {
-  ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
-  ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
-  ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
-  ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
+  md5_unit32 buffer[4];
+
+  buffer[0] = SWAP (ctx->A);
+  buffer[1] = SWAP (ctx->B);
+  buffer[2] = SWAP (ctx->C);
+  buffer[3] = SWAP (ctx->D);
+
+  memcpy (resbuf, buffer, 16);
 
   return resbuf;
 }




Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Pedro Alves
On Wednesday 10 August 2011 17:05:08, Rainer Orth wrote:
> Paolo Bonzini  writes:
> 
> >> True: it is called once per multilib.
> >
> > Just to doublecheck, are we sure that unwind.h is always the same?
> 
> Yep: it's unwind-generic.h for almost all targets, just a few arm
> targets use config/arm/unwind-arm.h for all multilibs.

Doesn't each multilib get its own build subdir?
Can't the file be copied there instead and thus get rid of this wart?

-- 
Pedro Alves


Re: [c++] Keep tm, div_t, ldiv_t, lconv mangling on Solaris (PR libstdc++-v3/1773)

2011-08-10 Thread Marc Glisse

On Wed, 10 Aug 2011, Jason Merrill wrote:

This seems like a problem with the Solaris headers that should be handled by 
fixincludes.


The goal of the set of patches, apart from defining __cplusplus=199711L, 
was to start using the Solaris headers properly, without setting weird 
macros to ignore half of them or fixincluding half of their content out. 
Fixincluding this seems like a shame. Solaris headers declare std::tm, 
which is great. It just happens to break binary compatibility, so until 
the next great g++ ABI break, we need some kind of workaround. Telling the 
compiler that std::tm should be mangled as if it was ::tm looked like a 
simple enough solution.


We could of course surround the 4 struct definitions with:
#if __cplusplus >= 199711L
}
#endif

and:
#if __cplusplus >= 199711L
namespace std {
using ::thing;
#endif

Or the switch to __cplusplus=199711L will have to wait until the next ABI 
break...


I understand that you may not be happy with the idea of touching the 
mangler for a platform-specific temporary issue.


(I am speaking independently of the quality of the patch itself)

--
Marc Glisse


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Paolo Bonzini

On 08/10/2011 06:05 PM, Rainer Orth wrote:

>>  True: it is called once per multilib.

>
>  Just to doublecheck, are we sure that unwind.h is always the same?

Yep: it's unwind-generic.h for almost all targets, just a few arm
targets use config/arm/unwind-arm.h for all multilibs.


Patch doing rm -f is preapproved then.

Paolo



Re: [PATCH] [JAVA] Double.parseDouble(null) throw NullPointerException

2011-08-10 Thread Tom Tromey
> "Jie" == Jie Liu  writes:

Jie> +  *-*-rtems*)
Jie> +  can_unwind_signal=no
Jie> +  CHECKREFSPEC=-fcheck-references
Jie> +  DIVIDESPEC=-fuse-divide-subroutine
Jie> +  ;;

This part is OK with a ChangeLog entry.

Jie> +

Spurious newline addition.

Jie> But it does not work as we want, is there something wrong?

Did you rebuild all of libgcj?

If you did, then I don't know, you'll have to debug it, sorry.

I vaguely recollect that -fcheck-references adds a check for 'this' at
the start of final methods.  If I'm misremembering, then that is
probably the problem.

Tom


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Rainer Orth
Paolo Bonzini  writes:

>> True: it is called once per multilib.
>
> Just to doublecheck, are we sure that unwind.h is always the same?

Yep: it's unwind-generic.h for almost all targets, just a few arm
targets use config/arm/unwind-arm.h for all multilibs.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Paolo Bonzini

On 08/10/2011 03:56 PM, Rainer Orth wrote:

"Joseph S. Myers"  writes:


This is strange: they copy explicitly goes into $(gcc_objdir): from
libgcc/Makefile.in:

install-unwind_h:
 cp unwind.h $(gcc_objdir)/include/unwind.h
 chmod a+r $(gcc_objdir)/include/unwind.h

For an in-tree build, the source directory cannot be read-only, for a
VPATH build I don't see how this can happen.  Could you please check?


This is a VPATH build and the issue is that the *file* unwind.h is
readonly having been copied from a readonly source (and install-unwind_h


I see.  I'd been thinking of a source tree mounted read-only, not the
actual files changed to be read-only.


must, I suppose, end up getting called more than once so that the second
copy tries to copy over a readonly file; the 26478 fix was to remove the


True: it is called once per multilib.


Just to doublecheck, are we sure that unwind.h is always the same?

Paolo


Re: RFC: [build, ada] Centralize PICFLAG configuration

2011-08-10 Thread Paolo Bonzini

On 08/10/2011 01:42 PM, Rainer Orth wrote:

* Centralize the determination of PICFLAG.  Currently, three libraries
   inside the gcc tree are built PIC without libtool: libgcc, libiberty,
   and libgnat/libgnarl.

   libiberty/configure.ac has a hardcoded list of PICFLAG that could be
   moved to a toplevel config/picflag.m4.


That's the simplest alternative.  It would need however a pass through 
the config/ directory for targets that are never used as hosts for GCC 
(and thus libiberty).


Alternatively, the libtool code could be extracted to config/picflag.m4.


   Alternatively, one could think about using libtool --config | grep
   pic_flag to determine the flag without actually using libtool.

   Last, one completely could go for libtool, but I very much doubt such
   a suggestion would get much traction.

My current plan is to merge the PICFLAG information from libiberty and
libgcc into picflag.m4 and use that.


Yes, that needs to be done of course.  I'm not sure if we still support 
gnatlib_and_tools to build libada/gnattools.  If so, we would need the 
PICFLAG to be available somehow in the gcc Makefile (perhaps by 
providing GCC_TARGET_PICFLAG in addition to GCC_PICFLAG in picflag.m4).


Paolo


RFA: Do not use cmpstrnsi to implement builtin memcmp

2011-08-10 Thread Nick Clifton
Hi Guys,

This is a resend of a previously submitted patch:

  http://gcc.gnu.org/ml/gcc-patches/2009-03/msg00363.html

This version of the patch has been adjusted to apply to today's sources,
but otherwise it remains the same.

The point of the patch is that the cmpstrnsi machine pattern should not
be used to implement the memcmp builtin function.  This is because a
string comparison will terminate if two zero bytes are read whereas a
memory comparison should continue.

Tested without regressions on i686-pc-linux-gnu and rx-elf toolchains.

OK to apply ?

Cheers
  Nick

gcc/ChangeLog
2011-08-17  Nick Clifton  

* builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
* doc/md.texi (cmpstrn): Note that the comparison stops if both
fetched bytes are zero.
(cmpstr): Likewise.
(cmpmem): Note that the comparison does not stop if both of the
fetched bytes are zero.

Index: gcc/doc/md.texi
===
--- gcc/doc/md.texi	(revision 177611)
+++ gcc/doc/md.texi	(working copy)
@@ -4680,8 +4680,9 @@
 string.  The instruction is not allowed to prefetch more than one byte
 at a time since either string may end in the first byte and reading past
 that may access an invalid page or segment and cause a fault.  The
-effect of the instruction is to store a value in operand 0 whose sign
-indicates the result of the comparison.
+comparison terminates early if the fetched bytes are different or if
+they are equal to zero.  The effect of the instruction is to store a
+value in operand 0 whose sign indicates the result of the comparison.
 
 @cindex @code{cmpstr@var{m}} instruction pattern
 @item @samp{cmpstr@var{m}}
@@ -4699,8 +4700,10 @@
 order starting at the beginning of each string.  The instruction is not allowed
 to prefetch more than one byte at a time since either string may end in the
 first byte and reading past that may access an invalid page or segment and
-cause a fault.  The effect of the instruction is to store a value in operand 0
-whose sign indicates the result of the comparison.
+cause a fault.  The comparison will terminate when the fetched bytes
+are different or if they are equal to zero.  The effect of the
+instruction is to store a value in operand 0 whose sign indicates the
+result of the comparison.
 
 @cindex @code{cmpmem@var{m}} instruction pattern
 @item @samp{cmpmem@var{m}}
@@ -4708,9 +4711,10 @@
 of @samp{cmpstr@var{m}}.  The two memory blocks specified are compared
 byte by byte in lexicographic order starting at the beginning of each
 block.  Unlike @samp{cmpstr@var{m}} the instruction can prefetch
-any bytes in the two memory blocks.  The effect of the instruction is
-to store a value in operand 0 whose sign indicates the result of the
-comparison.
+any bytes in the two memory blocks.  Also unlike @samp{cmpstr@var{m}}
+the comparison will not stop if both bytes are zero.  The effect of
+the instruction is to store a value in operand 0 whose sign indicates
+the result of the comparison.
 
 @cindex @code{strlen@var{m}} instruction pattern
 @item @samp{strlen@var{m}}
Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 177611)
+++ gcc/builtins.c	(working copy)
@@ -3634,9 +3634,9 @@
 }
 
 /* Expand expression EXP, which is a call to the memcmp built-in function.
-   Return NULL_RTX if we failed and the
-   caller should emit a normal call, otherwise try to get the result in
-   TARGET, if convenient (and in mode MODE, if that's convenient).  */
+   Return NULL_RTX if we failed and the caller should emit a normal call,
+   otherwise try to get the result in TARGET, if convenient (and in mode
+   MODE, if that's convenient).  */
 
 static rtx
 expand_builtin_memcmp (tree exp, ATTRIBUTE_UNUSED rtx target,
@@ -3648,7 +3648,10 @@
  			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
 return NULL_RTX;
 
-#if defined HAVE_cmpmemsi || defined HAVE_cmpstrnsi
+  /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
+ implementing memcmp because it will stop if it encounters two
+ zero bytes.  */
+#if defined HAVE_cmpmemsi
   {
 rtx arg1_rtx, arg2_rtx, arg3_rtx;
 rtx result;
@@ -3663,16 +3666,9 @@
   = get_pointer_alignment (arg2, BIGGEST_ALIGNMENT) / BITS_PER_UNIT;
 enum machine_mode insn_mode;
 
-#ifdef HAVE_cmpmemsi
 if (HAVE_cmpmemsi)
   insn_mode = insn_data[(int) CODE_FOR_cmpmemsi].operand[0].mode;
 else
-#endif
-#ifdef HAVE_cmpstrnsi
-if (HAVE_cmpstrnsi)
-  insn_mode = insn_data[(int) CODE_FOR_cmpstrnsi].operand[0].mode;
-else
-#endif
   return NULL_RTX;
 
 /* If we don't have POINTER_TYPE, call the function.  */
@@ -3697,18 +3693,10 @@
 	set_mem_size (arg2_rtx, INTVAL (arg3_rtx));
   }
 
-#ifdef HAVE_cmpmemsi
 if (HAVE_cmpmemsi)
   insn = gen_cmpmemsi (result, arg1_rtx, arg2_rtx, arg3_rtx,
 			   GEN_INT (MIN (arg1_align, arg2_align)));

Re: [PATCH][C++] Remove last use of can_trust_pointer_alignment

2011-08-10 Thread Richard Guenther
On Wed, 10 Aug 2011, Richard Guenther wrote:

> On Wed, 10 Aug 2011, Jason Merrill wrote:
> 
> > On 08/10/2011 08:35 AM, Richard Guenther wrote:
> > >   * call.c (build_over_call): Call memcpy unconditionally.
> > 
> > OK.  Have you tested the MEM_REF patch?
> 
> No, not yet.  I'll throw it to testing now.

The following is what passed bootstrap sofar and is in testing now.

Richard.

Index: gcc/cp/call.c
===
--- gcc/cp/call.c   (revision 177625)
+++ gcc/cp/call.c   (working copy)
@@ -6766,19 +6766,22 @@ build_over_call (struct z_candidate *can
}
   else
{
- /* We must only copy the non-tail padding parts.
-Use __builtin_memcpy for the bitwise copy.  */
- tree arg0, arg1, arg2, t;
+ /* We must only copy the non-tail padding parts.  */
+ tree arg0, arg2, t;
+ tree array_type, alias_set;
 
  arg2 = TYPE_SIZE_UNIT (as_base);
- arg1 = arg;
  arg0 = cp_build_addr_expr (to, complain);
 
- t = implicit_built_in_decls[BUILT_IN_MEMCPY];
- t = build_call_n (t, 3, arg0, arg1, arg2);
-
- t = convert (TREE_TYPE (arg0), t);
- val = cp_build_indirect_ref (t, RO_NULL, complain);
+ array_type = build_array_type (char_type_node,
+build_index_type
+  (size_binop (MINUS_EXPR,
+   arg2, size_int (1;
+ alias_set = build_int_cst (build_pointer_type (type), 0);
+ t = build2 (MODIFY_EXPR, void_type_node,
+ build2 (MEM_REF, array_type, arg0, alias_set),
+ build2 (MEM_REF, array_type, arg, alias_set));
+ val = build2 (COMPOUND_EXPR, TREE_TYPE (to), t, to);
   TREE_NO_WARNING (val) = 1;
}
 


Re: [pph] Add initial support for including nested pph images (issue4847044)

2011-08-10 Thread Diego Novillo
On Tue, Aug 9, 2011 at 20:52, Gabriel Charette  wrote:

> An ICE in lto_streamer_cache_get is not very intuitive to point out
> the problem to the user in this case imo

Right.

> I'm thinking we should at least warn when some flags like this one
> differ and when that difference has an impact on the internal
> representation and can produce ICEs.

Yes, we can use the header of the PPH file to add anything we need to
do compatibility tests: original flags, version numbers, etc.  When we
open the file we can do these checks to avoid reading files
incompatible with the current compilation context.


Diego.


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Joseph S. Myers
On Wed, 10 Aug 2011, Rainer Orth wrote:

> Could you try the obvious patch?  It's probably quicker than me
> recreating the setup.

Actually I think it will be quicker for you to do this test.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: plugin event for C/C++ declarations

2011-08-10 Thread Diego Novillo
On Mon, Aug 8, 2011 at 07:39, Romain Geissler  wrote:
> 2011/7/20 Diego Novillo :
>> On Mon, Jul 18, 2011 at 03:06, Romain Geissler
>> I will commit this patch shortly.
>>
>>
>> Diego.
>>
>
> Ping !

Romain, please send me a current patch against today's trunk.


Thanks.  Diego.


Re: [PLUGIN] Install c-tree.h header

2011-08-10 Thread Diego Novillo
On Tue, Aug 2, 2011 at 10:09, Romain Geissler  wrote:

> 2011-08-02  Romain Geissler  
>
>        * Makefile.in (PLUGIN_HEADERS): Add C_TREE_H.

OK.


Diego.


Re: [PATCH] [JAVA] Double.parseDouble(null) throw NullPointerException

2011-08-10 Thread Jie Liu
2011/8/10 Tom Tromey :
>> "Jie" == Jie Liu  writes:
>
> Jie> RTEMS does not have virtual memory management, so there is no error
> Jie> when access the 0 address on rtems.
> Jie> So 'str->length()' donot throw NPE and just return an meaningless value.
>
> If you compile the Java parts of the library with -fcheck-references, it
> should work.  This is something you have to set up as part of the port,
> as it is decided at (libgcj-) configure time.
>
Thank you very much for the information you provide. :)

I add the following patch for -fcheck-references:
Index: configure.host
===
--- configure.host  (revision 172224)
+++ configure.host  (working copy)
@@ -347,8 +347,14 @@
slow_pthread_self=
can_unwind_signal=yes
;;
+  *-*-rtems*)
+   can_unwind_signal=no
+   CHECKREFSPEC=-fcheck-references
+   DIVIDESPEC=-fuse-divide-subroutine
+   ;;
 esac

+
 case "${host}" in
   *-cygwin* | *-mingw*)
fallback_backtrace_h=sysdep/i386/backtrace.h

And we can see the following have -fcheck-references:
/mnt/gcj/mytoolchain/libexec/gcc/i386-rtems/4.7.0/jc1
/tmp/ccYuDgD5.jar -fsource-filename=HelloWorld.java
-fhash-synchronization -fuse-divide-subroutine -fcheck-references
-fuse-boehm-gc -fkeep-inline-functions -quiet -dumpbase
HelloWorld.java -mtune=i386 -march=i386 -auxbase HelloWorld -g -g
-Wall -version -fsaw-java-file
-fbootclasspath=./:/usr/share/java/ecj.jar:/mnt/gcj/mytoolchain/share/java/libgcj-4.7.0.jar
-faux-classpath /tmp/cc6nFx0I.zip -o /tmp/ccCuf7ju.s
GNU Java (GCC) version 4.7.0 20110409 (experimental) (i386-rtems)

But it does not work as we want, is there something wrong?

Thanks,
Jie

> This won't help with the native code.  IIRC in the end we just gave up
> on that; if you wanted real correctness you would have to add a null
> check at every dereference in the C++ code.  I believe Andrew had a g++
> patch to do this in the compiler, but it was rejected.
>
> Tom
>


Re: [PATCH][C++] Remove last use of can_trust_pointer_alignment

2011-08-10 Thread Richard Guenther
On Wed, 10 Aug 2011, Jason Merrill wrote:

> On 08/10/2011 08:35 AM, Richard Guenther wrote:
> > * call.c (build_over_call): Call memcpy unconditionally.
> 
> OK.  Have you tested the MEM_REF patch?

No, not yet.  I'll throw it to testing now.

Richard.


Re: [c++] Keep tm, div_t, ldiv_t, lconv mangling on Solaris (PR libstdc++-v3/1773)

2011-08-10 Thread Jason Merrill
This seems like a problem with the Solaris headers that should be 
handled by fixincludes.


Jason


Re: [PATCH][C++] Remove last use of can_trust_pointer_alignment

2011-08-10 Thread Jason Merrill

On 08/10/2011 08:35 AM, Richard Guenther wrote:

* call.c (build_over_call): Call memcpy unconditionally.


OK.  Have you tested the MEM_REF patch?

Jason



Re: [PATCH Atom][PR middle-end/44382] Tree reassociation improvement

2011-08-10 Thread Ilya Enkovich
Hello,

Here is a new version of the patch. Changes from the previous version
(http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02240.html):
 - updated to trunk
 - TODO_remove_unused_locals flag was removed from todo_flags_finish
of reassoc pass

Bootstrapped and checked on x86_64-linux.

Thanks,
Ilya
---
gcc/

2011-08-10  Enkovich Ilya  

PR middle-end/44382
* target.def (reassociation_width): New hook.

* doc/tm.texi.in (reassociation_width): Likewise.

* doc/tm.texi (reassociation_width): Likewise.

* doc/invoke.texi (tree-reassoc-width): New param documented.

* hooks.h (hook_int_uint_mode_1): New default hook.

* hooks.c (hook_int_uint_mode_1): Likewise.

* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_REASSOC_INT_TO_PARALLEL and
X86_TUNE_REASSOC_FP_TO_PARALLEL.

(TARGET_REASSOC_INT_TO_PARALLEL): New.
(TARGET_REASSOC_FP_TO_PARALLEL): Likewise.

* config/i386/i386.c (initial_ix86_tune_features): Add
X86_TUNE_REASSOC_INT_TO_PARALLEL and
X86_TUNE_REASSOC_FP_TO_PARALLEL.

(ix86_reassociation_width) implementation of
new hook for i386 target.

* params.def (PARAM_TREE_REASSOC_WIDTH): New param added.

* tree-ssa-reassoc.c (get_required_cycles): New function.
(get_reassociation_width): Likewise.
(swap_ops_for_binary_stmt): Likewise.
(rewrite_expr_tree_parallel): Likewise.

(rewrite_expr_tree): Refactored. Part of code moved into
swap_ops_for_binary_stmt.

(reassociate_bb): Now checks reassociation width to be used
and call rewrite_expr_tree_parallel instead of rewrite_expr_tree
if needed.

gcc/testsuite/

2011-08-10  Enkovich Ilya  

* gcc.dg/tree-ssa/pr38533.c (dg-options): Added option
--param tree-reassoc-width=1.

* gcc.dg/tree-ssa/reassoc-24.c: New test.
* gcc.dg/tree-ssa/reassoc-25.c: Likewise.


PR44382.diff
Description: Binary data


Re: PATCH: PR target/35757: [4.4 Regression] Incorrect contraint on sse4_1_blendp

2011-08-10 Thread Richard Guenther
On Wed, Aug 10, 2011 at 4:42 PM, H.J. Lu  wrote:
> On Wed, Aug 10, 2011 at 7:12 AM, Richard Guenther
>  wrote:
>> On Wed, Aug 10, 2011 at 4:04 PM, H.J. Lu  wrote:
>>> On Wed, Aug 10, 2011 at 6:46 AM, Richard Guenther
>>>  wrote:
 On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu  wrote:
> This patch restores proper checking the third argument on blendpd and
> and blendps.  It also adds 2 tests, including pblendw.  Tested on
> Linux/Intel64. OK to install?

 The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
 src3 is used uninitialized.

>>>
>>> SRC2 may be uninitialized.  But I never saw random failures
>>> since it checks if random value in SRC2 is properly blended.
>>
>> No, src2 is initialized via init_blendps (src1.f, src2.f), src3 is
>> uninitialized.
>> I see random execute fails on Nehalem.
>>
>> I suppose we might optimize the uninitialized memory (it's probably
>> committed to registers) based on the undefined behavior.
>>
>> Please avoid this by initializing src3 properly.
>>
>
> Here is a patch.  OK for trunk?

Ok.

Richard.

>
> --
> H.J.
> 
> diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
> b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
> index b66bbfd..af56e14 100644
> --- a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
> +++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
> @@ -6,6 +6,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  #define NUM 20
>
> @@ -52,11 +53,15 @@ sse4_1_test (void)
>     {
>       __m128 x;
>       float f[4];
> +      int i[4];
>     } src3;
>   int i;
>
>   init_blendps (src1.f, src2.f);
>
> +  for (i = 0; i < 4; i++)
> +    src3.i[i] = (int) random ();
> +
>   /* Check blendps imm8, m128, xmm */
>   for (i = 0; i < NUM; i++)
>     {
>


Re: Scalar vector binary operation

2011-08-10 Thread Richard Guenther
On Tue, Aug 9, 2011 at 10:23 PM, Artem Shinkarov
 wrote:
> Sorry, I didn't attach the patch itself.
> Here we go, in the attachment.

I have committed the patch after re-bootstrapping and testing it
on x86_64-unknown-linux-gnu with {,-m32}.

Richard.

>
> Artem.
>


Re: PATCH: PR target/35757: [4.4 Regression] Incorrect contraint on sse4_1_blendp

2011-08-10 Thread H.J. Lu
On Wed, Aug 10, 2011 at 7:12 AM, Richard Guenther
 wrote:
> On Wed, Aug 10, 2011 at 4:04 PM, H.J. Lu  wrote:
>> On Wed, Aug 10, 2011 at 6:46 AM, Richard Guenther
>>  wrote:
>>> On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu  wrote:
 This patch restores proper checking the third argument on blendpd and
 and blendps.  It also adds 2 tests, including pblendw.  Tested on
 Linux/Intel64. OK to install?
>>>
>>> The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
>>> src3 is used uninitialized.
>>>
>>
>> SRC2 may be uninitialized.  But I never saw random failures
>> since it checks if random value in SRC2 is properly blended.
>
> No, src2 is initialized via init_blendps (src1.f, src2.f), src3 is
> uninitialized.
> I see random execute fails on Nehalem.
>
> I suppose we might optimize the uninitialized memory (it's probably
> committed to registers) based on the undefined behavior.
>
> Please avoid this by initializing src3 properly.
>

Here is a patch.  OK for trunk?


-- 
H.J.

diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
index b66bbfd..af56e14 100644
--- a/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c
@@ -6,6 +6,7 @@

 #include 
 #include 
+#include 

 #define NUM 20

@@ -52,11 +53,15 @@ sse4_1_test (void)
 {
   __m128 x;
   float f[4];
+  int i[4];
 } src3;
   int i;

   init_blendps (src1.f, src2.f);

+  for (i = 0; i < 4; i++)
+src3.i[i] = (int) random ();
+
   /* Check blendps imm8, m128, xmm */
   for (i = 0; i < NUM; i++)
 {


Re: New SPU failures (Re: [PATCH, PR 49923] Check for misaligned accesses before doing SRA)

2011-08-10 Thread Richard Guenther
On Wed, 10 Aug 2011, Ulrich Weigand wrote:

> Richard Guenther wrote:
> > On Fri, 5 Aug 2011, Martin Jambor wrote:
> > > the patch below fixes PR 49923 by checking for misaligned accesses
> > > before doing IPA-SRA (on strict alignment targets).  I have checked it
> > > fixes the issue on compile farm sparc64 and I also included this in a
> > > bootstrap and testsuite run on an x86_64-linux just to double check.
> > > 
> > > OK for trunk and the 4.6 branch?
> > 
> > Ok for now.
> > 
> > I think we need to move this to generic middle-end code and also
> > do something about partly strict-alignment targets such as x86_64.
> > Iff expansion would treat expr as if it had non-natural alignment
> > then when building a MEM_REF replacement with non-BLKmode we have
> > to use a properly aligned variant type for it.
> > 
> > I think we can trick FRE/PRE to run into exactly the same situation.
> > 
> > I'll put it on my TODO.
> 
> This caused new failures on spu-elf:
> 
> FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
> cow_.*D.->red with \\*ISRA"
> FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
> cow_.*D.->green with ISRA"
> FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
> calf_.*D.->red with \\*ISRA"
> FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
> calf_.*D.->green with ISRA"
> FAIL: gcc.dg/ipa/ipa-sra-6.c scan-tree-dump-times eipa_sra "foo " 1
> 
> Looking at the last case, tree_non_mode_aligned_mem_p gets called with
> expressions like those:
> 
>   type  type  size 
> unit size 
> align 32 symtab 0 alias set 2 canonical type 0xf6f60180 fields 
>  context 
> pointer_to_this  chain  0xf6ec1030 D.1991>>
> sizes-gimplified public unsigned SI
> size 
> unit size 
> align 32 symtab 0 alias set 5 canonical type 0xf6f602a0>
>
> arg 0 
>
> arg 0 
> visited var def_stmt GIMPLE_NOP
> 
> version 3
> ptr-info 0xf6e760d0>
> arg 1 
> 
> /home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c:16:10>
> arg 1 
> unsigned SI file 
> /home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c line 8 col 
> 17 size  unit size 
> align 32 offset_align 128
> offset 
> bit offset  context  0xf6f60180 bovid>>
> /home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c:16:10>
> 
> Now, the component reference as such doesn't introduce any misalignment;
> there are no attribute-aligned in place anywhere.
> 
> However, the ptr-info of the "cow" parameter is set up to assume nothing
> about alignment.  Therefore, get_object_alignment returns 8, and
> tree_non_mode_aligned_mem_p then of course fails.
> 
> I must admin I continue to be confused about exactly what it is that
> tree_non_mode_aligned_mem_p is supposed to be testing for.  We have:
> 
>   if (TREE_CODE (exp) == SSA_NAME
>   || TREE_CODE (exp) == MEM_REF
>   || mode == BLKmode
>   || is_gimple_min_invariant (exp)
>   || !STRICT_ALIGNMENT)
> return false;
> 
> So if we passed in the plain MEM_REF, this would be considered no problem.
> The COMPONENT_REF does not add any additional misalignment, so one would
> hope that this also shouldn't be a problem.
> 
> However, just because there *is* a COMPONENT_REF around it, we suddenly
> realize the fact that don't have points-to information for the MEM_REF
> and therefore consider *it* (and consequently the whole COMPONENT_REF)
> to be potentially misaligned ...

Yep.  That's because we are totally confused about alignment :/

Martin, what we need to do is get expands idea of what alignment
it woudl assume for a handled_component_ref and compare that
to what it would say if we re-materialize the mem as a MEM_REF.

Unfortunately there isn't a function that you can use to mimic
expands behavior (that of the normal_inner_ref: case), the closest
would be to look for TYPE_PACKED or TYPE_ALIGN in addition to
what get_object_alignment gives us.

Thus something like

  align = get_object_alignment (exp);
  if (!TYPE_PACKED (TREE_TYPE (exp))
  && (TREE_CODE (exp) != COMPONENT_REF
  || !DECL_PACKED (TREE_OPERAND (exp, 1
align = MAX (TYPE_ALIGN (TREE_TYPE (exp)), align);
  if (GET_MODE_ALIGNMENT (mode) > align)
return true;

but really the twisted maze of how we compute whether something
is misaligned during expansion should be cleaned up / factored
somehow, including eventually fixing misaligned indirect refs
for STRICT_ALIGNMENT targets ...

Richard.


New SPU failures (Re: [PATCH, PR 49923] Check for misaligned accesses before doing SRA)

2011-08-10 Thread Ulrich Weigand
Richard Guenther wrote:
> On Fri, 5 Aug 2011, Martin Jambor wrote:
> > the patch below fixes PR 49923 by checking for misaligned accesses
> > before doing IPA-SRA (on strict alignment targets).  I have checked it
> > fixes the issue on compile farm sparc64 and I also included this in a
> > bootstrap and testsuite run on an x86_64-linux just to double check.
> > 
> > OK for trunk and the 4.6 branch?
> 
> Ok for now.
> 
> I think we need to move this to generic middle-end code and also
> do something about partly strict-alignment targets such as x86_64.
> Iff expansion would treat expr as if it had non-natural alignment
> then when building a MEM_REF replacement with non-BLKmode we have
> to use a properly aligned variant type for it.
> 
> I think we can trick FRE/PRE to run into exactly the same situation.
> 
> I'll put it on my TODO.

This caused new failures on spu-elf:

FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
cow_.*D.->red with \\*ISRA"
FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
cow_.*D.->green with ISRA"
FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
calf_.*D.->red with \\*ISRA"
FAIL: gcc.dg/ipa/ipa-sra-2.c scan-tree-dump eipa_sra "About to replace expr 
calf_.*D.->green with ISRA"
FAIL: gcc.dg/ipa/ipa-sra-6.c scan-tree-dump-times eipa_sra "foo " 1

Looking at the last case, tree_non_mode_aligned_mem_p gets called with
expressions like those:

 
unit size 
align 32 symtab 0 alias set 2 canonical type 0xf6f60180 fields 
 context 
pointer_to_this  chain >
sizes-gimplified public unsigned SI
size 
unit size 
align 32 symtab 0 alias set 5 canonical type 0xf6f602a0>
   
arg 0 
   
arg 0 
visited var def_stmt GIMPLE_NOP

version 3
ptr-info 0xf6e760d0>
arg 1 
/home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c:16:10>
arg 1 
unsigned SI file 
/home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c line 8 col 17 
size  unit size 
align 32 offset_align 128
offset 
bit offset  context >
/home/uweigand/fsf/gcc-head/gcc/testsuite/gcc.dg/ipa/ipa-sra-6.c:16:10>

Now, the component reference as such doesn't introduce any misalignment;
there are no attribute-aligned in place anywhere.

However, the ptr-info of the "cow" parameter is set up to assume nothing
about alignment.  Therefore, get_object_alignment returns 8, and
tree_non_mode_aligned_mem_p then of course fails.

I must admin I continue to be confused about exactly what it is that
tree_non_mode_aligned_mem_p is supposed to be testing for.  We have:

  if (TREE_CODE (exp) == SSA_NAME
  || TREE_CODE (exp) == MEM_REF
  || mode == BLKmode
  || is_gimple_min_invariant (exp)
  || !STRICT_ALIGNMENT)
return false;

So if we passed in the plain MEM_REF, this would be considered no problem.
The COMPONENT_REF does not add any additional misalignment, so one would
hope that this also shouldn't be a problem.

However, just because there *is* a COMPONENT_REF around it, we suddenly
realize the fact that don't have points-to information for the MEM_REF
and therefore consider *it* (and consequently the whole COMPONENT_REF)
to be potentially misaligned ...

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com


Re: PATCH: PR target/35757: [4.4 Regression] Incorrect contraint on sse4_1_blendp

2011-08-10 Thread Richard Guenther
On Wed, Aug 10, 2011 at 4:04 PM, H.J. Lu  wrote:
> On Wed, Aug 10, 2011 at 6:46 AM, Richard Guenther
>  wrote:
>> On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu  wrote:
>>> This patch restores proper checking the third argument on blendpd and
>>> and blendps.  It also adds 2 tests, including pblendw.  Tested on
>>> Linux/Intel64. OK to install?
>>
>> The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
>> src3 is used uninitialized.
>>
>
> SRC2 may be uninitialized.  But I never saw random failures
> since it checks if random value in SRC2 is properly blended.

No, src2 is initialized via init_blendps (src1.f, src2.f), src3 is
uninitialized.
I see random execute fails on Nehalem.

I suppose we might optimize the uninitialized memory (it's probably
committed to registers) based on the undefined behavior.

Please avoid this by initializing src3 properly.

Richard.

> --
> H.J.
>


Re: PATCH: PR target/35757: [4.4 Regression] Incorrect contraint on sse4_1_blendp

2011-08-10 Thread H.J. Lu
On Wed, Aug 10, 2011 at 6:46 AM, Richard Guenther
 wrote:
> On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu  wrote:
>> This patch restores proper checking the third argument on blendpd and
>> and blendps.  It also adds 2 tests, including pblendw.  Tested on
>> Linux/Intel64. OK to install?
>
> The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
> src3 is used uninitialized.
>

SRC2 may be uninitialized.  But I never saw random failures
since it checks if random value in SRC2 is properly blended.

-- 
H.J.


Re: Fix debug/49825

2011-08-10 Thread H.J. Lu
On Sun, Jul 24, 2011 at 8:33 PM, Richard Henderson  wrote:
> With the last two fixes, the only remaining i686 regression
> from Thursday is 1 fortran testcase, whose number I have
> misplaced.  This highlights an accidental change I made while
> moving code around between functions, and an assertion I
> added trying to see if we'd Done The Right Thing already.
>
> The entire treatment of args_size is Very Confusing.
> I suspect it ought to be all re-written, but I'll save that
> for later once the dust has settled.
>
>
> r~
>
>
>
>        PR debug/49825
>        * dwarf2cfi.c (dwarf2out_stack_adjust): Move A_O_A test earlier.
>        (dwarf2out_notice_stack_adjust): Use args_size from call_insn.
>

Not all regressions are fixed.  The last one has a new PR:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49972

-- 
H.J.


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Rainer Orth
"Joseph S. Myers"  writes:

>> This is strange: they copy explicitly goes into $(gcc_objdir): from
>> libgcc/Makefile.in:
>> 
>> install-unwind_h:
>> cp unwind.h $(gcc_objdir)/include/unwind.h
>> chmod a+r $(gcc_objdir)/include/unwind.h
>> 
>> For an in-tree build, the source directory cannot be read-only, for a
>> VPATH build I don't see how this can happen.  Could you please check?
>
> This is a VPATH build and the issue is that the *file* unwind.h is 
> readonly having been copied from a readonly source (and install-unwind_h 

I see.  I'd been thinking of a source tree mounted read-only, not the
actual files changed to be read-only.

> must, I suppose, end up getting called more than once so that the second 
> copy tries to copy over a readonly file; the 26478 fix was to remove the 

True: it is called once per multilib.

> target of the copy with rm -f before copying).

Makes sense.  Toplevel dependencies should take care that the file isn't
used by another target library in the small time window between removal
and copy.

Could you try the obvious patch?  It's probably quicker than me
recreating the setup.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[Patch,AVR]: Add mul_highpart patterns

2011-08-10 Thread Georg-Johann Lay
This patch adds {u|s}mul{qi|hi}3_highpart patterns which
can help dividing by a constant when MUL is available, e.g.

int8_t sdiv1 (int8_t a)
{
return a / 3;
}

uint8_t udiv1 (uint8_t a)
{
return a / 3;
}

uint16_t udiv2 (uint16_t a)
{
return a / 10;
}

compiles with -O2 -mmcu=atmega8 to

sdiv1:
ldi r25,lo8(86)  ;  6   *movqi/2[length = 1]
muls r24,r25 ;  7   smulqi3_highpart[length = 3]
mov r25,r1
clr __zero_reg__
sbrc r24,7   ;  14  *subqi3.ashiftrt7   [length = 2]
inc r25
mov r24,r25  ;  22  *movqi/1[length = 1]
ret  ;  25  return  [length = 1]

udiv1:
ldi r25,lo8(-85) ;  6   *movqi/2[length = 1]
mul r24,r25  ;  7   umulqi3_highpart[length = 3]
mov r24,r1
clr __zero_reg__
lsr r24  ;  13  *lshrqi3/3  [length = 1]
ret  ;  22  return  [length = 1]

udiv2:
movw r18,r24 ;  2   *movhi/1[length = 1]
ldi r26,lo8(-13107)  ;  7   *movhi/4[length = 2]
ldi r27,hi8(-13107)
call __umulhisi3 ;  8   *umulhi3_highpart_call  [length = 2]
lsr r25  ;  30  *lshrhi3_const/5[length = 6]
ror r24
lsr r25
ror r24
lsr r25
ror r24
ret  ;  28  return  [length = 1]

For -Os these patterns are too expensive and the code is unchanged,
i.e. __[u]divmod is called.

Tested without regressions.

Ok to commit?

Johann

PR target/49687
* config/avr/avr.md (smulqi3_highpart): New insn.
(umulqi3_highpart): New insn.
(*subqi3.ashiftrt7): New insn.
(smulhi3_highpart): New expander.
(umulhi3_highpart): Nex expander.
(*smulhi3_highpart_call): New insn.
(*umulhi3_highpart_call): New insn.
(extend_u): New code attribute.
(extend_prefix): Rename code attribute to extend_su.
* config/avr/avr.c (avr_rtx_costs): Report costs of highpart of
widening QI/HI multiply.
Index: config/avr/avr.md
===
--- config/avr/avr.md	(revision 177616)
+++ config/avr/avr.md	(working copy)
@@ -141,10 +141,14 @@ (define_code_iterator any_extend  [sign_
 (define_code_iterator any_extend2 [sign_extend zero_extend])
 
 ;; Define code attributes
-(define_code_attr extend_prefix
+(define_code_attr extend_su
   [(sign_extend "s")
(zero_extend "u")])
 
+(define_code_attr extend_u
+  [(sign_extend "")
+   (zero_extend "u")])
+
 
 ;;
 ;; The following is used by nonlocal_goto and setjmp.
@@ -1015,6 +1019,43 @@ (define_insn "*mulqi3_call"
   [(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
 
+(define_insn "smulqi3_highpart"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(truncate:QI
+ (lshiftrt:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+   (sign_extend:HI (match_operand:QI 2 "register_operand" "d")))
+  (const_int 8]
+  "AVR_HAVE_MUL"
+  "muls %1,%2
+	mov %0,r1
+	clr __zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+  
+(define_insn "umulqi3_highpart"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(truncate:QI
+ (lshiftrt:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+   (zero_extend:HI (match_operand:QI 2 "register_operand" "r")))
+  (const_int 8]
+  "AVR_HAVE_MUL"
+  "mul %1,%2
+	mov %0,r1
+	clr __zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+
+;; Used when expanding div or mod inline for some special values
+(define_insn "*subqi3.ashiftrt7"
+  [(set (match_operand:QI 0 "register_operand"   "=r")
+(minus:QI (match_operand:QI 1 "register_operand"  "0")
+  (ashiftrt:QI (match_operand:QI 2 "register_operand" "r")
+   (const_int 7]
+  ""
+  "sbrc %2,7\;inc %0"
+  [(set_attr "length" "2")
+   (set_attr "cc" "clobber")])
+
 (define_insn "mulqihi3"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
@@ -1367,9 +1408,7 @@ (define_insn "*mulhi3_call"
   [(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
 
-;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
-;; All call-used registers clobbered otherwise - normal library call.
-;;To support widening multiplicatioon with constant we postpone
+;; To support widening multiplicatioon with constant we postpone
 ;; expanding to the implicit library call until post combine and
 ;; prior to register allocation.  Clobber all hard registers that
 ;; might be used by the (widening) multiply until it is split and
@@ -1535,19 +1574,12 @@ (define_insn_and_split 

Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Joseph S. Myers
On Wed, 10 Aug 2011, Rainer Orth wrote:

> "Joseph S. Myers"  writes:
> 
> > This appears to have brought back PR 26478, build failure with readonly 
> > source directory:
> >
> > cp unwind.h ../../.././gcc/include/unwind.h
> > cp: cannot create regular file `../../.././gcc/include/unwind.h': 
> > Permission denied
> > make[4]: *** [install-unwind_h] Error 1
> 
> This is strange: they copy explicitly goes into $(gcc_objdir): from
> libgcc/Makefile.in:
> 
> install-unwind_h:
> cp unwind.h $(gcc_objdir)/include/unwind.h
> chmod a+r $(gcc_objdir)/include/unwind.h
> 
> For an in-tree build, the source directory cannot be read-only, for a
> VPATH build I don't see how this can happen.  Could you please check?

This is a VPATH build and the issue is that the *file* unwind.h is 
readonly having been copied from a readonly source (and install-unwind_h 
must, I suppose, end up getting called more than once so that the second 
copy tries to copy over a readonly file; the 26478 fix was to remove the 
target of the copy with rm -f before copying).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Rainer Orth
"Joseph S. Myers"  writes:

> This appears to have brought back PR 26478, build failure with readonly 
> source directory:
>
> cp unwind.h ../../.././gcc/include/unwind.h
> cp: cannot create regular file `../../.././gcc/include/unwind.h': Permission 
> denied
> make[4]: *** [install-unwind_h] Error 1

This is strange: they copy explicitly goes into $(gcc_objdir): from
libgcc/Makefile.in:

install-unwind_h:
cp unwind.h $(gcc_objdir)/include/unwind.h
chmod a+r $(gcc_objdir)/include/unwind.h

For an in-tree build, the source directory cannot be read-only, for a
VPATH build I don't see how this can happen.  Could you please check?

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: PATCH: PR target/35757: [4.4 Regression] Incorrect contraint on sse4_1_blendp

2011-08-10 Thread Richard Guenther
On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu  wrote:
> This patch restores proper checking the third argument on blendpd and
> and blendps.  It also adds 2 tests, including pblendw.  Tested on
> Linux/Intel64. OK to install?

The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
src3 is used uninitialized.

Richard.

> Thanks.
>
> H.J.
> ---
> gcc/
>
> 2008-03-29  H.J. Lu  
>
>        PR target/35757
>        * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
>        proper error message for the third argument on blendpd and
>        blendps.
>
>        * config/i386/sse.md (blendbits): New.
>        (sse4_1_blendp): Use it.
>
> gcc/testsuite/
>
> 2008-03-29  H.J. Lu  
>
>        PR target/35757
>        * gcc.target/i386/sse4_1-blendps-2.c: New.
>        * gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
>
> --- gcc/config/i386/i386.c.imm  2008-03-29 07:29:40.0 -0700
> +++ gcc/config/i386/i386.c      2008-03-29 13:55:36.0 -0700
> @@ -19791,9 +19791,14 @@ ix86_expand_sse_4_operands_builtin (enum
>
>       case CODE_FOR_sse4_1_roundsd:
>       case CODE_FOR_sse4_1_roundss:
> +      case CODE_FOR_sse4_1_blendps:
>        error ("the third argument must be a 4-bit immediate");
>        return const0_rtx;
>
> +      case CODE_FOR_sse4_1_blendpd:
> +       error ("the third argument must be a 2-bit immediate");
> +       return const0_rtx;
> +
>       default:
>        error ("the third argument must be an 8-bit immediate");
>        return const0_rtx;
> --- gcc/config/i386/sse.md.imm  2008-03-29 07:29:40.0 -0700
> +++ gcc/config/i386/sse.md      2008-03-29 14:01:10.0 -0700
> @@ -53,6 +53,9 @@
>  ;; Mapping of vector modes back to the scalar modes
>  (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
>
> +;; Mapping of immediate bits for blend instructions
> +(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
> +
>  ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
>
>  ;
> @@ -6306,7 +6309,7 @@
>        (vec_merge:SSEMODEF2P
>          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
>          (match_operand:SSEMODEF2P 1 "register_operand" "0")
> -         (match_operand:SI 3 "const_0_to_3_operand" "n")))]
> +         (match_operand:SI 3 "const_0_to__operand" "n")))]
>   "TARGET_SSE4_1"
>   "blendp\t{%3, %2, %0|%0, %2, %3}"
>   [(set_attr "type" "ssemov")
> --- gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c.imm        2008-03-29 
> 09:54:08.0 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c    2008-03-29 
> 09:57:35.0 -0700
> @@ -0,0 +1,77 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include 
> +#include 
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xe
> +
> +static void
> +init_blendps (float *src1, float *src2)
> +{
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM * 4; i++)
> +    {
> +      src1[i] = i * i * sign;
> +      src2[i] = (i + 20) * sign;
> +      sign = -sign;
> +    }
> +}
> +
> +static int
> +check_blendps (__m128 *dst, float *src1, float *src2)
> +{
> +  float tmp[4];
> +  int j;
> +
> +  memcpy (&tmp[0], src1, sizeof (tmp));
> +  for (j = 0; j < 4; j++)
> +    if ((MASK & (1 << j)))
> +      tmp[j] = src2[j];
> +
> +  return memcmp (dst, &tmp[0], sizeof (tmp));
> +}
> +
> +static void
> +sse4_1_test (void)
> +{
> +  __m128 x, y;
> +  union
> +    {
> +      __m128 x[NUM];
> +      float f[NUM * 4];
> +    } dst, src1, src2;
> +  union
> +    {
> +      __m128 x;
> +      float f[4];
> +    } src3;
> +  int i;
> +
> +  init_blendps (src1.f, src2.f);
> +
> +  /* Check blendps imm8, m128, xmm */
> +  for (i = 0; i < NUM; i++)
> +    {
> +      dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
> +      if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
> +       abort ();
> +    }
> +
> +   /* Check blendps imm8, xmm, xmm */
> +  x = _mm_blend_ps (dst.x[2], src3.x, MASK);
> +  y = _mm_blend_ps (src3.x, dst.x[2], MASK);
> +
> +  if (check_blendps (&x, &dst.f[8], &src3.f[0]))
> +    abort ();
> +
> +  if (check_blendps (&y, &src3.f[0], &dst.f[8]))
> +    abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c.imm        2008-03-29 
> 09:55:29.0 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c    2008-03-29 
> 09:57:25.0 -0700
> @@ -0,0 +1,79 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include 
> +#include 
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xfe
> +
> +static void
> +init_pblendw (short *src1, short *src2)
> +{
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM * 8; i++)
> +    {
> +      src1[i] = i * i * sign;
> +      src2[i] = (i + 20) * sign;
> +      sign = -sign;
> +    }
> +}
> +
> +static in

Re: [build] Move unwinder to toplevel libgcc (v2)

2011-08-10 Thread Joseph S. Myers
This appears to have brought back PR 26478, build failure with readonly 
source directory:

cp unwind.h ../../.././gcc/include/unwind.h
cp: cannot create regular file `../../.././gcc/include/unwind.h': Permission 
denied
make[4]: *** [install-unwind_h] Error 1

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH][C++] Remove last use of can_trust_pointer_alignment

2011-08-10 Thread Richard Guenther
On Wed, 10 Aug 2011, Richard Guenther wrote:

> 
> Nothing in the middle-end happens conditional on 
> can_trust_pointer_alignment anymore - we can always "trust" pointer
> alignment, that function and its comment is somewhat gross.
> In fact we can now track alignment properly via CCP and thus
> the hack in cfgexpand.c:expand_call_stmt should not be neccessary
> anymore.
> 
> Now, looking at the use of can_trust_pointer_alignment in build_over_call
> and especially its comment:
> 
>   if (!can_trust_pointer_alignment ())
> {
>   /* If we can't be sure about pointer alignment, a call
>  to __builtin_memcpy is expanded as a call to memcpy, which
>  is invalid with identical args.  Otherwise it is
>  expanded as a block move, which should be safe.  */
>   arg0 = save_expr (arg0);
>   arg1 = save_expr (arg1);
>   test = build2 (EQ_EXPR, boolean_type_node, arg0, arg1);
> }
> 
> "Otherwise it is expanded as a block move" is certainly not true.
> Whether it is expanded as block move depends on optimization setting,
> target costs and, well, a way to do block moves (we fall back to
> memcpy after all).
> 
> So the patch changes the above to if (!optimize), but I'd argue
> we can as well either remove the conditional or emit it unconditional.
> Dependent on whether we believe a memcpy implementation exists in
> the wild that asserts that src != dst.

The following patch changes us to unconditionally call memcpy, like
the backend may decide to do for the
tree_int_cst_equal (TYPE_SIZE (type), TYPE_SIZE (as_base))
MODIFY_EXPR case.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Ok?

Thanks,
Richard.

2011-08-10  Richard Guenther  

* tree.h (can_trust_pointer_alignment): Remove.
* builtins.c (can_trust_pointer_alignment): Remove.

cp/
* call.c (build_over_call): Call memcpy unconditionally.

Index: trunk/gcc/builtins.c
===
*** trunk.orig/gcc/builtins.c   2011-08-10 14:25:04.0 +0200
--- trunk/gcc/builtins.c2011-08-10 14:25:53.0 +0200
*** get_object_alignment (tree exp)
*** 453,468 
return align;
  }
  
- /* Returns true iff we can trust that alignment information has been
-calculated properly.  */
- 
- bool
- can_trust_pointer_alignment (void)
- {
-   /* We rely on TER to compute accurate alignment information.  */
-   return (optimize && flag_tree_ter);
- }
- 
  /* Return the alignment in bits of EXP, a pointer valued expression.
 The alignment returned is, by default, the alignment of the thing that
 EXP points to.  If it is not a POINTER_TYPE, 0 is returned.
--- 453,458 
Index: trunk/gcc/cp/call.c
===
*** trunk.orig/gcc/cp/call.c2011-08-10 14:19:23.0 +0200
--- trunk/gcc/cp/call.c 2011-08-10 14:26:58.0 +0200
*** build_over_call (struct z_candidate *can
*** 6767,6799 
else
{
  /* We must only copy the non-tail padding parts.
!Use __builtin_memcpy for the bitwise copy.
!FIXME fix 22488 so we can go back to using MODIFY_EXPR
!instead of an explicit call to memcpy.  */
!   
  tree arg0, arg1, arg2, t;
- tree test = NULL_TREE;
  
  arg2 = TYPE_SIZE_UNIT (as_base);
  arg1 = arg;
  arg0 = cp_build_addr_expr (to, complain);
  
- if (!can_trust_pointer_alignment ())
-   {
- /* If we can't be sure about pointer alignment, a call
-to __builtin_memcpy is expanded as a call to memcpy, which
-is invalid with identical args.  Otherwise it is
-expanded as a block move, which should be safe.  */
- arg0 = save_expr (arg0);
- arg1 = save_expr (arg1);
- test = build2 (EQ_EXPR, boolean_type_node, arg0, arg1);
-   }
  t = implicit_built_in_decls[BUILT_IN_MEMCPY];
  t = build_call_n (t, 3, arg0, arg1, arg2);
  
  t = convert (TREE_TYPE (arg0), t);
- if (test)
-   t = build3 (COND_EXPR, TREE_TYPE (t), test, arg0, t);
  val = cp_build_indirect_ref (t, RO_NULL, complain);
TREE_NO_WARNING (val) = 1;
}
--- 6767,6783 
else
{
  /* We must only copy the non-tail padding parts.
!Use __builtin_memcpy for the bitwise copy.  */
  tree arg0, arg1, arg2, t;
  
  arg2 = TYPE_SIZE_UNIT (as_base);
  arg1 = arg;
  arg0 = cp_build_addr_expr (to, complain);
  
  t = implicit_built_in_decls[BUILT_IN_MEMCPY];
  t = build_call_n (t, 3, arg0, arg1, arg2);
  
  t = convert (TREE_TYPE (arg0), t);
  val = cp_build_indirect_ref (t, RO_NULL, complain);
TREE_NO_WARNING (val) = 1;
   

[PATCH] Remove max-align parameters from get_{pointer,object}_alignment

2011-08-10 Thread Richard Guenther

Those functions now return conservative alignments and all callers
that call them with an argument of BIGGEST_ALIGNMENT do so without
any good reason.  If the functions return a larger alignment then
it is known to be larger.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2011-08-10  Richard Guenther  

* tree.h (get_pointer_alignment): Remove max-align argument.
(get_object_alignment): Likewise.
* builtins.c (get_object_alignment_1): Adjust.
(get_object_alignment): Remove max-align argument.
(get_pointer_alignment): Likewise.
(expand_builtin_strlen): Adjust.
(expand_builtin_memcpy): Likewise.
(expand_builtin_mempcpy_args): Likewise.
(expand_builtin_strncpy): Likewise.
(expand_builtin_memset_args): Likewise.
(expand_builtin_memcmp): Likewise.
(expand_builtin_strcmp): Likewise.
(expand_builtin_strncmp): Likewise.
(get_builtin_sync_mem): Likewise.
(fold_builtin_memset): Likewise.
(fold_builtin_memory_op): Likewise.
(expand_builtin_memory_chk): Likewise.
* emit-rtl.c (get_mem_align_offset): Likewise.
(set_mem_attributes_minus_bitpos): Likewise.
* expr.c (expand_assignment): Likewise.
(expand_expr_real_1): Likewise.
* tree-sra.c (tree_non_mode_aligned_mem_p): Likewise.
* tree-ssa-forwprop.c (simplify_builtin_call): Likewise.
* tree-ssa-loop-ivopts.c (may_be_unaligned_p): Likewise.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Likewise.
* value-prof.c (gimple_stringops_transform): Likewise.

Index: gcc/tree.h
===
*** gcc/tree.h.orig 2011-08-10 11:22:33.0 +0200
--- gcc/tree.h  2011-08-10 14:15:14.0 +0200
*** extern tree build_string_literal (int, c
*** 5359,5369 
  extern bool validate_arglist (const_tree, ...);
  extern rtx builtin_memset_read_str (void *, HOST_WIDE_INT, enum machine_mode);
  extern bool can_trust_pointer_alignment (void);
- extern unsigned int get_pointer_alignment (tree, unsigned int);
  extern bool is_builtin_name (const char *);
  extern bool is_builtin_fn (tree);
  extern unsigned int get_object_alignment_1 (tree, unsigned HOST_WIDE_INT *);
! extern unsigned int get_object_alignment (tree, unsigned int);
  extern tree fold_call_stmt (gimple, bool);
  extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum 
built_in_function);
  extern tree make_range (tree, int *, tree *, tree *, bool *);
--- 5359,5369 
  extern bool validate_arglist (const_tree, ...);
  extern rtx builtin_memset_read_str (void *, HOST_WIDE_INT, enum machine_mode);
  extern bool can_trust_pointer_alignment (void);
  extern bool is_builtin_name (const char *);
  extern bool is_builtin_fn (tree);
  extern unsigned int get_object_alignment_1 (tree, unsigned HOST_WIDE_INT *);
! extern unsigned int get_object_alignment (tree);
! extern unsigned int get_pointer_alignment (tree);
  extern tree fold_call_stmt (gimple, bool);
  extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum 
built_in_function);
  extern tree make_range (tree, int *, tree *, tree *, bool *);
Index: gcc/builtins.c
===
*** gcc/builtins.c.orig 2011-08-10 11:22:33.0 +0200
--- gcc/builtins.c  2011-08-10 14:16:05.0 +0200
*** get_object_alignment_1 (tree exp, unsign
*** 341,347 
  align = MAX (pi->align * BITS_PER_UNIT, align);
}
else if (TREE_CODE (addr) == ADDR_EXPR)
!   align = MAX (align, get_object_alignment (TREE_OPERAND (addr, 0), ~0U));
bitpos += mem_ref_offset (exp).low * BITS_PER_UNIT;
  }
else if (TREE_CODE (exp) == TARGET_MEM_REF)
--- 341,347 
  align = MAX (pi->align * BITS_PER_UNIT, align);
}
else if (TREE_CODE (addr) == ADDR_EXPR)
!   align = MAX (align, get_object_alignment (TREE_OPERAND (addr, 0)));
bitpos += mem_ref_offset (exp).low * BITS_PER_UNIT;
  }
else if (TREE_CODE (exp) == TARGET_MEM_REF)
*** get_object_alignment_1 (tree exp, unsign
*** 365,371 
  align = MAX (pi->align * BITS_PER_UNIT, align);
}
else if (TREE_CODE (addr) == ADDR_EXPR)
!   align = MAX (align, get_object_alignment (TREE_OPERAND (addr, 0), ~0U));
if (TMR_OFFSET (exp))
bitpos += TREE_INT_CST_LOW (TMR_OFFSET (exp)) * BITS_PER_UNIT;
if (TMR_INDEX (exp) && TMR_STEP (exp))
--- 365,371 
  align = MAX (pi->align * BITS_PER_UNIT, align);
}
else if (TREE_CODE (addr) == ADDR_EXPR)
!   align = MAX (align, get_object_alignment (TREE_OPERAND (addr, 0)));
if (TMR_OFFSET (exp))
bitpos += TREE_INT_CST_LOW (TMR_OFFSET (exp)) * BITS_PER_UNIT;
if (TMR_INDEX (exp) && TMR_STEP (exp))
*** get_object_a

Re: [libcpp] Correctly define __cplusplus (PR libstdc++-v3/1773)

2011-08-10 Thread Rainer Orth
Jason Merrill  writes:

> On 08/09/2011 09:14 AM, Marc Glisse wrote:
>> I don't think we should define the C++ 2011 value yet. In my opinion, we
>> should wait until:
>> 1) the standard is official
>> 2) gcc implements most of it: people will want to use __cplusplus as a
>> test to know if they can use C++0X features, not if the compiler does
>> some effort to implement half of them.
>
> I'm of two minds about this, but I see that clang and edg still use 199711L
> in C++0x mode, so let's stick with that for now.

Ok, I just wanted to bring it up.  I'll resubmit with the 201103L change
removed once the other patches (tm etc. mangling and libstdc++) are
resolved.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


RFC: [build, ada] Centralize PICFLAG configuration

2011-08-10 Thread Rainer Orth
As has been mentioned before, one impediment to complete the toplevel
libgcc move is libada's use of TARGET_LIBGCC2_CFLAGS.  AFAICS, it is
primarily used for gnatlib-*shared targets, so I assume that this was
done mostly (exclusively?) to get the flags necessary for PIC code
generation.  Since those flags aren't used for the static libgnat.a, it
were strange that the non-PIC flags were actually required, given that
all gnat.dg and ada/acats testing is only done with the static library.

If this is true, then we need two things:

* Change gcc-interface/Makefile.in and libada/Makefile.in to use PICFLAG
  instead of TARGET_LIBGCC2_CFLAGS.  This is what the patch below does.

* Centralize the determination of PICFLAG.  Currently, three libraries
  inside the gcc tree are built PIC without libtool: libgcc, libiberty,
  and libgnat/libgnarl.

  libiberty/configure.ac has a hardcoded list of PICFLAG that could be
  moved to a toplevel config/picflag.m4.

  Alternatively, one could think about using libtool --config | grep
  pic_flag to determine the flag without actually using libtool.

  Last, one completely could go for libtool, but I very much doubt such
  a suggestion would get much traction.

My current plan is to merge the PICFLAG information from libiberty and
libgcc into picflag.m4 and use that.

I've used the patch below as a proof-of-concept to complete the toplevel
libgcc move.  Together with companion patches to actually move all the
LIBGCC2* and LIB2* variables and corresponding files to toplevel libgcc,
I've sucessfully bootstrapped on i386-pc-solaris2.11.

Comments, suggestions?

Rainer


2011-07-31  Rainer Orth  

gcc/ada:
* gcc-interface/Makefile.in (GNATLIBCFLAGS_FOR_C): Replace
TARGET_LIBGCC2_CFLAGS by PICFLAG.
(gnatlib-shared-default, gnatlib-shared-dual-win32,
gnatlib-shared-win32, gnatlib-shared-darwin, gnatlib-shared,
gnatlib-sjlj, gnatlib-zcx): Likewise.

libada:
* configure.ac (PICFLAG): Set.
Substitute.
* configure: Regenerate.
* Makefile.in (TARGET_LIBGCC2_CFLAGS): Replace by PICFLAG.
(GNATLIBCFLAGS_FOR_C): Replace TARGET_LIBGCC2_CFLAGS by PICFLAG.
(LIBADA_FLAGS_TO_PASS): Likewise.
Don't include $(GCC_DIR)/libgcc.mvars.

diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -114,7 +114,7 @@ GNATLIBCFLAGS = -g -O2
 # Pretend that _Unwind_GetIPInfo is available for the target by default.  This
 # should be autodetected during the configuration of libada and passed down to
 # here, but we need something for --disable-libada and hope for the best.
-GNATLIBCFLAGS_FOR_C = $(GNATLIBCFLAGS) $(TARGET_LIBGCC2_CFLAGS) -fexceptions \
+GNATLIBCFLAGS_FOR_C = $(GNATLIBCFLAGS) $(PICFLAG) -fexceptions \
 	-DIN_RTS -DHAVE_GETIPINFO
 ALL_ADAFLAGS = $(CFLAGS) $(ADA_CFLAGS) $(ADAFLAGS)
 MOST_ADAFLAGS = $(CFLAGS) $(ADA_CFLAGS) $(SOME_ADAFLAGS)
@@ -2482,7 +2482,7 @@ gnatlib: ../stamp-gnatlib1-$(RTSDIR) ../
 gnatlib-shared-default:
 	$(MAKE) $(FLAGS_TO_PASS) \
  GNATLIBFLAGS="$(GNATLIBFLAGS)" \
-	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(TARGET_LIBGCC2_CFLAGS)" \
+	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(PICFLAG)" \
 	 GNATLIBCFLAGS_FOR_C="$(GNATLIBCFLAGS_FOR_C)" \
 	 MULTISUBDIR="$(MULTISUBDIR)" \
 	 THREAD_KIND="$(THREAD_KIND)" \
@@ -2490,14 +2490,14 @@ gnatlib-shared-default:
 	$(RM) $(RTSDIR)/libgna*$(soext)
 	cd $(RTSDIR); `echo "$(GCC_FOR_TARGET)" \
 | sed -e 's,\./xgcc,../../xgcc,' -e 's,-B\./,-B../../,'` -shared $(GNATLIBCFLAGS) \
-		$(TARGET_LIBGCC2_CFLAGS) \
+		$(PICFLAG) \
 		-o libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \
 		$(GNATRTL_NONTASKING_OBJS) $(LIBGNAT_OBJS) \
 		$(SO_OPTS)libgnat$(hyphen)$(LIBRARY_VERSION)$(soext) \
 		$(MISCLIB) -lm
 	cd $(RTSDIR); `echo "$(GCC_FOR_TARGET)" \
 | sed -e 's,\./xgcc,../../xgcc,' -e 's,-B\./,-B../../,'` -shared $(GNATLIBCFLAGS) \
-		$(TARGET_LIBGCC2_CFLAGS) \
+		$(PICFLAG) \
 		-o libgnarl$(hyphen)$(LIBRARY_VERSION)$(soext) \
 		$(GNATRTL_TASKING_OBJS) \
 		$(SO_OPTS)libgnarl$(hyphen)$(LIBRARY_VERSION)$(soext) \
@@ -2529,7 +2529,7 @@ gnatlib-shared-dual:
 gnatlib-shared-dual-win32:
 	$(MAKE) $(FLAGS_TO_PASS) \
  GNATLIBFLAGS="$(GNATLIBFLAGS)" \
-	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(TARGET_LIBGCC2_CFLAGS)" \
+	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(PICFLAG)" \
 	 GNATLIBCFLAGS_FOR_C="$(GNATLIBCFLAGS_FOR_C)" \
 	 MULTISUBDIR="$(MULTISUBDIR)" \
 	 THREAD_KIND="$(THREAD_KIND)" \
@@ -2552,7 +2552,7 @@ gnatlib-shared-dual-win32:
 gnatlib-shared-win32:
 	$(MAKE) $(FLAGS_TO_PASS) \
  GNATLIBFLAGS="$(GNATLIBFLAGS)" \
-	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(TARGET_LIBGCC2_CFLAGS)" \
+	 GNATLIBCFLAGS="$(GNATLIBCFLAGS) $(PICFLAG)" \
 	 GNATLIBCFLAGS_FOR_C="$(GNATLIBCFLAGS_FOR_C)" \
 	 MULTISUBDIR="$(MULTISUBDIR)" \
 	 THREAD_KIND="$(THR

Re: [C++0x] contiguous bitfields race implementation

2011-08-10 Thread Richard Guenther
On Tue, Aug 9, 2011 at 8:39 PM, Aldy Hernandez  wrote:
>
>> ok, so now you do this only for the first field in a bitfield group.  But
>> you
>> do it for _all_ bitfield groups in a struct, not only for the interesting
>> one.
>>
>> May I suggest to split the loop into two, first searching the first field
>> in the bitfield group that contains fld and then in a separate loop
>> computing
>> the bitwidth?
>
> Excellent idea.  Done!  Now there are at most two calls to
> get_inner_reference, and in many cases, only one.
>
>> Backing up, considering one of my earlier questions.  What is *offset
>> supposed to be relative to?  The docs say sth like "relative to
>> INNERDECL",
>> but the code doesn't contain a reference to INNERDECL anymore.
>
> Sorry, I see your confusion.  The comments at the top were completely out of
> date.  I have simplified and rewritten them accordingly.  I am attaching
> get_bit_range() with these and other changes you suggested. See if it makes
> sense now.
>
>> Now we come to that padding thing.  What's the C++ memory model
>> semantic for re-used tail padding?  Consider
>
> Andrew addressed this elsewhere.
>
>> There is too much get_inner_reference and tree folding stuff in this
>> patch (which makes it expensive given that the algorithm is still
>> inherently quadratic).  You can rely on the bitfield group advancing
>> by integer-cst bits (but the start offset may be non-constant, so
>> may the size of the underlying record).
>
> Now there are only two tree folding calls (apart from get_inner_reference),
> and the common case has very simple arithmetic tuples.  I see no clear way
> of removing the last call to get_inner_reference(), as the padding after the
> field can only be calculated by calling get_inner_reference() on the
> subsequent field.
>
>> Now seeing all this - and considering that this is purely C++ frontend
>> semantics.  Why can't the C++ frontend itself constrain accesses
>> according to the required semantics?  It could simply create
>> BIT_FIELD_REF> byte-offset-to-start-of-group>, bit-size, bit-offset>  for all bitfield
>> references (with a proper
>> type for the MEM_REF, specifying the size of the group).  That would
>> also avoid issues during tree optimization and would at least allow
>> optimizing the bitfield accesses according to the desired C++ semantics.
>
> Andrew addressed this as well.  Could you respond to his email if you think
> it is unsatisfactory?

Some comments.

  /* If we have a bit-field with a bitsize > 0... */
  if (DECL_BIT_FIELD_TYPE (fld)
  && tree_low_cst (DECL_SIZE (fld), 1) > 0)

I think we can check bitsize != 0, thus

&& !integer_zerop (DECL_SIZE (fld))

instead.  You don't break groups here with MAX_FIXED_MODE_SIZE, so
I don't think it's ok to do that in the 2nd loop

  /* Short-circuit out if we have the max bits allowed.  */
  if (cumulative_bitsize >= MAX_FIXED_MODE_SIZE)
{
  *maxbits = MAX_FIXED_MODE_SIZE;
  /* Calculate byte offset to the beginning of the bit region.  */
  gcc_assert (start_bitpos % BITS_PER_UNIT == 0);
  *offset = fold_build2 (PLUS_EXPR, TREE_TYPE (start_offset),
 start_offset,
 build_int_cst (integer_type_node,
start_bitpos / 
BITS_PER_UNIT));
  return;

apart from the *offset calculation being redundant, *offset + maxbits
may not include the referenced field.  How do you plan to find
an "optimal" window for such access? (*)

  /* Count the bitsize of the bitregion containing the field in question.  */
  found = false;
  cumulative_bitsize = 0;
  for (fld = bitregion_start; fld; fld = DECL_CHAIN (fld))
{
  if (TREE_CODE (fld) != FIELD_DECL)
continue;
  if (fld == field)
found = true;

  if (DECL_BIT_FIELD_TYPE (fld)
  && tree_low_cst (DECL_SIZE (fld), 1) > 0)
{
...
 }
  else if (found)
break;

should probably be

  if (!DECL_BIT_FIELD_TYPE (fld)
 || integer_zerop (DECL_SIZE (fld)))
break;

we know that we'll eventually find field.

  /* If we found the end of the bit field sequence, include the
 padding up to the next field...  */
  if (fld)
{

could be a non-FIELD_DECL, you have to skip those first.

 /* Calculate bitpos and offset of the next field.  */
  get_inner_reference (build3 (COMPONENT_REF,
   TREE_TYPE (exp),
   TREE_OPERAND (exp, 0),
   fld, NULL_TREE),
   &tbitsize, &end_bitpos, &end_offset,
   &tmode, &tunsignedp, &tvolatilep, true);
  gcc_assert (end_bitpos % BITS_PER_UNIT == 0);

  if (end_offset)
{
  tree type = TREE_TYPE (end_offset), end;

  /* Calculate byte offset to the end of the bit regi

Re: [PATCH, ARM] Generate conditional compares in Thumb2 state

2011-08-10 Thread Ramana Radhakrishnan
On 10 August 2011 09:20, Jiangning Liu  wrote:
> PING...

>
> BTW, in patch fix_cond_cmp_2.patch, the file mode of thumb2.md is carelessly
> changed, so please check attached new patch file fix_cond_cmp_3.patch.
>

Please do not top post.

I've been away for the whole of last week and then been away from my desk
for the first 2 days this week and am still catching up with email .

I'm missing a Changelog entry for this . Do I assume it is the same ?

I've just noticed that the length attribute isn't correct for Thumb2
state. When I last
looked at this I must have missed the case where the cmp and the cmn
are both 32 bit instructions.

The length can vary between 6 and 10 bytes for the Thumb2 variant from
my reading of the ARM-ARM.

i.e cmp reg, <8bitconstant>
 it
 cmn reg, <8bitconstant>

Length = 6 bytes

or even with
   cmp reg, reg
   it 
   cmn reg, reg

 All registers betwen r0 and r7 .

Length is 6 bytes if you have this for both l constraints.
Length is 6 bytes - you should catch this with Pw and Pv respectively .
Length is 8 bytes if you have Pw and L
Length is 8 bytes if you have I and Pv
Length is 10 bytes if you have I and L .


Showing you an example with l and Py at this point of time and leaving
the rest as homework. Yes it will duplicate a few cases but it helps
getting the lengths absolutely right.

(define_insn "*cmp_ite0"
  [(set (match_operand 6 "dominant_cc_register" "")
(compare
 (if_then_else:SI
  (match_operator 4 "arm_comparison_operator"
   [(match_operand:SI 0 "s_register_operand" "l,r,r,r,r")
(match_operand:SI 1 "arm_add_operand" "lPy,rI,L,rI,L")])
  (match_operator:SI 5 "arm_comparison_operator"
   [(match_operand:SI 2 "s_register_operand" "l,r,r,r,r")
(match_operand:SI 3 "arm_add_operand" "lPy,rI,rI,L,L")])
  (const_int 0))
 (const_int 0)))]
  "TARGET_32BIT"
  "*
  {
static const char * const cmp1[5][2] =
{
  {\"cmp\\t%2, %3\",
   \"cmp\\t%0, %1\"},
  {\"cmp\\t%2, %3\",
   \"cmp\\t%0, %1\"},
  {\"cmp\\t%2, %3\",
   \"cmn\\t%0, #%n1\"},
  {\"cmn\\t%2, #%n3\",
   \"cmp\\t%0, %1\"},
  {\"cmn\\t%2, #%n3\",
   \"cmn\\t%0, #%n1\"}
};
static const char * const cmp2[5][2] =
{
  {\"cmp%d5\\t%0, %1\",
   \"cmp%d4\\t%2, %3\"},
  {\"cmp%d5\\t%0, %1\",
   \"cmp%d4\\t%2, %3\"},
  {\"cmn%d5\\t%0, #%n1\",
   \"cmp%d4\\t%2, %3\"},
  {\"cmp%d5\\t%0, %1\",
   \"cmn%d4\\t%2, #%n3\"},
  {\"cmn%d5\\t%0, #%n1\",
   \"cmn%d4\\t%2, #%n3\"}
};
static const char * const ite[2] =
{
  \"it\\t%d5\",
  \"it\\t%d4\"
};
int swap =
  comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));

output_asm_insn (cmp1[which_alternative][swap], operands);
if (TARGET_THUMB2) {
output_asm_insn (ite[swap], operands);
}
output_asm_insn (cmp2[which_alternative][swap], operands);
return \"\";
  }"
  [(set_attr "conds" "set")
   (set_attr "arch" "t2,any,any,any,any")
   (set_attr "length" "6,8,8,8,8")]
)


>As for the extra problem exposed by this specific case, may we treat it as a
>separate fix to decouple it with this one, and I can give follow up later
>on? I think it is a general problem not only for the particular pattern
>it/op/it/op. But I'm not sure how far we can go to optimize this kind of
>problems introduced by IT block.


Currently the way in which the Thumb2 backend generates
conditional instructions and combines them further with other
IT blocks is by running a state machine at the very end before assembly
generation. Conditional instructions in GCC in RTL form usually have
a cond-exec associated with them. The way this works is to look for
sequences of cond-execs and inspect their conditions to merge
them together later. Look at thumb2_final_prescan_insn in arm.c for
more information. Mostly in the cases where we have such instructions
especially where we have conditional compares being generated.

My suggestion in the previous mail about splitting this into cond-exec form
instructions would help without changing too much of the current
infrastructure.  I am happy to treat that as a follow-up set of patches.

cheers
Ramana


Re: [PLUGIN] Install c-tree.h header

2011-08-10 Thread Romain Geissler
2011/8/2 Romain Geissler :
> Hi,
>
> For now, plugins can't compare types. This patch allows
> c-tree.h to be installed as a plugin header, allowing
> plugins to see "comptypes" (among other things).
>
> Romain Geissler
>
>
> 2011-08-02  Romain Geissler  
>
>        * Makefile.in (PLUGIN_HEADERS): Add C_TREE_H.
>
>
> Index: gcc/Makefile.in
> ===
> --- gcc/Makefile.in     (revision 176741)
> +++ gcc/Makefile.in     (working copy)
> @@ -4584,7 +4584,7 @@ PLUGIN_HEADERS = $(TREE_H) $(CONFIG_H) $
>   $(GGC_H) $(TREE_DUMP_H) $(PRETTY_PRINT_H) $(OPTS_H) $(PARAMS_H) plugin.def \
>   $(tm_file_list) $(tm_include_list) $(tm_p_file_list) $(tm_p_include_list) \
>   $(host_xm_file_list) $(host_xm_include_list) $(xm_include_list) \
> -  intl.h $(PLUGIN_VERSION_H) $(DIAGNOSTIC_H) \
> +  intl.h $(PLUGIN_VERSION_H) $(DIAGNOSTIC_H) ${C_TREE_H} \
>   $(C_COMMON_H) c-family/c-objc.h $(C_PRETTY_PRINT_H) \
>   tree-iterator.h $(PLUGIN_H) $(TREE_FLOW_H) langhooks.h incpath.h debug.h \
>   $(EXCEPT_H) tree-ssa-sccvn.h real.h output.h $(IPA_UTILS_H) \
>

Ping.


[PATCH][C++] Remove last use of can_trust_pointer_alignment

2011-08-10 Thread Richard Guenther

Nothing in the middle-end happens conditional on 
can_trust_pointer_alignment anymore - we can always "trust" pointer
alignment, that function and its comment is somewhat gross.
In fact we can now track alignment properly via CCP and thus
the hack in cfgexpand.c:expand_call_stmt should not be neccessary
anymore.

Now, looking at the use of can_trust_pointer_alignment in build_over_call
and especially its comment:

  if (!can_trust_pointer_alignment ())
{
  /* If we can't be sure about pointer alignment, a call
 to __builtin_memcpy is expanded as a call to memcpy, which
 is invalid with identical args.  Otherwise it is
 expanded as a block move, which should be safe.  */
  arg0 = save_expr (arg0);
  arg1 = save_expr (arg1);
  test = build2 (EQ_EXPR, boolean_type_node, arg0, arg1);
}

"Otherwise it is expanded as a block move" is certainly not true.
Whether it is expanded as block move depends on optimization setting,
target costs and, well, a way to do block moves (we fall back to
memcpy after all).

So the patch changes the above to if (!optimize), but I'd argue
we can as well either remove the conditional or emit it unconditional.
Dependent on whether we believe a memcpy implementation exists in
the wild that asserts that src != dst.

Independent of the can_trust_pointer_alignment issue nowadays the
case of !tree_int_cst_equal (TYPE_SIZE (type), TYPE_SIZE (as_base))
should use a MODIFY_EXPR with two MEM_REF operands and a proper
alias set, which would avoid the address-taking (and thus aliasing)
because of the memcpy and also would avoid forcing alias-set zero
because of the memcpy.

Thus (untested), something like

Index: gcc/cp/call.c
===
--- gcc/cp/call.c   (revision 177597)
+++ gcc/cp/call.c   (working copy)
@@ -6379,35 +6379,21 @@ build_over_call (struct z_candidate *can
}
   else
{
- /* We must only copy the non-tail padding parts.
-Use __builtin_memcpy for the bitwise copy.
-FIXME fix 22488 so we can go back to using MODIFY_EXPR
-instead of an explicit call to memcpy.  */
-
- tree arg0, arg1, arg2, t;
+ /* We must only copy the non-tail padding parts.  */
+ tree arg0, arg2, t;
  tree test = NULL_TREE;
 
  arg2 = TYPE_SIZE_UNIT (as_base);
- arg1 = arg;
  arg0 = cp_build_addr_expr (to, complain);
 
- if (!can_trust_pointer_alignment ())
-   {
- /* If we can't be sure about pointer alignment, a call
-to __builtin_memcpy is expanded as a call to memcpy, 
which
-is invalid with identical args.  Otherwise it is
-expanded as a block move, which should be safe.  */
- arg0 = save_expr (arg0);
- arg1 = save_expr (arg1);
- test = build2 (EQ_EXPR, boolean_type_node, arg0, arg1);
-   }
- t = implicit_built_in_decls[BUILT_IN_MEMCPY];
- t = build_call_n (t, 3, arg0, arg1, arg2);
-
- t = convert (TREE_TYPE (arg0), t);
- if (test)
-   t = build3 (COND_EXPR, TREE_TYPE (t), test, arg0, t);
- val = cp_build_indirect_ref (t, RO_NULL, complain);
+ t = build_array_type (char_type_node, build_index_type (arg2));
+ t = build2 (MODIFY_EXPR, void_type_node,
+ build2 (MEM_REF, t, to,
+ build_int_cst (build_pointer_type (type), 
0)),
+ build2 (MEM_REF, t, arg,
+ build_int_cst (build_pointer_type (type), 
0)));
+ val = build2 (COMPOUND_EXPR, TREE_TYPE (to),
+   t, to);
   TREE_NO_WARNING (val) = 1;
}
 
which does a block copy of size arg2 (by using a char[arg2] array
a type for the mem-ref) using the alias-set of type (by
specifying the offset of the mem-ref with using a type of type *).

Well, but I'm bootstrapping and testing the following now, on
x86_64-unknown-linux-gnu (because I want to get rid of
that can_trust_pointer_alignment function).

Ok for trunk?

Thanks,
Richard.

2011-08-10  Richard Guenther  

cp/
* call.c (build_over_call): Check for optimize instead of
can_trust_pointer_alignment.  Adjust comment.

Index: gcc/cp/call.c
===
--- gcc/cp/call.c   (revision 177613)
+++ gcc/cp/call.c   (working copy)
@@ -6778,12 +6778,14 @@ build_over_call (struct z_candidate *can
  arg1 = arg;
  arg0 = cp_build_addr_expr (to, complain);
 
- if (!can_trust_pointer_alignment ())
+ if (!optimize)
{
- /* If we can't be sure about pointer alignment, a call
-to __builtin_memcpy is expanded as a call to memcpy, which
-is in

Re: Fix typo in internal documents

2011-08-10 Thread Richard Sandiford
"Paulo J. Matos"  writes:
> There is a typo in the internal documentation. This patch fixes this.

Thanks, applied to trunk under the "obviously correct" rule.  Sorry for
the slow response.

Richard

For the record, the changelog was:

2011-08-10  Paulo J. Matos  

* doc/tm.texi.in (CLASS_MAX_NREGS): Fix typo.
* doc/tm.texi: Regenerate.


[PING] Selective scheduler fixes

2011-08-10 Thread Alexander Monakov
Ping

On Wed, 3 Aug 2011, Alexander Monakov wrote:

> Hello,
> 
> This is a series of selective scheduler bug fixes.  They fix problems that
> have been discovered during internal testing, and one patch is necessary as
> preparation to predication support in the selective scheduler (predication
> patches will be submitted separately later).
> 
> I'm sorry that patches come without testcases, but as most of the problems
> have been discovered on a patched scheduler, it's virtually impossible to
> provide testcases that would fail without a patch and pass with it.
> 
> The patches have been bootstrapped and regtested together on x86-64 and ia64
> (without java on ia64).  Additionally, a number of tests was done with arm
> cross-compiler.  I'll commit approved patches separately.
> 
> OK for trunk?
> 
> Dmitry Melnik (3):
>   Take maximum spec when merging exprs
>   Make more insns unique
>   Drop an incorrect assert
> 
> Sergey Grechanik (5):
>   Fix usage of hard_regno_nregs before reload
>   Properly loop over all hard regs for mode
>   Try successors to find seqno
>   Factor out caching logic for INSN_COND
>   Only merge deps status for true dependencies
> 
>  gcc/sched-deps.c   |   69 +++-
>  gcc/sched-int.h|2 +-
>  gcc/sel-sched-ir.c |  111 
> +---
>  gcc/sel-sched-ir.h |1 +
>  gcc/sel-sched.c|   14 +++---
>  5 files changed, 137 insertions(+), 60 deletions(-)
> 


Re: RFC: Rewriting auto-inc-dec.c

2011-08-10 Thread Richard Sandiford
Thanks for looking at this.

Bernd Schmidt  writes:
> On 07/21/11 17:42, Richard Sandiford wrote:
>>   /* When optimizing for speed, don't introduce dependencies between
>>  memory references in the chain and memory references outside of it,
>>  since doing so would limit scheduling.  If the chain is truly
>>  worthwhile, we can still try again using a new pseudo register.  */
>
> Now, it's good that there is at least some consideration for this, but I
> wonder whether it's possible to do better here. The basic principle is
> that if something is achievable without autoinc, in the same number of
> insns (or a factor 1.x thereof with x small), then it's preferrable not
> to use autoinc at all.

Yeah.  The pass tries to reject changes that have a net zero cost unless
there are specific reasons to believe that later passes would benefit.
(This is the max_cost_delta argument to test_replacements.)

> For example,
>
>>... = *r1... = *rN++
>>... = *(r1 + 4)  ... = *rN
>>  *(r1 + 4) = ...  *rN++ = ...
>> r2 = r1 + 8  r2 = rN
>>... = *r2  --->  ... = rN++
>> r3 = r2 + 4  r3 = rN
>>... = *r3... = rN++
>> r4 = r1 + 16 r4 = rN
>>... = *r4... = rN
>
> On many targets, this would best be handled as a series of (reg +
> offset) addressing modes.

The idea is that we run after fwprop, so all addresses that are better
expressed as (reg + offset) should already have that form.  I.e. the
pass should be run at a stage where it doesn't have to process unoptimised
addresses (and thus have to "second guess" what later address optimisations
might do).  So the fact that the addresses above are not:

... = *(r1 + 8)
... = *(r1 + 12)
... = *(r1 + 16)

or at least:

... = *(r1 + 8)
... = *(r2 + 4)
... = *(r1 + 16)

should mean that those addresses aren't acceptable on the target,
or at least are more expensive than plain *rN.  This is the kind
of thing we'd see for NEON.

I should explain that in the comment -- thanks for bringing it up.

> If an update is required, on targets that
> support both (reg + offset) and {pre,post}_modify, it would be good to
> have an automodify in just one of the addresses; if a single add insn is
> required this may still be better if the number of memory references is
> large enough. Do you think this is something that can be done in the
> context of this pass?

So something like:

 ... = *r1... = *rN, rN += 16
 ... = *(r1 + 100)... = *(rN - 84)
 ... = *(r1 + 80) ... = *(rN - 64)
 ... = *(r1 - 20) ... = *(rN - 36)
 r1 = r1 + 16

?  Yeah, that could be added.

> I'm not sure all the code is completely safe if a hardreg is used with a
> mix of single- and multi-word accesses.

The code was supposed to handle that.  Is there anything specifically
that worries you?  FWIW, the principles were:

  - in update_liveness, when a multi-register value (rN...rM) is used or
defined, behave as though each individual register in the range [rN, rM]
had been used or defined.  (The DF machinery handles this for us;
there's a separate df_ref for each register in the range.)

  - If we're tracking (rN...rM) as a single register rtx, then any
non-ref_point use of any register in the range [rN, rM] is recorded
as an untracked use.

  - If we're tracking (rN...rM) as a single register rtx, then any
definition of any register in the range [rN, rM] clobbers the
tracked value, and brings its live range to an end.

> Maybe just mark a hardreg that occurs in a multi-word mode as
> unavailable for the optimization?

Well, I'd rather not if we can help it.  AVR in particular benefits from
handling wide hard regs.

> It would be nice not to have semi-identical pairs of functions like
> other_uses_before and other_uses_after or valid_mem_add_pair and
> valid_add_mem_pair. Can these be merged sensibly?

Although the functions are deliberately similar in interface, they're
not really that similar in implementation.  They test different live
ranges and apply different liveness checks.  So I'm not sure there's
much that can be merged.

> Just to make sure, my understanding is that this will also generate
> (automod (reg1) (plus (reg1) (reg2))). Right?

Yeah, that's right.

>> /* Represents an offset that is applied to a valno base.  */
>> union valno_offset {
>>   HOST_WIDE_INT constant;
>>   struct valno_def *valno;
>> };
>
> Should mention that this is used as splay tree key?

Well, it is, but it's also an important part of the valno record.
I mentioned that the offset was the key here:

  /* A splay tree of valnos that use this one as their base.  The splay
 key is the offset.  */

but I agree it's probably worth saying it above the "offset"

Re: [patch, ia64] Fix unaligned accesses on IA64 from dwarf2out.c

2011-08-10 Thread Richard Guenther
On Wed, Aug 10, 2011 at 10:48 AM, Pedro Alves  wrote:
> On Wednesday 10 August 2011 01:02:50, Steve Ellcey wrote:
>> On Tue, 2011-08-09 at 16:50 -0700, Richard Henderson wrote:
>> > >
>> > > I think I like using a union to ensure the alignment of checksum better.
>> > > In dwarf2out.c we are always using one md5_ctx structure and one
>> > > checksum buffer but in fold-const.c there are routines where we use one
>> > > md5_ctx structure with 4 (fold_build2_stat_loc) or 6
>> > > (fold_build3_stat_loc) different checksum buffers.
>> >
>> > I'm not keen on this.
>> >
>> > Yes, it does happen to work, but only accidentally.  The CTX
>> > object and the CHECKSUM object have overlapping lifetimes
>> > within md5_read_ctx.
>> >
>> >
>> > r~
>>
>> I am not proposing we put the CTX object and the CHECKSUM object into
>> one union.  I am proposing we leave the CTX object alone and put the
>> CHECKSUM object in a union with a dummy variable of type md5_uint32 in
>> order to ensure that the CHECKSUM object has the correct alignment.
>>
>> So the usage would be:
>>
>> union md5_resbuf
>> {
>>   md5_uint32 dummy;                     /* Unused variable used to ensure 
>> alignment */
>>   unsigned char resbuf[16];             /* The buffer we are using in 
>> md5_finish_ctx */
>> };
>
> Or even
>
> union md5_resbuf
> {
>  md5_uint32 ui32[4];
>  unsigned char ui8[16];
> };
>
> and put it in include/md5.h instead, so that all clients
> can use it instead of cooking up the same themselves.
> liberty's md5.c itself internaly could be make to use the
> union instead of the casts?
>
> *looks for users of md5*
>
> but then, gold does:
>
>  unsigned char* ov = of->get_output_view(this->build_id_note_->offset(),
>                                          this->build_id_note_->data_size());
>
>  const char* style = parameters->options().build_id();
>  if (strcmp(style, "sha1") == 0)
>    {
>      sha1_ctx ctx;
>      sha1_init_ctx(&ctx);
>      sha1_process_bytes(iv, this->output_file_size_, &ctx);
>      sha1_finish_ctx(&ctx, ov);
>    }
>  else if (strcmp(style, "md5") == 0)
>    {
>      md5_ctx ctx;
>      md5_init_ctx(&ctx);
>      md5_process_bytes(iv, this->output_file_size_, &ctx);
>      md5_finish_ctx(&ctx, ov);
>
> which makes me wonder if the right fix isn't to change
> libiberty internally to not rely on the alignment.

I think that would be the best fix.  It hardly can be a performance
critical part - libiberty could use a local aligned buffer for
compute and do a copy on return.

Richard.

> libiberty's sha1 routines seem to have the same issue
> (though they don't appear used in gcc.  gold uses them, as
> seen above).
>
> --
> Pedro Alves
>


Re: [PATCH] Fix PR49937

2011-08-10 Thread Richard Guenther
On Tue, 9 Aug 2011, Hans-Peter Nilsson wrote:

> On Tue, 9 Aug 2011, Richard Guenther wrote:
> >
> > This fixes PR49937 - callers of get_{pointer,object}_alignment
> > probably should not use BIGGEST_ALIGNMENT to limit what these
> > functions return (why do they do that?  Maybe because formerly
> > the routines returned TYPE_ALIGN?  But why wasn't that bound by
> > BIGGEST_ALIGNMENT?) - some targets define that as 8 (such as cris or avr).
> 
> ISTM such limiting is only valid when constructing non-function
> pointers to unknown data and the goal is to error on the safe
> side regarding generally unknown data placement and alignment.
> (If it's known to be on stack or in constant or static storage,
> there are other macros.)
> 
> BIGGEST_ALIGNMENT is not what its name says. As can be seen in
> the docs; it's rather something like
> BIGGEST_MINIMAL_HARDWARE_ENFORCED_ALIGNMENT_FOR_DATA_ACCESS.
> 
> Point being, it's likely there's a greater alignment that's
> preferable for e.g. performance reasons when generating objects
> and pointers to them.
> 
> BIGGEST_ALIGNMENT cannot be used for checking alignment of
> function pointers (to see e.g. how many low bits can be
> scavenged for hacks).  Raising BIGGEST_ALIGNMENT to include
> FUNCTION_BOUNDARY would be just wrong, as that's not data being
> accessed.
> 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> And regtest cross to cris-elf passes at r177605.

Committed.

I think the max-align argument to get_{pointer,object}_alignment
doesn't make much sense.  I'll cook up a patch to remove them.

Richard.


Re: [patch, ia64] Fix unaligned accesses on IA64 from dwarf2out.c

2011-08-10 Thread Pedro Alves
On Wednesday 10 August 2011 01:02:50, Steve Ellcey wrote:
> On Tue, 2011-08-09 at 16:50 -0700, Richard Henderson wrote:
> > > 
> > > I think I like using a union to ensure the alignment of checksum better.
> > > In dwarf2out.c we are always using one md5_ctx structure and one
> > > checksum buffer but in fold-const.c there are routines where we use one
> > > md5_ctx structure with 4 (fold_build2_stat_loc) or 6
> > > (fold_build3_stat_loc) different checksum buffers.
> > 
> > I'm not keen on this.
> > 
> > Yes, it does happen to work, but only accidentally.  The CTX
> > object and the CHECKSUM object have overlapping lifetimes
> > within md5_read_ctx.
> > 
> > 
> > r~
> 
> I am not proposing we put the CTX object and the CHECKSUM object into
> one union.  I am proposing we leave the CTX object alone and put the
> CHECKSUM object in a union with a dummy variable of type md5_uint32 in
> order to ensure that the CHECKSUM object has the correct alignment.
> 
> So the usage would be:
> 
> union md5_resbuf
> {
>   md5_uint32 dummy; /* Unused variable used to ensure 
> alignment */
>   unsigned char resbuf[16]; /* The buffer we are using in 
> md5_finish_ctx */
> };

Or even

union md5_resbuf
{
  md5_uint32 ui32[4];
  unsigned char ui8[16];
};

and put it in include/md5.h instead, so that all clients
can use it instead of cooking up the same themselves.
liberty's md5.c itself internaly could be make to use the
union instead of the casts?

*looks for users of md5*

but then, gold does:

  unsigned char* ov = of->get_output_view(this->build_id_note_->offset(),
  this->build_id_note_->data_size());

  const char* style = parameters->options().build_id();
  if (strcmp(style, "sha1") == 0)
{
  sha1_ctx ctx;
  sha1_init_ctx(&ctx);
  sha1_process_bytes(iv, this->output_file_size_, &ctx);
  sha1_finish_ctx(&ctx, ov);
}
  else if (strcmp(style, "md5") == 0)
{
  md5_ctx ctx;
  md5_init_ctx(&ctx);
  md5_process_bytes(iv, this->output_file_size_, &ctx);
  md5_finish_ctx(&ctx, ov);

which makes me wonder if the right fix isn't to change
libiberty internally to not rely on the alignment.

libiberty's sha1 routines seem to have the same issue
(though they don't appear used in gcc.  gold uses them, as
seen above).

-- 
Pedro Alves


RE: [PATCH, ARM] Generate conditional compares in Thumb2 state

2011-08-10 Thread Jiangning Liu
PING...

BTW, in patch fix_cond_cmp_2.patch, the file mode of thumb2.md is carelessly
changed, so please check attached new patch file fix_cond_cmp_3.patch.

Thanks,
-Jiangning

> -Original Message-
> From: Jiangning Liu [mailto:jiangning@arm.com]
> Sent: Monday, August 08, 2011 2:01 PM
> To: 'Ramana Radhakrishnan'
> Cc: gcc-patches@gcc.gnu.org
> Subject: RE: [PATCH, ARM] Generate conditional compares in Thumb2 state
> 
> In attached new patch, arm_arch_thumb2 is changed to TARGET_THUMB2. I
> tried that with my patch command line option -mcpu=armv7-a9 doesn't
> generate IT instruction any longer, unless option "-mthumb" is being
> added.
> 
> All of my tests assume command line option -mthumb, while cortex-M0,
> cortex-M3 cortex-M4 are covered by options -mcpu=cortex-m0, -
> march=armv7-m, and -march=armv7e-m respectively.
> 
> As for the extra problem exposed by this specific case, may we treat it
> as a separate fix to decouple it with this one, and I can give follow
> up later on? I think it is a general problem not only for the
> particular pattern it/op/it/op. But I'm not sure how far we can go to
> optimize this kind of problems introduced by IT block. For this
> specific case, I see "if conversion" already generates conditional move
> before combination pass. So basically the peephole rules may probably
> work for most of the general scenarios. My initial thought is go over
> the rules introducing IT block and try to figure out all of the
> combination that two of this kinds of rules can be in sequential order.
> Maybe we only need to handle the most common case like this one. Since
> I do see a bunch of rules have something to do with problem, I'd like
> to look into all of them to give a most reasonable solution in a
> separate fix.
> 
> Does it make sense?
> 
> Thanks,
> -Jiangning
> 
> > -Original Message-
> > From: Ramana Radhakrishnan [mailto:ramana.radhakrish...@linaro.org]
> > Sent: Friday, August 05, 2011 9:20 AM
> > To: Jiangning Liu
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH, ARM] Generate conditional compares in Thumb2
> > state
> >
> > On 3 August 2011 08:48, Jiangning Liu  wrote:
> > > This patch is to generate more conditional compare instructions in
> > Thumb2
> > > state. Given an example like below,
> > >
> > > int f(int i, int j)
> > > {
> > >  if ( (i == '+') || (j == '-') ) {
> > >    return i;
> > >  } else {
> > >    return j;
> > >  }
> > > }
> > >
> > > Without the patch, compiler generates the following codes,
> > >
> > >        sub     r2, r0, #43
> > >        rsbs    r3, r2, #0
> > >        adc     r3, r3, r2
> > >        cmp     r1, #45
> > >        it      eq
> > >        orreq   r3, r3, #1
> > >        cmp     r3, #0
> > >        it      eq
> > >        moveq   r0, r1
> > >        bx      lr
> > >
> > > With the patch, compiler can generate conditional jump like below,
> > >
> > >        cmp     r0, #43
> > >        it      ne
> > >        cmpne   r1, #45
> > >        it      ne
> > >        movne   r0, r1
> > >        bx      lr
> >
> >
> > Nice improvement but there could be a single it block to handle both
> > and thus you could make this even better with
> >
> > cmp r0, #43
> > itt ne
> > cmpne r1 ,#45
> > movne r0, r1
> >
> > The way to do this would be to try and split this post-reload
> > unfortunately into the cmp instruction and the conditional compare
> > with the appropriate instruction length - Then the backend has a
> > chance of merging some of this into a single instruction.
> > Unfortunately that won't be very straight-forward but that's a
> > direction we probably ought to proceed with in this case.
> >
> > In a number of places:
> >
> > > +   if (arm_arch_thumb2)
> >
> > Ah instead of this please use if (TARGET_THUMB2) - arm_arch_thumb2 is
> > true based on the architecture levels and not necessarily if the user
> > wants to generate Thumb code. I don't want an unnecessary IT
> > instruction being emitted in the ASM block in ARM state for v7-a and
> > above.
> >
> > > Tested against arm-none-eabi target and no regression found.
> >
> > Presumably for ARM and Thumb2 state ?
> >
> >
> > cheers
> > Ramana


fix_cond_cmp_3.patch
Description: Binary data


Re: [PATCH] Work around PR 50031, sphinx3 slowdown in powerpc on GCC 4.6 and GCC 4.7

2011-08-10 Thread Richard Guenther
On Tue, Aug 9, 2011 at 8:07 PM, Michael Meissner
 wrote:
> This is an initial patch to work around the slow down of sphinx3 in power7 VSX
> that first shows up in GCC 4.6 and is still present in the current GCC 4.7
> trunk.  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50031
>
> The key part of the slowdown is in this inner loop in the
> vector_gautbl_eval_logs3  function in sphinx3 vector.c:
>
> {
>  int32 i, r;
>  float64 f;
>  int32 end, veclen;
>  float32 *m1, *m2, *v1, *v2;
>  float64 dval1, dval2, diff1, diff2;
>
>    /* ... */
>
>    for (i = 0; i < veclen; i++) {
>      diff1 = x[i] - m1[i];
>      dval1 -= diff1 * diff1 * v1[i];
>      diff2 = x[i] - m2[i];
>      dval2 -= diff2 * diff2 * v2[i];
>    }
>
>    /* ... */
>
> }
>
> In particular, the compiler 4.6 and beyond vectorizes this inner loop.  
> Because
> it doesn't know the alignment of the float pointers, it generates code to use
> unaligned vector loads unconditionally, which on the powerpc, involves using a
> load of an aligned pointer, and then doing a vperm instruction to permute the
> bytes.  Since the code first does the calculation in 32-bit floating point and
> then converts it to 64-bit floating point, the compiler does a vector convert
> of V4SF to V2DF in the loop.  On the powerpc, this involes two more permutes,
> and then the vector conversion.  Thus in the inner loop, there are:
>
>    4 vector loads
>    4 vector permutes to do the unalgined load
>    8 vector permutes to get things in the right registers for conversion
>    4 vector conversions

Are the arrays all well-aligned in practice?  Thus, would versioning the loop
for all-good-alignment help?

If we have 4 permutes and then 8 further ones - can we combine for example
an unaligned load permute and the following permute for the sf->df conversion?

Does ppc have a VSX tuned cost-model and is it applied correctly in this case?
Maybe we need more fine-grained costs?

Thanks,
Richard.

> This patch offers a new option (-mno-vector-convert-32bit-to-64bit) that
> disables the vector float/int conversions to double.  Overall this is a win:
>
> GCC 4.6, 32-bit:
>    12% improvement, 464.h264ref
>     5% improvement, 450.soplex
>     3% regression,  465.tonto
>     2% improvement, 481.wrf
>     9% improvement, 482.sphinx3
>
> GCC 4.6, 64-bit:
>     5% improvement, 456.hmmer
>     6% improvement, 464.h264ref
>    14% improvement, 482.sphinx3
>
> GCC 4.7, 32-bit:
>      2% improvement, 437.leslie3d
>      9% improvement, 482.sphinx3
>
> I haven't measured GCC 4.7 64-bit mode at the present time, but I can do so if
> desired.
>
> While I don't think this is the only solution to 50031, it at least helps us.
> It is encouraging that GCC 4.7 doesn't have the regression in tonto.
>
> I have bootstraped and run make check on both 4.6 and 4.7 compilers with no
> regressions.  Is it ok to install in the 4.7 tree?  At present, I have made 
> the
> default to generate the vectorized conversion, but it may make sense to flip
> the default.  Is this patch ok to apply?  Given if affects 4.6, did you want 
> to
> see it in 4.6 as well?
>
> [gcc]
> 2011-08-09  Michael Meissner  
>
>        PR tree-optimization/50031
>        * doc/invoke.texi (RS/6000 and PowerPC Options): Add
>        -mnvsx-vector-32bit-to-64bit switch.
>
>        * config/rs6000/rs6000.md (vec_unpacks_lo_v4sf): Add conditions on
>        -mvector-convert-32bit-to-64bit switch.
>        (vec_unpacks_float_hi_v4s): Ditto.
>        (vec_unpacks_float_lo_v4s): Ditto.
>        (vec_unpacku_float_hi_v4s): Ditto.
>        (vec_unpacku_float_lo_v4s): Ditto.
>
>        * config/rs6000/rs6000.opt (-mvector-convert-32bit-to-64bit): New
>        switch to control whether the compiler does 32->64 bit conversions.
>
> [gcc/testsuite]
> 2011-08-09  Michael Meissner  
>
>        PR tree-optimization/50031
>        * gcc.target/powerpc/vsx-vector-7.c: New test for
>        -mvector-convert-32bit-to-64bit.
>        * gcc.target/powerpc/vsx-vector-8.c: Ditto.
>
> --
> Michael Meissner, IBM
> 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
> meiss...@linux.vnet.ibm.com     fax +1 (978) 399-6899
>


[Patch] Properly find getopt system declaration

2011-08-10 Thread Romain GEISSLER

Hi

Thanks to the recent changes made to stage 2 and 3 (now built with g++), 
i noticed a little error in the configure script that tries the

system getopt declaration. Indeed, if your system defines it in a system
header file named "getopt.h" (for example /usr/include/getopt.h on a Red 
Hat 4 configuration), the configure script will incorrectly load
/path/to/gcc/src/include/getopt.h instead, and thus find no getopt 
declaration.


This can be solved by changing the appropriate -I${srcdir}/../include by
-iquote ${srcdir}/../include. I added a configure check to verify that 
the compiler accepts the -iquote switch (and fallback to -I otherwise).
Note that this only solve the getopt case, but -I is certainly often 
misused (instead of -iquote that would prevent error with system header 
having the same name than gcc header).


The attached patch has been tested for regression with a native x86_64 
bootstrap.


config/

2011-08-10  Romain Geissler  

* acx.m4 (ACX_CHECK_CC_ACCEPTS_IQUOTE) : Define.


gcc/

2011-08-10  Romain Geissler  

* configure.ac (acx_cv_cc_accepts_iquote): Define through a call
to ACX_CHECK_CC_ACCEPTS_IQUOTE.
(CFLAGS for gcc_AC_CHECK_DECLS): Use $acx_cv_cc_accepts_iquote
instead of "-I".
* configure: Regenerate.


Romain Geissler
Index: config/acx.m4
===
--- config/acx.m4   (revision 177557)
+++ config/acx.m4   (working copy)
@@ -355,6 +355,21 @@ AC_DEFUN([AC_PROG_CPP_WERROR],
 m4_define([AC_CHECK_HEADER],m4_defn([_AC_CHECK_HEADER_OLD]))
 ac_c_preproc_warn_flag=yes])# AC_PROG_CPP_WERROR
 
+# Check whether the CC accepts the -iquote switch.
+AC_DEFUN([ACX_CHECK_CC_ACCEPTS_IQUOTE], [
+   AC_CACHE_CHECK([wether $CC accepts -iquote], acx_cv_cc_accepts_iquote, [
+   save_CFLAGS=$CFLAGS
+   CFLAGS="-iquote ."
+   AC_COMPILE_IFELSE([int main() { return 0; }],
+   [acx_cv_cc_accepts_iquote=yes], 
[acx_cv_cc_accepts_iquote=no])
+   CFLAGS=$save_CFLAGS])
+
+   if test x$acx_cv_cc_accepts_iquote = xyes; then
+   cc_quote_include="-iquote "
+   else
+   cc_quote_include="-I"
+   fi])
+
 # Test for GNAT.
 # We require the gnatbind program, and a compiler driver that
 # understands Ada.  We use the user's CC setting, already found,
Index: gcc/configure
===
--- gcc/configure   (revision 177557)
+++ gcc/configure   (working copy)
@@ -5336,6 +5336,35 @@ $as_echo "$ac_cv_safe_to_define___extens
   $as_echo "#define _TANDEM_SOURCE 1" >>confdefs.h
 
 
+
+   { $as_echo "$as_me:${as_lineno-$LINENO}: checking wether $CC accepts 
-iquote" >&5
+$as_echo_n "checking wether $CC accepts -iquote... " >&6; }
+if test "${acx_cv_cc_accepts_iquote+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+   save_CFLAGS=$CFLAGS
+   CFLAGS="-iquote ."
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int main() { return 0; }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  acx_cv_cc_accepts_iquote=yes
+else
+  acx_cv_cc_accepts_iquote=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   CFLAGS=$save_CFLAGS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $acx_cv_cc_accepts_iquote" >&5
+$as_echo "$acx_cv_cc_accepts_iquote" >&6; }
+
+   if test x$acx_cv_cc_accepts_iquote = xyes; then
+   cc_quote_include="-iquote "
+   else
+   cc_quote_include="-I"
+   fi
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -10240,7 +10269,7 @@ $as_echo "#define HAVE_LANGINFO_CODESET
 
 # We will need to find libiberty.h and ansidecl.h
 saved_CFLAGS="$CFLAGS"
-CFLAGS="$CFLAGS -I${srcdir} -I${srcdir}/../include"
+CFLAGS="$CFLAGS ${cc_quote_include}${srcdir} 
${cc_quote_include}${srcdir}/../include"
 for ac_func in getenv atol asprintf sbrk abort atof getcwd getwd \
strsignal strstr strverscmp \
errno snprintf vsnprintf vasprintf malloc realloc calloc \
@@ -17763,7 +17792,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17766 "configure"
+#line 17795 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17869,7 +17898,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17872 "configure"
+#line 17901 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
Index: gcc/configure.ac
===
--- gcc/configure.ac(revision 177557)
+++ gcc/configure.ac(working copy)
@@ -302,6 +302,7 @@ AC_SUBST(CFLAGS)
 # -
 
 AC_USE_SYSTEM_EXTENSIONS
+ACX_CHECK_CC_ACCEPTS_IQUOTE
 AC_PROG_CPP
 AC_C_INLINE
 
@