Re: [RFC] Do not consider volatile asms as optimization barriers #1

2014-03-11 Thread Hans-Peter Nilsson
On Mon, 3 Mar 2014, Richard Sandiford wrote:
 AIUI:

Reading back the references doesn't yield any dissenting
flash-backs, FWIW.

So, a (use fp) then a (clobber fp)?  That was probably just too
weird for me to think of, much like a hypercorrect ending of the
previous clause. :)

Thanks for dealing with this, and for not making my initial
nightmarish interpretation of $SUBJECT come true: Do not
consider volatile asms as anything we have to consider.
At least I hope so.  Dig up this horse in 6 months?

brgds, H-P


[Build, Driver] Add -lcilkrts for -fcilkplus

2014-03-11 Thread Tobias Burnus
When using Cilk Plus (-fcilkplus), it makes sense to automatically link 
the run-time library (-lcilkrts).


This patch mimics libgomp by adding a .spec file; I am not 100% sure 
whether the .spec file is needed, but the pthread tests in libgomp imply 
that it makes sense. (libgomp also checks for -lrt for the 
high-performance timers, a check which is not required for libcilkrts.)


Bootstrapped on x86-64-gnu-linux.
OK for the trunk?

Tobias
2014-03-11  Tobias Burnus  bur...@net-b.de

gcc/
	* gcc.c (LINK_COMMAND_SPEC): Use libcilkrts.spec for -fcilkplus.
	(CILK_SELF_SPECS): New define.
	(driver_self_specs): Use it.

libcilkrts/
	* libcilkrts.spec.in: New.
	* Makefile.am: Handle libcilkrts.spec.
	* configure.ac: Determine link options for libcilkrts.spec.
	* Makefile.in: Regenerate.
	* configure: Regenerate.

diff --git a/gcc/gcc.c b/gcc/gcc.c
index 691623a..bea1479 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -766,6 +766,7 @@ proper position among the other output files.  */
 %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!nostartfiles:%S}}  VTABLE_VERIFICATION_SPEC  \
 %{static:} %{L*} %(mfwrap) %(link_libgcc)  SANITIZER_EARLY_SPEC  %o\
 %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\
+%{fcilkplus:%:include(libcilkrts.spec)%(link_cilkrts)}\
 %{fgnu-tm:%:include(libitm.spec)%(link_itm)}\
 %(mflib)  STACK_SPLIT_SPEC \
 %{fprofile-arcs|fprofile-generate*|coverage:-lgcov}  SANITIZER_SPEC  \
@@ -932,9 +933,15 @@ static const char *const multilib_defaults_raw[] = MULTILIB_DEFAULTS;
 #define GTM_SELF_SPECS %{fgnu-tm: -pthread}
 #endif
 
+/* Likewise for -fcilkplus.  */
+#ifndef CILK_SELF_SPECS
+#define CILK_SELF_SPECS %{fcilkplus: -pthread}
+#endif
+
 static const char *const driver_self_specs[] = {
   %{fdump-final-insns:-fdump-final-insns=.} %fdump-final-insns,
-  DRIVER_SELF_SPECS, CONFIGURE_SPECS, GOMP_SELF_SPECS, GTM_SELF_SPECS
+  DRIVER_SELF_SPECS, CONFIGURE_SPECS, GOMP_SELF_SPECS, GTM_SELF_SPECS,
+  CILK_SELF_SPECS
 };
 
 #ifndef OPTION_DEFAULT_SPECS
diff --git a/libcilkrts/Makefile.am b/libcilkrts/Makefile.am
index e902f73..ba07569 100644
--- a/libcilkrts/Makefile.am
+++ b/libcilkrts/Makefile.am
@@ -54,6 +54,7 @@ cilkincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/cilk
 
 # Target list.
 toolexeclib_LTLIBRARIES = libcilkrts.la
+nodist_toolexeclib_HEADERS = libcilkrts.spec
 nodist_cilkinclude_HEADERS = include/cilk/cilk.h
 
 libcilkrts_la_SOURCES =\
diff --git a/libcilkrts/configure.ac b/libcilkrts/configure.ac
index 61b45b0..6ee0d3e 100644
--- a/libcilkrts/configure.ac
+++ b/libcilkrts/configure.ac
@@ -49,7 +49,7 @@ AC_PROG_CC
 AC_PROG_CXX
 # AC_PROG_LIBTOOL
 # AC_CONFIG_MACRO_DIR([..])
-AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([Makefile libcilkrts.spec])
 AM_ENABLE_MULTILIB(, ..)
 AC_FUNC_ALLOCA
 
@@ -183,6 +183,36 @@ AC_LINK_IFELSE(
   AC_DEFINE(HAVE_PTHREAD_AFFINITY_NP, 1,
 [   Define if pthread_{,attr_}{g,s}etaffinity_np is supported.]))
 
+# Check to see if -pthread or -lpthread is needed.  Prefer the former.
+# In case the pthread.h system header is not found, this test will fail.
+XPCFLAGS=""
+CFLAGS="$CFLAGS -pthread"
+AC_LINK_IFELSE(
+ [AC_LANG_PROGRAM(
+  [#include <pthread.h>
+   void *g(void *d) { return NULL; }],
+  [pthread_t t; pthread_create(&t,NULL,g,NULL);])],
+ [XPCFLAGS=" -Wc,-pthread"],
+ [CFLAGS="$save_CFLAGS" LIBS="-lpthread $LIBS"
+  AC_LINK_IFELSE(
+   [AC_LANG_PROGRAM(
+[#include <pthread.h>
+ void *g(void *d) { return NULL; }],
+[pthread_t t; pthread_create(&t,NULL,g,NULL);])],
+   [],
+   [AC_MSG_ERROR([Pthreads are required to build libcilkrts])])])
+
+# Set up the set of libraries that we need to link against for libcilkrts.
+# Note that the CILK_SELF_SPEC in gcc.c may force -pthread,
+# which will force linkage against -lpthread (or equivalent for the system).
+# That's not 100% ideal, but about the best we can do easily.
+if test $enable_shared = yes; then
+  link_cilkrts="-lcilkrts %{static: $LIBS}"
+else
+  link_cilkrts="-lcilkrts $LIBS"
+fi
+AC_SUBST(link_cilkrts)
+
 
 # Must be last
 AC_OUTPUT
diff --git a/libcilkrts/libcilkrts.spec.in b/libcilkrts/libcilkrts.spec.in
new file mode 100644
index 000..b98cce9
--- /dev/null
+++ b/libcilkrts/libcilkrts.spec.in
@@ -0,0 +1,3 @@
+# This spec file is read by gcc when linking.  It is used to specify the
+# standard libraries we need in order to link with libcilkrts.
+*link_cilkrts: @link_cilkrts@
diff --git a/libcilkrts/Makefile.in b/libcilkrts/Makefile.in
index 706a0da..4439292 100644
--- a/libcilkrts/Makefile.in
+++ b/libcilkrts/Makefile.in
@@ -112,8 +112,8 @@ target_triplet = @target@
 DIST_COMMON = $(srcdir)/include/internal/rev.mk README ChangeLog \
 	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
 	$(top_srcdir)/configure $(am__configure_deps) \
-	$(srcdir)/../mkinstalldirs $(srcdir)/../depcomp \
-	$(cilkinclude_HEADERS)
+	$(srcdir)/../mkinstalldirs $(srcdir)/libcilkrts.spec.in \
+	$(srcdir)/../depcomp $(cilkinclude_HEADERS)
 
 # If we're building on 

RE: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Thomas Preud'homme
 From: Jakub Jelinek [mailto:ja...@redhat.com]
n->size = n1.size;
  + for (i = 0, mask = 0xff; i < n->size; i++, mask <<= 8)
 
 This should be mask <<= BITS_PER_UNIT for consistency.

Indeed.

 
 And, as has been said earlier, the testcase should be a runtime testcase
 (in gcc.c-torture/execute/), probably with
 __attribute__((noinline, noclone)) on the function, where main calls the
 function with a couple of different values, verifies the result and aborts
 if it is incorrect.

Done as well. This should also solve the problem of aarch64 (and potentially 
others) missing as the test will be executed by all platforms.

 
   Jakub

Since it seems Exchange is unable to keep formatting of patches (or I am unable 
to configure it correctly), I put the new patch in attachment in addition to 
reproducing it below.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 748805e..b6d7d93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-07  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * tree-ssa-math-opts.c (find_bswap_1): Fix bswap detection.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* config/microblaze/microblaze.md: Correct ashrsi_reg / lshrsi_reg names
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f3c0c85..b95b2b3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2014-03-10  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * gcc.c-torture/execute/pr60454.c (fake_swap32): Testcase to track
+   regression of PR60454.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* gcc/testsuite/gcc.target/microblaze/others/mem_reload.c: New test.
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr60454.c 
b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
new file mode 100644
index 000..e5fbd8f
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+
+#define __fake_const_swab32(x) ((uint32_t)(  \
+(((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |\
+(((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |\
+(((uint32_t)(x) & (uint32_t)0x000000ffUL) <<  8) |\
+(((uint32_t)(x) & (uint32_t)0x0000ff00UL)      ) |\
+(((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24)))
+
+/* Previous version of bswap optimization would detect byte swap when none
+   happen. This test aims at catching such wrong detection to avoid
+   regressions.  */
+
+uint32_t
+fake_swap32 (uint32_t in) __attribute__ ((noinline, noclone))
+{
+  return __fake_const_swab32 (in);
+}
+
+int main(void)
+{
+  if (fake_swap32 (0x12345678) != 0x78567E12)
+abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 8e372ed..9ff857c 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1801,7 +1801,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int 
limit)
 
   if (rhs_class == GIMPLE_BINARY_RHS)
 {
+  int i;
   struct symbolic_number n1, n2;
+  unsigned HOST_WIDEST_INT mask;
   tree source_expr2;
 
   if (code != BIT_IOR_EXPR)
@@ -1827,6 +1829,15 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, 
int limit)
return NULL_TREE;
 
  n->size = n1.size;
+ for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT)
+   {
+ unsigned HOST_WIDEST_INT masked1, masked2;
+
+ masked1 = n1.n & mask;
+ masked2 = n2.n & mask;
+ if (masked1 && masked2 && masked1 != masked2)
+   return NULL_TREE;
+   }
  n->n = n1.n | n2.n;
 
  if (!verify_symbolic_number_p (n, stmt))

incorrect_detection_v1.3.diff
Description: Binary data


[linaro/gcc-4_8-branch] Merge from gcc-4_8-branch

2014-03-11 Thread Yvan Roux
Hi,

we have merged the gcc-4_8-branch into linaro/gcc-4_8-branch up to
revision 208264 as r208471.

This will be part of our 2014.03 release.

Yvan


Re: [PATCH] Use the LTO linker plugin by default

2014-03-11 Thread Richard Biener
On Mon, 10 Mar 2014, Rainer Orth wrote:

 Richard Biener rguent...@suse.de writes:
 
  Ouch.  But as lto-plugin is a host module it should link against
  the host libgcc, no?  During stage1, that is.  So the question is
  why does it use the gcc/ compiler?
 
  For me it's using the host gcc:
 
  gcc -DHAVE_CONFIG_H -I. -I/space/rguenther/tramp3d/trunk/lto-plugin 
  -I/space/rguenther/tramp3d/trunk/lto-plugin/../include -DHAVE_CONFIG_H 
  -Wall -g -c /space/rguenther/tramp3d/trunk/lto-plugin/lto-plugin.c  -fPIC 
  -DPIC -o .libs/lto-plugin.o
  /bin/sh ./libtool --tag=CC --tag=disable-static  --mode=link gcc -Wall -g  
  -module -bindir /usr/local/lib/gcc/x86_64-unknown-linux-gnu/4.9.0  
  -static-libstdc++ -static-libgcc  -o liblto_plugin.la -rpath 
  /usr/local/lib/gcc/x86_64-unknown-linux-gnu/4.9.0 lto-plugin.lo 
  -Wc,../libiberty/pic/libiberty.a
  libtool: link: gcc -shared  .libs/lto-plugin.o
  ../libiberty/pic/libiberty.a   -Wl,-soname -Wl,liblto_plugin.so.0 -o 
  .libs/liblto_plugin.so.0.0.0
 
 It does use the host compiler for me, too.

So then if it succeeds to link to a shared libgcc_s then why
is it not able to find that later?  Maybe you miss setting
of a suitable LD_LIBRARY_PATH to pick up the runtime for
your host compiler?

  but maybe _that_ is the issue for you? (see also how it uses
  -static-libgcc, for me it doesn't even depend on libgcc_s)
 
 But as you can see above, libtool, being its usual helpful self, simply
 drops -static-libgcc ;-(  If I use -Wc,-static-libgcc, all seems fine.
 
 If -shared results in linking with libgcc_s.so.1 depends on the target.

Of course.

Richard.


Re: [PATCH, libjava]: Avoid suggest parentheses around comparison in operand of '|' in java/lang/natObject.cc

2014-03-11 Thread Andrew Haley
On 03/10/2014 08:13 PM, Uros Bizjak wrote:
 OK for mainline SVN and release branches?

Sure.  You don't need approval for patches that are obviously correct/trivial.

Thanks,
Andrew.



Re: [PATCH, libjava]: Avoid suggest parentheses around comparison in operand of '|' in java/lang/natObject.cc

2014-03-11 Thread Andrew Haley
On 03/10/2014 08:13 PM, Uros Bizjak wrote:
 OK for mainline SVN and release branches?

Sure.  You don't need approval for patches that are obviously
correct/trivial.

Thanks,
Andrew.



[PATCH, nios2] Fix frame pointer calculation

2014-03-11 Thread Chung-Lin Tang
The current Nios II prologue/epilogue code has a bug where the frame
pointer points to the start of the register save area, rather than the
frame slot where FP is saved (as specified by the Nios II ABI).

This was only discovered relatively recently, as dwarf-based unwinding
is used most of the time, plus nios2 GDB's prologue analyzer is capable
of determining where FP is stored. Still, this needs to be fixed to be
conformant to the ABI.

Tested (both the compiler and gdb) and applied to trunk.

Chung-Lin

2014-03-11  Chung-Lin Tang  clt...@codesourcery.com

* config/nios2/nios2.c (machine_function): Add fp_save_offset
field.
(nios2_compute_frame_layout):
Add calculation of cfun->machine->fp_save_offset.
(nios2_expand_prologue): Correct setting of frame pointer
register in prologue.
(nios2_expand_epilogue): Update recovery of stack pointer from
frame pointer accordingly.
(nios2_initial_elimination_offset): Update calculation of offset
for eliminating to HARD_FRAME_POINTER_REGNUM.
Index: config/nios2/nios2.c
===
--- config/nios2/nios2.c(revision 208471)
+++ config/nios2/nios2.c(working copy)
@@ -81,8 +81,10 @@ struct GTY (()) machine_function
   int args_size;
   /* Number of bytes needed to store registers in frame.  */
   int save_reg_size;
-   /* Offset from new stack pointer to store registers.  */
+  /* Offset from new stack pointer to store registers.  */
   int save_regs_offset;
+  /* Offset from save_regs_offset to store frame pointer register.  */
+  int fp_save_offset;
   /* != 0 if frame layout already calculated.  */
   int initialized;
 };
@@ -390,6 +392,17 @@ nios2_compute_frame_layout (void)
  }
 }
 
+  cfun->machine->fp_save_offset = 0;
+  if (save_mask & (1 << HARD_FRAME_POINTER_REGNUM))
+{
+  int fp_save_offset = 0;
+  for (regno = 0; regno < HARD_FRAME_POINTER_REGNUM; regno++)
+   if (save_mask & (1 << regno))
+ fp_save_offset += 4;
+
+  cfun->machine->fp_save_offset = fp_save_offset;
+}
+
   save_reg_size = NIOS2_STACK_ALIGN (save_reg_size);
   total_size += save_reg_size;
   total_size += NIOS2_STACK_ALIGN (crtl-args.pretend_args_size);
@@ -450,8 +463,8 @@ nios2_expand_prologue (void)
 {
   unsigned int regno;
   int total_frame_size, save_offset;
-  int sp_offset; /* offset from base_reg to final stack value.  */
-  int fp_offset; /* offset from base_reg to final fp value.  */
+  int sp_offset;  /* offset from base_reg to final stack value.  */
+  int save_regs_base; /* offset from base_reg to register save area.  */
   rtx insn;
 
   total_frame_size = nios2_compute_frame_layout ();
@@ -468,8 +481,7 @@ nios2_expand_prologue (void)
gen_int_mode (cfun-machine-save_regs_offset
  - total_frame_size, Pmode)));
   RTX_FRAME_RELATED_P (insn) = 1;
-
-  fp_offset = 0;
+  save_regs_base = 0;
   sp_offset = -cfun-machine-save_regs_offset;
 }
   else if (total_frame_size)
@@ -478,16 +490,16 @@ nios2_expand_prologue (void)
   gen_int_mode (-total_frame_size,
 Pmode)));
   RTX_FRAME_RELATED_P (insn) = 1;
-  fp_offset = cfun-machine-save_regs_offset;
+  save_regs_base = cfun-machine-save_regs_offset;
   sp_offset = 0;
 }
   else
-fp_offset = sp_offset = 0;
+save_regs_base = sp_offset = 0;
 
   if (crtl-limit_stack)
 nios2_emit_stack_limit_check ();
 
-  save_offset = fp_offset + cfun-machine-save_reg_size;
+  save_offset = save_regs_base + cfun-machine-save_reg_size;
 
   for (regno = LAST_GP_REG; regno  0; regno--)
 if (cfun-machine-save_mask  (1  regno))
@@ -498,9 +510,10 @@ nios2_expand_prologue (void)
 
   if (frame_pointer_needed)
 {
+  int fp_save_offset = save_regs_base + cfun-machine-fp_save_offset;
   insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
   stack_pointer_rtx,
-  gen_int_mode (fp_offset, Pmode)));
+  gen_int_mode (fp_save_offset, Pmode)));
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
@@ -555,7 +568,9 @@ nios2_expand_epilogue (bool sibcall_p)
   if (frame_pointer_needed)
 {
   /* Recover the stack pointer.  */
-  insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
+  insn = emit_insn (gen_add3_insn
+   (stack_pointer_rtx, hard_frame_pointer_rtx,
+gen_int_mode (-cfun-machine-fp_save_offset, Pmode)));
   cfa_adj = plus_constant (Pmode, stack_pointer_rtx,
   (total_frame_size
- cfun-machine-save_regs_offset));
@@ -772,7 +787,8 @@ nios2_initial_elimination_offset (int from, int to
 /* If we are asked for the frame pointer offset, 

Re: [Patch, fortran] PR 60392 wrong descriptor when passing a transposed array to a contiguous assumed shape dummy.

2014-03-11 Thread Janus Weil
Hi Mikael,

 here is a fix for a wrong code issue, where we pass a descriptor with
 broken bounds when the actual argument is a transposed array and the
 dummy an assumed shape dummy.
 The bug comes from the interaction of the transpose optimization,
 which creates a descriptor with transposed bounds without copying the
 data, and the contiguous optimization, which reuses the descriptor for
 passing as argument after the call to internal_pack.
 The attached patch makes a copy of the descriptor with the correct
 bounds when a transposed scalarization is detected.

 Regression-tested on x86_64-unknown-linux-gnu.
 This is not a regression as far as I know, but quite a severe
 wrong-code, albeit limited to the corner case of transpose and
 assumed shape and contiguous.  OK for trunk/4.8/4.7 anyway ?

I would say it's ok for trunk at least. About the branches I'm not
sure. Maybe someone else can add an opinion here ...

Cheers,
Janus


Re: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Jakub Jelinek
On Tue, Mar 11, 2014 at 02:53:39PM +0800, Thomas Preud'homme wrote:
 +2014-03-10  Thomas Preud'homme  thomas.preudho...@arm.com
 +
 + PR tree-optimization/60454
 + * gcc.c-torture/execute/pr60454.c (fake_swap32): Testcase to track
 + regression of PR60454.

The ChangeLog entry is wrong, the file is new, so you should just say:
* gcc.c-torture/execute/pr60454.c: New test.

But more importantly:

 --- /dev/null
 +++ b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
 @@ -0,0 +1,25 @@
 +#include stdint.h

I'm not sure if all targets even provide stdint.h header, and if all targets
have uint32_t type.  For most targets gcc now provides stdint.h (or uses
glibc stdint.h).
So perhaps instead of including stdint.h just do:
#ifndef __UINT32_TYPE__
int
main ()
{
  return 0;
}
#else
typedef __UINT32_TYPE__ uint32_t;

...
#endif
?
 +
 +#define __fake_const_swab32(x) ((uint32_t)(\
 +(((uint32_t)(x)  (uint32_t)0x00ffUL)  24) |\
 +(((uint32_t)(x)  (uint32_t)0xff00UL)   8) |\
 +(((uint32_t)(x)  (uint32_t)0x00ffUL)   8) |\
 +(((uint32_t)(x)  (uint32_t)0xff00UL)  ) |\
 +(((uint32_t)(x)  (uint32_t)0xff00UL)  24)))
 +
 +/* Previous version of bswap optimization would detect byte swap when none
 +   happen. This test aims at catching such wrong detection to avoid
 +   regressions.  */
 +
 +uint32_t
 +fake_swap32 (uint32_t in) __attribute__ ((noinline, noclone))
 +{

This must not have been tested, this is a syntax error.
It should be
__attribute__ ((noinline, noclone)) uint32_t
fake_swap32 (uint32_t in)
{
...

(the attribute can be after the arguments only for prototypes).

 +  return __fake_const_swab32 (in);
 +}
 +
 +int main(void)
 +{
 +  if (fake_swap32 (0x12345678) != 0x78567E12)
 +abort ();

Use __builtin_abort (); here instead?  You aren't including stdlib.h
for abort.

After fixing up the testcase, you don't need to bootstrap/regtest it again,
make check-gcc RUNTESTFLAGS=execute.exp=pr60454.c
should be enough.

 --- a/gcc/tree-ssa-math-opts.c
 +++ b/gcc/tree-ssa-math-opts.c
 @@ -1801,7 +1801,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, 
 int limit)
  
if (rhs_class == GIMPLE_BINARY_RHS)
  {
 +  int i;
struct symbolic_number n1, n2;
 +  unsigned HOST_WIDEST_INT mask;
tree source_expr2;
  
if (code != BIT_IOR_EXPR)
 @@ -1827,6 +1829,15 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, 
 int limit)
   return NULL_TREE;
  
 n-size = n1.size;
 +   for (i = 0, mask = 0xff; i  n-size; i++, mask = BITS_PER_UNIT)
 + {
 +   unsigned HOST_WIDEST_INT masked1, masked2;
 +
 +   masked1 = n1.n  mask;
 +   masked2 = n2.n  mask;
 +   if (masked1  masked2  masked1 != masked2)
 + return NULL_TREE;
 + }
 n-n = n1.n | n2.n;
  
 if (!verify_symbolic_number_p (n, stmt))

The rest looks good to me.

Jakub


Re: [PATCH] ARM: Weaker memory barriers

2014-03-11 Thread Will Deacon

Hi John,

On Tue, Mar 11, 2014 at 02:54:18AM +, John Carr wrote:
 A comment in arm/sync.md notes We should consider issuing a inner
 shareability zone barrier here instead.  Here is my first attempt
 at a patch to emit weaker memory barriers.  Three instructions seem
 to be relevant for user mode code on my Cortex A9 Linux box:
 
 dmb ishst, dmb ish, dmb sy
 
 I believe these correspond to a release barrier, a full barrier
 with respect to other CPUs, and a full barrier that also orders
 relative to I/O.

Not quite; DMB ISHST only orders writes with other writes, so loads can move
across it in both directions. That means it's not sufficient for releasing a
lock, for example.

shameless plug

I gave a presentation at ELCE about the various ARMv7 barrier options (from
a kernel perspective):

  https://www.youtube.com/watch?v=6ORn6_35kKo

/shameless plug

 Consider this a request for comments on whether the approach is correct.
 I haven't done any testing yet (beyond eyeballing the assembly output).

You'll probably find that a lot of ARM micro-architectures treat them
equivalently, which makes this a good source of potentially latent bugs if
you get the wrong options. When I added these options to the kernel, I
tested with a big/little A7/A15 setup using a CCI400 (the cache-coherent
interconnect) to try and tickle things a bit better, but it's by no means
definitive.

  (define_insn *memory_barrier
[(set (match_operand:BLK 0  )
 - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
 -  TARGET_HAVE_MEMORY_BARRIER
 + (unspec:BLK
 +  [(match_dup 0) (match_operand:SI 1 const_int_operand)]
 +  UNSPEC_MEMORY_BARRIER))]
 +  TARGET_HAVE_DMB || TARGET_HAVE_MEMORY_BARRIER
{
  if (TARGET_HAVE_DMB)
{
 - /* Note we issue a system level barrier. We should consider issuing
 -a inner shareabilty zone barrier here instead, ie. DMB ISH.  */
 - /* ??? Differentiate based on SEQ_CST vs less strict?  */
 - return dmb\tsy;
 +switch (INTVAL(operands[1]))
 + {
 + case MEMMODEL_RELEASE:
 +  return dmb\tishst;

As stated above, this isn't correct.

 + case MEMMODEL_CONSUME:

You might be able to build this one out of control dependency + ISB (in
lieu of a true address dependency). However, given the recent monolithic C11
thread that took over this list and others, let's play it safe for now and
stick with DMB ISH.

 + case MEMMODEL_ACQUIRE:
 + case MEMMODEL_ACQ_REL:
 +   return dmb\tish;

I think you probably *always* want ISH for GCC. You assume normal, cacheable
, coherent memory right? That also mirrors the first comment you delete
above.

 + case MEMMODEL_SEQ_CST:
 +   return dmb\tsy;

Again, ISH here should be fine. SY is for things like non-coherent devices,
which I don't think GCC has a concept of (the second comment you delete
doesn't make any sense and has probably tricked you).

Hope that helps,

Will


RE: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Thomas Preud'homme
 From: Jakub Jelinek [mailto:ja...@redhat.com]
 
 The ChangeLog entry is wrong, the file is new, so you should just say:
   * gcc.c-torture/execute/pr60454.c: New test.

Done.

 
 But more importantly:
 
  --- /dev/null
  +++ b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
  @@ -0,0 +1,25 @@
  +#include stdint.h
 
 I'm not sure if all targets even provide stdint.h header, and if all targets
 have uint32_t type.  For most targets gcc now provides stdint.h (or uses
 glibc stdint.h).
 So perhaps instead of including stdint.h just do:
 #ifndef __UINT32_TYPE__
 int
 main ()
 {
   return 0;
 }
 #else
 typedef __UINT32_TYPE__ uint32_t;
 
 ...
 #endif
 ?

I also added a typedef unsigned uint32_t for when sizeof(unsigned) == 4. I hope 
it's right.

  +
  +#define __fake_const_swab32(x) ((uint32_t)(  \
  +(((uint32_t)(x)  (uint32_t)0x00ffUL)  24) |\
  +(((uint32_t)(x)  (uint32_t)0xff00UL)   8) |\
  +(((uint32_t)(x)  (uint32_t)0x00ffUL)   8) |\
  +(((uint32_t)(x)  (uint32_t)0xff00UL)  ) |\
  +(((uint32_t)(x)  (uint32_t)0xff00UL)  24)))
  +
  +/* Previous version of bswap optimization would detect byte swap when
 none
  +   happen. This test aims at catching such wrong detection to avoid
  +   regressions.  */
  +
  +uint32_t
  +fake_swap32 (uint32_t in) __attribute__ ((noinline, noclone))
  +{
 
 This must not have been tested, this is a syntax error.
 It should be
 __attribute__ ((noinline, noclone)) uint32_t
 fake_swap32 (uint32_t in)
 {

My bad, I did catch this error but changed it only in a copy of the sources 
but produced the patch from git. I moved the attribute before the function name 
but after the return value though and it did compile and run. I used your 
version when redoing the patch.

 ...
 
 (the attribute can be after the arguments only for prototypes).

That's why it always seems to be different than in my memories. I'll try to 
remember that (or always use prototypes in such case).

 
  +  return __fake_const_swab32 (in);
  +}
  +
  +int main(void)
  +{
  +  if (fake_swap32 (0x12345678) != 0x78567E12)
  +abort ();
 
 Use __builtin_abort (); here instead?  You aren't including stdlib.h
 for abort.

Again, I must have copied the extern void abort() definition I've seen 
elsewhere but made the change only on a local copy of the sources. My 
apologies.

 
 After fixing up the testcase, you don't need to bootstrap/regtest it again,
 make check-gcc RUNTESTFLAGS=execute.exp=pr60454.c
 should be enough.

Right, but I made another bootstrap for another patch just after so I need to 
do it again.

 
 The rest looks good to me.

Thanks.

See below for the new version as well as in attachment.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 748805e..b6d7d93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-07  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * tree-ssa-math-opts.c (find_bswap_1): Fix bswap detection.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* config/microblaze/microblaze.md: Correct ashrsi_reg / lshrsi_reg names
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f3c0c85..04ce403 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-10  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * gcc.c-torture/execute/pr60454.c: New test.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* gcc/testsuite/gcc.target/microblaze/others/mem_reload.c: New test.
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr60454.c 
b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
new file mode 100644
index 000..83f2987
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
@@ -0,0 +1,31 @@
+#ifdef __UINT32_TYPE__
+typedef __UINT32_TYPE__ uint32_t;
+#else
+typedef unsigned uint32_t;
+#endif
+
+#define __fake_const_swab32(x) ((uint32_t)(  \
+(((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |\
+(((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |\
+(((uint32_t)(x) & (uint32_t)0x000000ffUL) <<  8) |\
+(((uint32_t)(x) & (uint32_t)0x0000ff00UL)      ) |\
+(((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24)))
+
+/* Previous version of bswap optimization would detect byte swap when none
+   happen. This test aims at catching such wrong detection to avoid
+   regressions.  */
+
+__attribute__ ((noinline, noclone)) uint32_t
+fake_swap32 (uint32_t in)
+{
+  return __fake_const_swab32 (in);
+}
+
+int main(void)
+{
+  if (sizeof (uint32_t) != 4)
+return 0;
+  if (fake_swap32 (0x12345678) != 0x78567E12)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 8e372ed..9ff857c 100644
--- a/gcc/tree-ssa-math-opts.c
+++ 

Re: [PATCH GCC]Fix a latent bug in cfgcleanup by updating loop's latch info if necessary

2014-03-11 Thread Richard Biener
On Mon, Mar 10, 2014 at 10:29 AM, bin.cheng bin.ch...@arm.com wrote:
 Hi,
 When I was investigating PR60363 which is caused by previous patch for PR60280,
 I found there is a latent bug in remove_forwarder_block_with_phi because GCC
 doesn't update loop's latch information.  Without this patch, cfgcleanup
 facility will remove and rebuild the loop structure, resulting in loss of
 loop meta information.

 This patch is just an obvious pickup, but it isn't intended to fix pr60363.

 Test on cortex-m3
 Bootstrap and test on x86_64

 Is it OK?

Ok.

Thanks,
Richard.


 2014-03-10  Bin Cheng  bin.ch...@arm.com

 * tree-cfgcleanup.c (remove_forwarder_block_with_phi): Record
 bb's single pred and update the father loop's latch info later.


Re: [PATCH, libgcc]: Avoid warning: array subscript is above array bounds when compiling crtstuff.c

2014-03-11 Thread Richard Biener
On Mon, Mar 10, 2014 at 11:14 AM, Jakub Jelinek ja...@redhat.com wrote:
 On Mon, Mar 10, 2014 at 11:10:05AM +0100, Uros Bizjak wrote:
  Well, better is non-obvious, while it is smaller (which is good for
  initialization and thus rarely executed code), the common case is that
  *jcr_list is 0 (gcj is used rarely these days) and for the common case it 
  is
  one instruction longer.
  Perhaps at least use if (__builtin_expect (*jcr_list != NULL, 0))?
  Otherwise looks good to me.

 Following source:

 void frame_dummy (void)
 {
   void **jcr_list = __JCR_LIST__;
   if (__builtin_expect (*jcr_list != 0, 0))
 register_classes (jcr_list);
 }

 generates exactly the same code while avoiding the warning. So,
 following your concern, I am testing following patch:

 But then the asm is gone and it can start to break any time soon.
 For GCC __JCR_LIST__ is simply a zero sized local array and thus
 dereferencing its first element is invalid.  It doesn't know that we use
 linker magic to populate the array.

You can also declare it extern and define the symbol in a global asm ...

Richard.

 Jakub


Re: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Jakub Jelinek
On Tue, Mar 11, 2014 at 06:48:37PM +0800, Thomas Preud'homme wrote:
 I also added a typedef unsigned uint32_t for when sizeof(unsigned) == 4. I 
 hope it's right.

In theory you could have __CHAR_BIT__ different from 8 and what you care
about is that uint32_t has exactly 32 bits, so the check would need to be
  if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
return 0;

 +  if (fake_swap32 (0x12345678) != 0x78567E12)
 +__builtin_abort ();

Also, for int16 targets where __UINT32_TYPE__ is supposedly unsigned long,
I think you would need to use:

  if (fake_swap32 (0x12345678UL) != 0x78567E12UL)
__builtin_abort ();

(the C standard guarantees that unsigned long is at least 32-bit and
unsigned int at least 16-bit).

Ok with those changes.

Do you have write access, or will somebody from your coworkers commit it for
you?  Are you covered by ARM GCC Copyright assignment?

Jakub


Re: [PATCH][AArch64] Fix default CPU configurations

2014-03-11 Thread Kyrill Tkachov

Ping^2

http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01487.html

Kyrill

On 05/03/14 09:37, Kyrill Tkachov wrote:

Ping?

On 25/02/14 10:08, Kyrill Tkachov wrote:

Hi all,

The problem solved in this patch is that when gcc is configured with
--with-arch=armv8-a gcc will go into  aarch64-arches.def, pick the
representative CPU (Cortex-A53 for ARMv8-A) and use that CPU's ISA flags. Now
that we specified that Cortex-A53 has CRC and crypto though, this means that gcc
will choose by default to enable CRC and Crypto.

What it should be doing though is to use the 4th field in the AARCH64_ARCH macro
that specifies the ISA flags implied by the architecture. This patch does that
by looking in aarch64-arches.def and extracting the 4th field appropriately and
using that as the ext_mask when processing a --with-arch option.

Furthermore, if no --with-arch or --with-cpu directives are specified config.gcc
will set TARGET_DEFAULT_CPU to TARGET_CPU_generic. What it should be doing, is
leaving it undefined so that the backend in aarch64.h can define its own default
with the correct ISA options (currently we have this scheme where the
TARGET_CPU_core is encoded in the first 6 bits of TARGET_DEFAULT_CPU and the
ISA flags are encoded in the upper part of it. We should clean that up in the
next release). Before this patch, the code in aarch64.h that does that
initialisation was never even exercised because TARGET_CPU_DEFAULT was always
defined by config.gcc no matter what! config.gcc defined it as
TARGET_CPU_generic but without encoding the appropriate ISA flags in the upper
bits, leading to a cpu configured without fp or simd.

After a discussion with Richard, this patch sets the default CPU (if no
-mcpu,-march,--with-cpu,--with-arch is given) to be generic+fp+simd. The generic
CPU already schedules like the Cortex-A53, so it should give a decent generic
tuning.

This patch should improve the current situation a bit.
With this patch:
- If --with-arch=armv8-a is specified we will use generic+fp+simd as the CPU
(without the patch it's cortex-a53+fp+simd+crc+crypto)
- If no arch or cpu options specified anywhere, we will use the generic+fp+simd
CPU (without the patch it would be just generic)

Tested aarch64-none-elf on a model and checked the .cpu directive in the
generated assembly for a variety of --with-cpu, --with-arch combinations.

I'm proposing this patch as an alternative to
http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01072.html.

Ok for trunk?

Thanks,
Kyrill

2014-02-25  Kyrylo Tkachov  kyrylo.tkac...@arm.com

   * config.gcc (aarch64*-*-*): Use ISA flags from aarch64-arches.def.
   Do not define target_cpu_default2 to generic.
   * config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Use generic cpu.
   * config/aarch64/aarch64.c (aarch64_override_options): Update comment.
   * config/aarch64/aarch64-arches.def (armv8-a): Use generic cpu.








Re: [PATCH (for next stage 1)] Add return type to gimple function dumps

2014-03-11 Thread Richard Biener
On Mon, Mar 10, 2014 at 8:22 PM, David Malcolm dmalc...@redhat.com wrote:
 Gimple function dumps contain the types of parameters, but not of the
 return type.

 The attached patch fixes this omission; here's an example of the
 before/after diff:
 $ diff -up /tmp/pr23401.c.004t.gimple.old /tmp/pr23401.c.004t.gimple.new
 --- /tmp/pr23401.c.004t.gimple.old  2014-03-10 13:40:08.972063541 -0400
 +++ /tmp/pr23401.c.004t.gimple.new  2014-03-10 13:39:49.346515464 -0400
 @@ -1,3 +1,4 @@
 +int
   (int i)
  {
int D.1731;


 Successfully bootstrapped and regrtested on x86_64 Linux (Fedora 20).

 A couple of test cases needed tweaking, since they were counting the
 number of occurrences of int in the gimple dump, which thus changed
 for functions returning int (like the one above).

 OK for next stage 1?

ISTR doing that and giving up because of the sheer number of
testsuite FAILs this causes.  Did you properly test all languages
(I specifically remember Fortran here)?

You also want to pass dump_flags | TDF_SLIM here otherwise
you'll get struct types expanded.

Richard.

 [motivation: am generating code in my JIT from other program's
 representations, and have been debugging type mismatches in function
 calls; the precise return types would otherwise have been non-obvious]


Re: [PATCH][AArch64] Fix default CPU configurations

2014-03-11 Thread Marcus Shawcroft
On 25 February 2014 10:08, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:
 Hi all,

 The problem solved in this patch is that when gcc is configured with
 --with-arch=armv8-a gcc will go into  aarch64-arches.def, pick the
 representative CPU (Cortex-A53 for ARMv8-A) and use that CPUs ISA flags. Now
 that we specified that Cortex-A53 has CRC and crypto though, this means that
 gcc will choose by default to enable CRC and Crypto.

 What it should be doing though is to use the 4th field in the AARCH64_ARCH
 macro that specifies the ISA flags implied by the architecture. This patch
 does that by looking in aarch64-arches.def and extracting the 4th field
 appropriately and using that as the ext_mask when processing a --with-arch
 option.

 Furthermore, if no --with-arch or --with-cpu directives are specified
 config.gcc will set TARGET_DEFAULT_CPU to TARGET_CPU_generic. What it should
 be doing, is leaving it undefined so that the backend in aarch64.h can
 define its own default with the correct ISA options (currently we have this
 scheme where the TARGET_CPU_core is encoded in the first 6 bits of
 TARGET_DEFAULT_CPU and the ISA flags are encoded in the upper part of it. We
 should clean that up in the next release). Before this patch, the code in
 aarch64.h that does that initialisation was never even exercised because
 TARGET_CPU_DEFAULT was always defined by config.gcc no matter what!
 config.gcc defined it as TARGET_CPU_generic but without encoding the
 appropriate ISA flags in the upper bits, leading to a cpu configured without
 fp or simd.

 After a discussion with Richard, this patch sets the default CPU (if no
 -mcpu,-march,--with-cpu,--with-arch is given) to be generic+fp+simd. The
 generic CPU already schedules like the Cortex-A53, so it should give a
 decent generic tuning.

 This patch should improve the current situation a bit.
 With this patch:
 - If --with-arch=armv8-a is specified we will use generic+fp+simd as the CPU
 (without the patch it's cortex-a53+fp+simd+crc+crypto)
 - If no arch or cpu options specified anywhere, we will use the
 generic+fp+simd CPU (without the patch it would be just generic)

 Tested aarch64-none-elf on a model and checked the .cpu directive in the
 generated assembly for a variety of --with-cpu, --with-arch combinations

 I'm proposing this patch as an alternative to
 http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01072.html.

- if test x$target_cpu_cname = x
+ if test x$target_cpu_cname != x

I think the addition of quoting here is orthogonal to the issue you
are fixing. There are several other references to target_cpu_cname in
config.gcc none of which are quoted, so I guess either they should all
be quoted, or not, and if they are it is a separate patch.

That nit aside  I think the rest of the patch makes sense for stage-4.
  Give the RM's 24 hours to comment otherwise drop the above nit and
commit.

Cheers
/Marcus


Re: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Michael Eager

On 03/11/14 03:48, Thomas Preud'homme wrote:


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 748805e..b6d7d93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-07  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * tree-ssa-math-opts.c (find_bswap_1): Fix bswap detection.
+
  2014-02-23  David Holsgrove david.holsgr...@xilinx.com

* config/microblaze/microblaze.md: Correct ashrsi_reg / lshrsi_reg names


Don't include the ChangeLog entry as part of your patch.
It isn't likely to apply.

--
Michael Eager    ea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077


Re: [PATCH][AArch64] Fix default CPU configurations

2014-03-11 Thread Kyrill Tkachov

Hi Marcus,

On 11/03/14 11:25, Marcus Shawcroft wrote:

On 25 February 2014 10:08, Kyrill Tkachov kyrylo.tkac...@arm.com wrote:

Hi all,

The problem solved in this patch is that when gcc is configured with
--with-arch=armv8-a gcc will go into  aarch64-arches.def, pick the
representative CPU (Cortex-A53 for ARMv8-A) and use that CPUs ISA flags. Now
that we specified that Cortex-A53 has CRC and crypto though, this means that
gcc will choose by default to enable CRC and Crypto.

What it should be doing though is to use the 4th field in the AARCH64_ARCH
macro that specifies the ISA flags implied by the architecture. This patch
does that by looking in aarch64-arches.def and extracting the 4th field
appropriately and using that as the ext_mask when processing a --with-arch
option.

Furthermore, if no --with-arch or --with-cpu directives are specified
config.gcc will set TARGET_DEFAULT_CPU to TARGET_CPU_generic. What it should
be doing, is leaving it undefined so that the backend in aarch64.h can
define its own default with the correct ISA options (currently we have this
scheme where the TARGET_CPU_core is encoded in the first 6 bits of
TARGET_DEFAULT_CPU and the ISA flags are encoded in the upper part of it. We
should clean that up in the next release). Before this patch, the code in
aarch64.h that does that initialisation was never even exercised because
TARGET_CPU_DEFAULT was always defined by config.gcc no matter what!
config.gcc defined it as TARGET_CPU_generic but without encoding the
appropriate ISA flags in the upper bits, leading to a cpu configured without
fp or simd.

After a discussion with Richard, this patch sets the default CPU (if no
-mcpu,-march,--with-cpu,--with-arch is given) to be generic+fp+simd. The
generic CPU already schedules like the Cortex-A53, so it should give a
decent generic tuning.

This patch should improve the current situation a bit.
With this patch:
- If --with-arch=armv8-a is specified we will use generic+fp+simd as the CPU
(without the patch it's cortex-a53+fp+simd+crc+crypto)
- If no arch or cpu options specified anywhere, we will use the
generic+fp+simd CPU (without the patch it would be just generic)

Tested aarch64-none-elf on a model and checked the .cpu directive in the
generated assembly for a variety of --with-cpu, --with-arch combinations

I'm proposing this patch as an alternative to
http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01072.html.

- if test x$target_cpu_cname = x
+ if test x$target_cpu_cname != x

I think the addition of quoting here is orthogonal to the issue you
are fixing. There are several other references to target_cpu_cname in
config.gcc none of which are quoted, so I guess either they should all
be quoted, or not, and if they are it is a separate patch.


Perhaps I should have commented on this.
This change is not orthogonal.
When I initially wrote it as  if test x$target_cpu_cname != x the script 
complained of an error and happily ignored that line, giving the wrong value to 
target_cpu_default2 on the line below!


config.gcc: line 4065: test: too many arguments

If I quote it, it works fine. I suspect it's because of spaces introduced into 
target_cpu_cname earlier, since target_cpu_cname has the format 
TARGET_CPU_$base_id | $ext_mask from earlier, but I'm not sure.


Kyrill


That nit aside  I think the rest of the patch makes sense for stage-4.
   Give the RM's 24 hours to comment otherwise drop the above nit and
commit.

Cheers
/Marcus






Re: [PATCH, libgcc]: Avoid warning: array subscript is above array bounds when compiling crtstuff.c

2014-03-11 Thread Jakub Jelinek
On Tue, Mar 11, 2014 at 11:58:01AM +0100, Richard Biener wrote:
 On Mon, Mar 10, 2014 at 11:14 AM, Jakub Jelinek ja...@redhat.com wrote:
  On Mon, Mar 10, 2014 at 11:10:05AM +0100, Uros Bizjak wrote:
   Well, better is non-obvious, while it is smaller (which is good for
   initialization and thus rarely executed code), the common case is that
   *jcr_list is 0 (gcj is used rarely these days) and for the common case 
   it is
   one instruction longer.
   Perhaps at least use if (__builtin_expect (*jcr_list != NULL, 0))?
   Otherwise looks good to me.
 
  Following source:
 
  void frame_dummy (void)
  {
void **jcr_list = __JCR_LIST__;
if (__builtin_expect (*jcr_list != 0, 0))
  register_classes (jcr_list);
  }
 
  generates exactly the same code while avoiding the warning. So,
  following your concern, I am testing following patch:
 
  But then the asm is gone and it can start to break any time soon.
  For GCC __JCR_LIST__ is simply a zero sized local array and thus
  dereferencing its first element is invalid.  It doesn't know that we use
  linker magic to populate the array.
 
 You can also declare it extern and define the symbol in a global asm ...

But then you need to know how (which is quite target dependent).
Or define it in some other CU and ld -r it together, but supposedly not all
linkers can do ld -r.

Jakub


Re: [PATCH] Update -flto docs wrt option handling

2014-03-11 Thread Richard Biener
On Sat, 8 Mar 2014, Gerald Pfeifer wrote:

 Thanks for the time and diligence writing this up, Richi!
 
 On Thu, 6 Mar 2014, Richard Biener wrote:
  -files; if @option{-flto} is not passed to the linker, no
  -interprocedural optimizations are applied.
  +files; if @option{-fno-lto} is not passed to the linker, no
  +interprocedural optimizations are applied.
 
 That looks like one no too much?  

Fixed.

   Note that when
  +@option{-fno-fat-lto-objects} is enabled the compile-stage is faster
  +but you cannot perform a regular, non-LTO link, on them.
 
 The comma past link appears too much.

Fixed.

   Additionally, the optimization flags used to compile individual files
   are not necessarily related to those used at link time.  For instance,
 
 That requires -ffat-lto-objects, though?  The text above talks more
 about -fno-fat-lto-objects, not the positive form.

Doesn't require, no.  Unfortunately the default depends on some
configure checks ... so the positive form below is required on
some systems to make the -fno-lto link work.

   @smallexample
  -gcc -c -O0 -flto foo.c
  -gcc -c -O0 -flto bar.c
  -gcc -o myprog -flto -O3 foo.o bar.o
  +gcc -c -O0 -ffat-lto-objects -flto foo.c
  +gcc -c -O0 -ffat-lto-objects -flto bar.c
  +gcc -o myprog -O3 foo.o bar.o
   @end smallexample
   
   This produces individual object files with unoptimized assembler
   code, but the resulting binary @file{myprog} is optimized at
  -@option{-O3}.  If, instead, the final binary is generated without
  -@option{-flto}, then @file{myprog} is not optimized.
  +@option{-O3}.  If, instead, the final binary is generated with
  +@option{-fno-lto}, then @file{myprog} is not optimized.
 
 Would it make sense to use -Os in the example?  I assume in the
 last case myprog would then be optimized with -Os?

You mean -Os instead of -O0?

 I am suggesting this since I believe it's not optimization vs
 no optimization but optimization level provided during compilation?

Yes.  But we were motivating the -O0 vs. -On case with fat objects
because you can get a debug build quickly with -fno-lto and
an optimized build otherwise (without the need to re-compile).
Not sure if that matters in practice ... but that's what the example
tries to show you how to do.

[I've merely edited existing parts to reflect reality in 4.9
due to changed defaults - the whole section should be rewritten
to be more in a FAQ-like way.  That is, You want to do X?  Here is
how to do it!]

  +Currently, the following options and their settings are taken from
  +the first object file that explicitly specified them:
  +@option{-fPIC}, @option{-fpic}, @option{-fpie}, @option{-fcommon},
  +@option{-fexceptions}, @option{-fnon-call-exceptions}, @option{-fgnu-tm}
  +and all the @option{-m} target flags.
 
 No -O options in case none are provided during link time?

See below, If you do not specify an optimization level option 
I've moved this to the very top.

  +Certain ABI changing flags are required to match in all compilation-units
  +and trying to override this at link-time with a conflicting value
  +is ignored.  This includes options such as @option{-freg-struct-return}
  +and @option{-fpcc-struct-return}. 
 
 If they are required to match, shouldn't a conflicting value during
 link time trigger a diagnostic -- error or at least warning?

Yes, but unfortunately all diagnostics from link-time are buffered
by collect2 and thus emitted very late.  So we don't emit any
but fatal diagnostics from lto-wrapper.

  +Other options such as @option{-ffp-contract}, 
  @option{-fno-strict-overflow},
  +@option{-fwrapv}, @option{-fno-trapv} or @option{-fno-strict-aliasing}
  +are passed through to the link stage and merged conservatively for
  +conflicting translation units.  You can override them at link time.
 
 What does conservative merging imply?  How does that work?

I've added

Specifically
@option{-fno-strict-overflow}, @option{-fwrapv} and @option{-fno-trapv} 
take
precedence and for example @option{-ffp-contract=off} takes precedence
over @option{-ffp-contract=fast}.  You can override them at link time.


  +same link with the same options and also specify those options at
  +link-time.
 
 link time (noun)

Fixed.

  -GCC will not work with an older/newer version of GCC@.
  +GCC will not work with an older/newer version of GCC.
 
 What is a version here?  Release series?
 
 Will GCC 4.9.0 and 4.9.1 work, or not?

We make no guarantees ;)  Specifically the implemented
bytecode version check is not strong enough :/

Updated patch below.

Ok?

Thanks,
Richard.

2014-03-11  Richard Biener  rguent...@suse.de

* doc/invoke.texi (flto): Update for changes in 4.9.

Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi (revision 208478)
+++ gcc/doc/invoke.texi (working copy)
@@ -8524,8 +8524,9 @@ file.  When the object files are linked
 bodies are read from these ELF sections and instantiated as if they

[PATCH] Fix PRs 60429 and 60485, PTA offset constraints

2014-03-11 Thread Richard Biener

This fixes offsetted constraint handling in the constraint builder
and the solver.  The existing code didn't consider non-equal-sized
fields and thus missed including all those that are overlapping
an offsetted variable.

Fixed like the following, bootstrapped and tested on 
x86_64-unknown-linux-gnu and applied to trunk so far
(needs massaging to apply to 4.8 or 4.7).

[bah, points-to should support unit-testing - it's incredibly hard
to cover all bases - the testcases cover only two touched paths,
maybe time to revive and improve my -Otest idea...]

Richard.

2014-03-11  Richard Biener  rguent...@suse.de

PR tree-optimization/60429
PR tree-optimization/60485
* tree-ssa-structalias.c (set_union_with_increment): Properly
take into account all fields that overlap the shifted vars.
(do_sd_constraint): Likewise.
(do_ds_constraint): Likewise.
(get_constraint_for_ptr_offset): Likewise.

* gcc.dg/pr60485-1.c: New testcase.
* gcc.dg/pr60485-2.c: Likewise.

Index: gcc/tree-ssa-structalias.c
===
*** gcc/tree-ssa-structalias.c  (revision 208448)
--- gcc/tree-ssa-structalias.c  (working copy)
*** set_union_with_increment  (bitmap to, bi
*** 993,1015 
changed |= bitmap_set_bit (to, i);
else
{
! unsigned HOST_WIDE_INT fieldoffset = vi-offset + inc;
  
  /* If the offset makes the pointer point to before the
 variable use offset zero for the field lookup.  */
! if (inc  0
!  fieldoffset  vi-offset)
!   fieldoffset = 0;
! 
! vi = first_or_preceding_vi_for_offset (vi, fieldoffset);
! 
! changed |= bitmap_set_bit (to, vi-id);
! /* If the result is not exactly at fieldoffset include the next
!field as well.  See get_constraint_for_ptr_offset for more
!rationale.  */
! if (vi-offset != fieldoffset
!  vi-next != 0)
!   changed |= bitmap_set_bit (to, vi-next);
}
  }
  
--- 993,1020 
changed |= bitmap_set_bit (to, i);
else
{
! HOST_WIDE_INT fieldoffset = vi-offset + inc;
! unsigned HOST_WIDE_INT size = vi-size;
  
  /* If the offset makes the pointer point to before the
 variable use offset zero for the field lookup.  */
! if (fieldoffset  0)
!   vi = get_varinfo (vi-head);
! else
!   vi = first_or_preceding_vi_for_offset (vi, fieldoffset);
! 
! do
!   {
! changed |= bitmap_set_bit (to, vi-id);
! if (vi-is_full_var
! || vi-next == 0)
!   break;
! 
! /* We have to include all fields that overlap the current field
!shifted by inc.  */
! vi = vi_next (vi);
!   }
! while (vi-offset  fieldoffset + size);
}
  }
  
*** do_sd_constraint (constraint_graph_t gra
*** 1618,1633 
  {
varinfo_t v = get_varinfo (j);
HOST_WIDE_INT fieldoffset = v-offset + roffset;
unsigned int t;
  
if (v-is_full_var)
!   fieldoffset = v-offset;
else if (roffset != 0)
!   v = first_vi_for_offset (v, fieldoffset);
!   /* If the access is outside of the variable we can ignore it.  */
!   if (!v)
!   continue;
  
do
{
  t = find (v-id);
--- 1623,1643 
  {
varinfo_t v = get_varinfo (j);
HOST_WIDE_INT fieldoffset = v-offset + roffset;
+   unsigned HOST_WIDE_INT size = v-size;
unsigned int t;
  
if (v-is_full_var)
!   ;
else if (roffset != 0)
!   {
! if (fieldoffset  0)
!   v = get_varinfo (v-head);
! else
!   v = first_or_preceding_vi_for_offset (v, fieldoffset);
!   }
  
+   /* We have to include all fields that overlap the current field
+shifted by roffset.  */
do
{
  t = find (v-id);
*** do_sd_constraint (constraint_graph_t gra
*** 1644,1659 
add_graph_edge (graph, lhs, t))
flag |= bitmap_ior_into (sol, get_varinfo (t)-solution);
  
! /* If the variable is not exactly at the requested offset
!we have to include the next one.  */
! if (v-offset == (unsigned HOST_WIDE_INT)fieldoffset
  || v-next == 0)
break;
  
  v = vi_next (v);
- fieldoffset = v-offset;
}
!   while (1);
  }
  
  done:
--- 1654,1666 
add_graph_edge (graph, lhs, t))
flag |= bitmap_ior_into (sol, get_varinfo (t)-solution);
  
! if (v-is_full_var
  || v-next == 0)
break;
  
  v = vi_next (v);
}
!   while (v-offset  fieldoffset + size);
  }
  
  done:
*** do_ds_constraint (constraint_t c, bitmap

[PATCH] Fix error message from -Wcast-qual when casting away volatile

2014-03-11 Thread Magnus Reftel
Currently, casting away volatile from a pointer makes -Wcast-qual
claim that __attribute__((noreturn)) was cast away (see bugzilla
55383). The attached patch, originally written by Manuel López-Ibáñez
and updated to match trunk by me, corrects that. No regressions on
gcc from applying this patch (as reported by make check) were seen.

BR
Magnus Reftel

2014-03-11 Manuel López-Ibáñez  m...@gcc.gnu.org

 PR c/55383
 * c/c-typeck.c: use correct format string in cast-qual warning
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 524a59f..0bfc12b 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -4855,7 +4855,7 @@ handle_warn_cast_qual (location_t loc, tree type, tree otype)
 /* There are qualifiers present in IN_OTYPE that are not present
in IN_TYPE.  */
 warning_at (loc, OPT_Wcast_qual,
-		cast discards %q#v qualifier from pointer target type,
+		cast discards %qv qualifier from pointer target type,
 		discarded);
 
   if (added || discarded)
diff --git a/gcc/testsuite/c-c++-common/Wcast-qual-1.c b/gcc/testsuite/c-c++-common/Wcast-qual-1.c
index 640e4f0..af80438 100644
--- a/gcc/testsuite/c-c++-common/Wcast-qual-1.c
+++ b/gcc/testsuite/c-c++-common/Wcast-qual-1.c
@@ -85,11 +85,11 @@ f3 (void ***bar)
 void
 f4 (void * const **bar)
 {
-  const void ***p9 = (const void ***) bar; /* { dg-warning cast } */
+  const void ***p9 = (const void ***) bar; /* { dg-warning cast discards .const. qualifier  } */
   void * const **p11 = (void * const **) bar;
   void ** const *p13 = (void ** const *) bar; /* { dg-warning cast } */
   const void * const **p15 = (const void * const **) bar; /* { dg-warning cast } */
-  const void ** const *p17 = (const void ** const *) bar; /* { dg-warning cast } */
+  const void ** const *p17 = (const void ** const *) bar; /* { dg-warning cast discards .const. qualifier } */
   void * const * const * p19 = (void * const * const *) bar;
   const void * const * const *p21 = (const void * const * const *) bar;
 }


Re: [C++ Patch/RFC] PR 60389

2014-03-11 Thread Jason Merrill

On 03/11/2014 08:03 AM, Paolo Carlini wrote:

+  if (DECL_INHERITED_CTOR_BASE (fun)
+   TREE_CODE (fun) == TEMPLATE_DECL)
+{
+  ret = false;
+  if (complain)
+   error (inherited constructors inherit %constexpr% from 
+  the designated base);
+}


To correct my wording, the B constructor is the inheriting constructor, 
the inherited constructor is in A.


Let's look up the inherited constructor here and print it to be helpful. 
 Probably the easiest way to find it will be to add a new entry point 
to locate_fn_flags so we call it with


(DECL_INHERITED_CTOR_BASE (fun), DECL_NAME (fun), 
FUNCTION_FIRST_USER_PARMTYPE (fun), LOOKUP_NORMAL|LOOKUP_SPECULATIVE, 
complain)


Then we can say inherited constructor %qD is not constexpr.

Jason



[PATCH][match-and-simplify] Move constant folding to gimple_match_and_simplify

2014-03-11 Thread Richard Biener

This moves handling of constant folding and operand order canonicalization
to the folding worker instead of doing that only when building a stmt
as fallback.  Matches what fold_unary vs. fold_build does.

Applied to branch.

Richard.

2014-03-11  Richard Biener  rguent...@suse.de

* gimple-fold.c (gimple_build): Move constant folding and
operand order canonicalization ...
* gimple-match-head.c (gimple_match_and_simplify): ... here.

Index: gcc/gimple-fold.c
===
*** gcc/gimple-fold.c   (revision 208478)
--- gcc/gimple-fold.c   (working copy)
*** gimple_build (gimple_seq *seq, location_
*** 3637,3649 
  enum tree_code code, tree type, tree op0,
  tree (*valueize)(tree))
  {
-   if (CONSTANT_CLASS_P (op0))
- {
-   tree res =  fold_unary_to_constant (code, type, op0);
-   if (res != NULL_TREE)
-   return res;
- }
- 
tree res = gimple_match_and_simplify (code, type, op0, seq, valueize);
if (!res)
  {
--- 3637,3642 
*** gimple_build (gimple_seq *seq, location_
*** 3674,3698 
  enum tree_code code, tree type, tree op0, tree op1,
  tree (*valueize)(tree))
  {
-   if (CONSTANT_CLASS_P (op0)  CONSTANT_CLASS_P (op1))
- {
-   tree res = fold_binary_to_constant (code, type, op0, op1);
-   /* ???  We can't assert that we fold this to a constant as
-  for example we can't fold things like 1 / 0.  */
-   if (res != NULL_TREE)
-   return res;
- }
- 
-   /* Canonicalize operand order both for matching and fallback stmt
-  generation.  */
-   if (commutative_tree_code (code)
-tree_swap_operands_p (op0, op1, false))
- {
-   tree tem = op0;
-   op0 = op1;
-   op1 = tem;
- }
- 
tree res = gimple_match_and_simplify (code, type, op0, op1, seq, valueize);
if (!res)
  {
--- 3667,3672 
*** gimple_build (gimple_seq *seq, location_
*** 3716,3740 
  enum tree_code code, tree type, tree op0, tree op1, tree op2,
  tree (*valueize)(tree))
  {
-   if (CONSTANT_CLASS_P (op0)  CONSTANT_CLASS_P (op1)
-CONSTANT_CLASS_P (op2))
- {
-   tree res = fold_ternary/*_to_constant */ (code, type, op0, op1, op2);
-   if (res != NULL_TREE
-  CONSTANT_CLASS_P (res))
-   return res;
- }
- 
-   /* Canonicalize operand order both for matching and fallback stmt
-  generation.  */
-   if (commutative_ternary_tree_code (code)
-tree_swap_operands_p (op0, op1, false))
- {
-   tree tem = op0;
-   op0 = op1;
-   op1 = tem;
- }
- 
tree res = gimple_match_and_simplify (code, type, op0, op1, op2,
seq, valueize);
if (!res)
--- 3690,3695 
*** gimple_build (gimple_seq *seq, location_
*** 3765,3779 
  enum built_in_function fn, tree type, tree arg0,
  tree (*valueize)(tree))
  {
-   if (CONSTANT_CLASS_P (arg0))
- {
-   tree decl = builtin_decl_implicit (fn);
-   tree res = fold_builtin_n (loc, decl, arg0, 1, false);
-   gcc_assert (res != NULL_TREE
-  CONSTANT_CLASS_P (res));
-   return res;
- }
- 
tree res = gimple_match_and_simplify (fn, type, arg0, seq, valueize);
if (!res)
  {
--- 3720,3725 
Index: gcc/gimple-match-head.c
===
*** gcc/gimple-match-head.c (revision 208478)
--- gcc/gimple-match-head.c (working copy)
*** gimple_match_and_simplify (enum tree_cod
*** 247,252 
--- 247,259 
   tree op0,
   gimple_seq *seq, tree (*valueize)(tree))
  {
+   if (CONSTANT_CLASS_P (op0))
+ {
+   tree res = fold_unary_to_constant (code, type, op0);
+   if (res != NULL_TREE)
+   return res;
+ }
+ 
code_helper rcode;
tree ops[3] = {};
if (!gimple_match_and_simplify (code, type, op0,
*** gimple_match_and_simplify (enum tree_cod
*** 260,265 
--- 267,291 
   tree op0, tree op1,
   gimple_seq *seq, tree (*valueize)(tree))
  {
+   if (CONSTANT_CLASS_P (op0)  CONSTANT_CLASS_P (op1))
+ {
+   tree res = fold_binary_to_constant (code, type, op0, op1);
+   /* ???  We can't assert that we fold this to a constant as
+for example we can't fold things like 1 / 0.  */
+   if (res != NULL_TREE)
+   return res;
+ }
+ 
+   /* Canonicalize operand order both for matching and fallback stmt
+  generation.  */
+   if (commutative_tree_code (code)
+tree_swap_operands_p (op0, op1, false))
+ {
+   tree tem = op0;
+   op0 = op1;
+   op1 = tem;
+ }
+ 
code_helper rcode;
tree ops[3] = {};
if (!gimple_match_and_simplify (code, type, op0, op1,
*** 

PATCH to add -std=c++14

2014-03-11 Thread Ed Smith-Rowland

Why not also -std=gnu++14?



[PATCH][match-and-simplify] Convert SCCVN

2014-03-11 Thread Richard Biener

This converts remaining parts of SCCVN to use the
gimple_match_and_simplify interface.  Most importantly
(apart from one legacy case) we no longer build GENERIC
trees from GIMPLE to do simplification nor track
whether it's maybe worthwhile doing so.

Committed to branch (a few more optimization regressions due
to missed patterns appear).

Richard.

2014-03-11  Richard Biener  rguent...@suse.de

* tree-ssa-sccvn.h (struct vn_ssa_aux): Remove has_constants
member.
* tree-ssa-sccvn.c (vn_get_expr_for): Simplify for legacy PRE use.
(visit_copy): Do not set or use has_constants or expr.
(visit_reference_op_call): Likewise.
(visit_phi): Likewise.
(visit_use): Likewise.
(visit_reference_op_load): Simplify result using
gimple_match_and_simplify.
(expr_has_constants, stmt_has_constants, valueize_expr,
simplify_binary_expression, simplify_unary_expression): Remove.
(try_to_simplify): Do not use gimple_fold_stmt_to_constant_1.

Index: gcc/tree-ssa-sccvn.c
===
*** gcc/tree-ssa-sccvn.c(revision 208478)
--- gcc/tree-ssa-sccvn.c(working copy)
*** tree
*** 383,392 
  vn_get_expr_for (tree name)
  {
vn_ssa_aux_t vn = VN_INFO (name);
-   gimple def_stmt;
-   tree expr = NULL_TREE;
-   enum tree_code code;
- 
if (vn-valnum == VN_TOP)
  return name;
  
--- 383,388 
*** vn_get_expr_for (tree name)
*** 407,464 
if (vn-expr != NULL_TREE)
  return vn-expr;
  
!   /* Otherwise use the defining statement to build the expression.  */
!   def_stmt = SSA_NAME_DEF_STMT (vn-valnum);
! 
!   /* If the value number is not an assignment use it directly.  */
!   if (!is_gimple_assign (def_stmt))
! return vn-valnum;
! 
!   /* FIXME tuples.  This is incomplete and likely will miss some
!  simplifications.  */
!   code = gimple_assign_rhs_code (def_stmt);
!   switch (TREE_CODE_CLASS (code))
! {
! case tcc_reference:
!   if ((code == REALPART_EXPR
!  || code == IMAGPART_EXPR
!  || code == VIEW_CONVERT_EXPR)
!  TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (def_stmt),
! 0)) == SSA_NAME)
!   expr = fold_build1 (code,
!   gimple_expr_type (def_stmt),
!   TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0));
!   break;
! 
! case tcc_unary:
!   expr = fold_build1 (code,
! gimple_expr_type (def_stmt),
! gimple_assign_rhs1 (def_stmt));
!   break;
! 
! case tcc_binary:
!   expr = fold_build2 (code,
! gimple_expr_type (def_stmt),
! gimple_assign_rhs1 (def_stmt),
! gimple_assign_rhs2 (def_stmt));
!   break;
! 
! case tcc_exceptional:
!   if (code == CONSTRUCTOR
!  TREE_CODE
!  (TREE_TYPE (gimple_assign_rhs1 (def_stmt))) == VECTOR_TYPE)
!   expr = gimple_assign_rhs1 (def_stmt);
!   break;
! 
! default:;
! }
!   if (expr == NULL_TREE)
! return vn-valnum;
! 
!   /* Cache the expression.  */
!   vn-expr = expr;
! 
!   return expr;
  }
  
  /* Return the vn_kind the expression computed by the stmt should be
--- 403,410 
if (vn-expr != NULL_TREE)
  return vn-expr;
  
!   /* If not, return the value-number.  */
!   return vn-valnum;
  }
  
  /* Return the vn_kind the expression computed by the stmt should be
*** defs_to_varying (gimple stmt)
*** 2727,2746 
return changed;
  }
  
- static bool expr_has_constants (tree expr);
- static tree valueize_expr (tree expr);
- 
  /* Visit a copy between LHS and RHS, return true if the value number
 changed.  */
  
  static bool
  visit_copy (tree lhs, tree rhs)
  {
-   /* The copy may have a more interesting constant filled expression
-  (we don't, since we know our RHS is just an SSA name).  */
-   VN_INFO (lhs)-has_constants = VN_INFO (rhs)-has_constants;
-   VN_INFO (lhs)-expr = VN_INFO (rhs)-expr;
- 
/* And finally valueize.  */
rhs = SSA_VAL (rhs);
  
--- 2673,2684 
*** visit_reference_op_call (tree lhs, gimpl
*** 2799,2810 
vnresult-result = lhs;
  
if (vnresult-result  lhs)
!   {
! changed |= set_ssa_val_to (lhs, vnresult-result);
! 
! if (VN_INFO (vnresult-result)-has_constants)
!   VN_INFO (lhs)-has_constants = true;
!   }
  }
else
  {
--- 2737,2743 
vnresult-result = lhs;
  
if (vnresult-result  lhs)
!   changed |= set_ssa_val_to (lhs, vnresult-result);
  }
else
  {
*** visit_reference_op_load (tree lhs, tree
*** 2864,2896 
 of VIEW_CONVERT_EXPR TREE_TYPE (result) (result).
 So first simplify and lookup this expression to see if it
 is already available.  */
!   

[PATCH] Fix __builtin_unreachable related regression (PR middle-end/60482)

2014-03-11 Thread Jakub Jelinek
Hi!

As described in the PR, the r208165 change regressed following test.
The problem is that VRP inserts a useless ASSERT_EXPR right before
__builtin_unreachable () (obviously, the uses of the ASSERT_EXPR
lhs aren't and can't be used by anything), which then prevents
assert_unreachable_fallthru_edge_p from detecting it properly
(but, even ignoring ASSERT_EXPRs there still would fail, because
the ASSERT_EXPR adds another user of the SSA_NAME we check imm uses for).

Perhaps FOUND_IN_SUBGRAPH (4.3 and earlier era) would be always true
here, but live_on_edge provably isn't always true, so it makes sense to test
it, something that isn't live on the edge is useless.

The tree-cfg.c change is just small improvement discovered when looking into
it, clobber stmts before __builtin_unreachable can be certainly ignored,
they don't do anything.

The patch regresses ssa-ifcombine-10.c testcase, I'll post a fix for that
momentarily.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2014-03-11  Jakub Jelinek  ja...@redhat.com

PR middle-end/60482
* tree-vrp.c (register_edge_assert_for_1): Don't add assert
if there are multiple uses, but op doesn't live on E edge.
* tree-cfg.c (assert_unreachable_fallthru_edge_p): Also ignore
clobber stmts before __builtin_unreachable.

* gcc.dg/vect/pr60482.c: New test.

--- gcc/tree-vrp.c.jj   2014-01-25 00:11:37.0 +0100
+++ gcc/tree-vrp.c  2014-03-10 14:59:03.748267354 +0100
@@ -5423,12 +5423,9 @@ register_edge_assert_for_1 (tree op, enu
 return false;
 
   /* We know that OP will have a zero or nonzero value.  If OP is used
- more than once go ahead and register an assert for OP.
-
- The FOUND_IN_SUBGRAPH support is not helpful in this situation as
- it will always be set for OP (because OP is used in a COND_EXPR in
- the subgraph).  */
-  if (!has_single_use (op))
+ more than once go ahead and register an assert for OP.  */
+  if (live_on_edge (e, op)
+      && !has_single_use (op))
 {
   val = build_int_cst (TREE_TYPE (op), 0);
   register_new_assert_for (op, op, code, val, NULL, e, bsi);
--- gcc/tree-cfg.c.jj   2014-02-20 21:38:42.0 +0100
+++ gcc/tree-cfg.c  2014-03-10 14:59:52.058957446 +0100
@@ -410,9 +410,9 @@ assert_unreachable_fallthru_edge_p (edge
  if (gsi_end_p (gsi))
return false;
  stmt = gsi_stmt (gsi);
- if (is_gimple_debug (stmt))
+ while (is_gimple_debug (stmt) || gimple_clobber_p (stmt))
{
- gsi_next_nondebug (gsi);
+ gsi_next (gsi);
  if (gsi_end_p (gsi))
return false;
  stmt = gsi_stmt (gsi);
--- gcc/testsuite/gcc.dg/vect/pr60482.c.jj  2014-03-10 15:08:16.700085976 
+0100
+++ gcc/testsuite/gcc.dg/vect/pr60482.c 2014-03-10 15:15:09.609738455 +0100
@@ -0,0 +1,20 @@
+/* PR middle-end/60482 */
+/* { dg-do compile } */
+/* { dg-additional-options -Ofast } */
+/* { dg-require-effective-target vect_int } */
+
+double
+foo (double *x, int n)
+{
+  double p = 0.0;
+  int i;
+  x = __builtin_assume_aligned (x, 128);
+  if (n % 128)
+__builtin_unreachable ();
+  for (i = 0; i < n; i++)
+p += x[i];
+  return p;
+}
+
+/* { dg-final { scan-tree-dump-not epilog loop required vect } } */
+/* { dg-final { cleanup-tree-dump vect } } */

Jakub


[PATCH] Improve ifcombine

2014-03-11 Thread Jakub Jelinek
Hi!

This patch fixes the ssa-ifcombine-10.c regression.
The thing is that the uselessly added ASSERT_EXPR makes vrp1 change
the cfg slightly like this:
   bb 2:
   _4 = x_3(D)  1;
   if (_4 == 0)
 goto bb 5;
   else
 goto bb 3;
 
   bb 3:
   _5 = x_3(D)  4;
   if (_5 != 0)
-goto bb 5;
-  else
 goto bb 4;
+  else
+goto bb 5;
 
   bb 4:
 
   bb 5:
-  # t_1 = PHI 0(2), 3(3), 0(4)
+  # t_1 = PHI 0(2), 3(4), 0(3)
   return t_1;
(addition of the ASSERT_EXPR resulted in creation of a new bb to insert
it into and that bb is then removed again during cfg cleanup, but
it ends up effectively swapping the forwarder block from one edge of the
gimple cond to the other with corresponding phi arg change).
Now, tree_ssa_ifcombine_bb apparently only groks the latter form (the one
with + lines), but not the equivalent form the testcase had before VRP
(and with the PR60482 fix also has after VRP, the one with - lines).

This patch teaches tree_ssa_ifcombine_bb to handle both forms.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Note, the phi-opt-2.c change is there because the patch made the
test fail, as for LOGICAL_OP_NON_SHORT_CIRCUIT we now generate even
better code, return a  b.  So, I've added ssa-ifcombine-13.c test
which is phi-opt-2.c and test that for -mbranch-cost=2 we have no ifs,
and phi-opt-2.c now checks that for -mbranch-cost=1 we do have one if
(ifcombine then doesn't do anything and we verify that phiopt does what it
should).

2014-03-11  Jakub Jelinek  ja...@redhat.com

* tree-ssa-ifcombine.c (forwarder_block_to): New function.
(tree_ssa_ifcombine_bb): Handle also cases where else_bb is
an empty forwarder block to then_bb or vice versa and then_bb
and else_bb are effectively swapped.

* gcc.dg/tree-ssa/ssa-ifcombine-12.c: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-13.c: New test.
* gcc.dg/tree-ssa/phi-opt-2.c: Pass -mbranch-cost=1 if
possible, only test for exactly one if if -mbranch-cost=1
has been passed.

--- gcc/tree-ssa-ifcombine.c.jj 2014-03-11 12:13:53.012618098 +0100
+++ gcc/tree-ssa-ifcombine.c2014-03-11 16:15:29.084329709 +0100
@@ -135,6 +135,16 @@ bb_no_side_effects_p (basic_block bb)
   return true;
 }
 
+/* Return true if BB is an empty forwarder block to TO_BB.  */
+
+static bool
+forwarder_block_to (basic_block bb, basic_block to_bb)
+{
+  return empty_block_p (bb)
+ && single_succ_p (bb)
+ && single_succ (bb) == to_bb;
+}
+
 /* Verify if all PHI node arguments in DEST for edges from BB1 or
BB2 to DEST are the same.  This makes the CFG merge point
free from side-effects.  Return true in this case, else false.  */
@@ -660,6 +670,102 @@ tree_ssa_ifcombine_bb (basic_block inner
  return ifcombine_ifandif (inner_cond_bb, true, outer_cond_bb, false,
true);
}
+
+  if (forwarder_block_to (else_bb, then_bb))
+   {
+ /* Other possibilities for the && form, if else_bb is
+empty forwarder block to then_bb.  Compared to the above simpler
+forms this can be treated as if then_bb and else_bb were swapped,
+and the corresponding inner_cond_bb not inverted because of that.
+For same_phi_args_p we look at equality of arguments between
+edge from outer_cond_bb and the forwarder block.  */
+ if (recognize_if_then_else (outer_cond_bb, inner_cond_bb, then_bb)
+     && same_phi_args_p (outer_cond_bb, else_bb, then_bb)
+     && bb_no_side_effects_p (inner_cond_bb))
+   {
+ /* We have
+  outer_cond_bb
+if (q) goto inner_cond_bb; else goto then_bb;
+  inner_cond_bb
+if (p) goto then_bb; else goto else_bb;
+  else_bb
+# empty fallthru
+  then_bb
+# x = PHI y(outer), z(inner), y(else)
+...
+  */
+ return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb,
+   false, false);
+   }
+
+ /* And a version where the outer condition is negated.  */
+ if (recognize_if_then_else (outer_cond_bb, then_bb, inner_cond_bb)
+     && same_phi_args_p (outer_cond_bb, else_bb, then_bb)
+     && bb_no_side_effects_p (inner_cond_bb))
+   {
+ /* We have
+  outer_cond_bb
+if (q) goto then_bb; else goto inner_cond_bb;
+  inner_cond_bb
+if (p) goto then_bb; else goto else_bb;
+  else_bb
+# empty fallthru
+  then_bb
+# x = PHI y(outer), z(inner), y(else)
+...
+  */
+ return ifcombine_ifandif (inner_cond_bb, false, outer_cond_bb,
+   true, false);

Re: [C++ Patch/RFC] PR 60389

2014-03-11 Thread Paolo Carlini

Hi,

On 03/11/2014 02:10 PM, Jason Merrill wrote:

On 03/11/2014 08:03 AM, Paolo Carlini wrote:

+  if (DECL_INHERITED_CTOR_BASE (fun)
+   TREE_CODE (fun) == TEMPLATE_DECL)
+{
+  ret = false;
+  if (complain)
+error (inherited constructors inherit %constexpr% from 
+   the designated base);
+}


To correct my wording, the B constructor is the inheriting 
constructor, the inherited constructor is in A.


Let's look up the inherited constructor here and print it to be 
helpful.  Probably the easiest way to find it will be to add a new 
entry point to locate_fn_flags so we call it with


(DECL_INHERITED_CTOR_BASE (fun), DECL_NAME (fun), 
FUNCTION_FIRST_USER_PARMTYPE (fun), LOOKUP_NORMAL|LOOKUP_SPECULATIVE, 
complain)


Then we can say inherited constructor %qD is not constexpr.
I see. The below tries to implement the above (note: 
complete_ctor_identifier; push_deferring_access_checks, otherwise for a 
similar testcase inheriting from class A we produce duplicate diagnostic 
about accessibility). Tested x86_64-linux.


Thanks,
Paolo.

PS: I think we should also be more careful about inform vs error, but 
probably not at this Stage...
Index: cp/cp-tree.h
===
--- cp/cp-tree.h(revision 208474)
+++ cp/cp-tree.h(working copy)
@@ -5476,6 +5476,7 @@ extern tree get_copy_ctor (tree, 
tsubst_flags_t)
 extern tree get_copy_assign(tree);
 extern tree get_default_ctor   (tree);
 extern tree get_dtor   (tree, tsubst_flags_t);
+extern tree get_inherited_ctor (tree);
 extern tree locate_ctor(tree);
 extern tree implicitly_declare_fn   (special_function_kind, tree,
 bool, tree, tree);
Index: cp/method.c
===
--- cp/method.c (revision 208474)
+++ cp/method.c (working copy)
@@ -971,6 +971,25 @@ get_copy_assign (tree type)
   return fn;
 }
 
+/* Locate the inherited constructor of constructor CTOR.  */
+
+tree
+get_inherited_ctor (tree ctor)
+{
+  gcc_assert (DECL_INHERITED_CTOR_BASE (ctor));
+
+  push_deferring_access_checks (dk_no_check);
+  tree fn = locate_fn_flags (DECL_INHERITED_CTOR_BASE (ctor),
+complete_ctor_identifier,
+FUNCTION_FIRST_USER_PARMTYPE (ctor),
+LOOKUP_NORMAL|LOOKUP_SPECULATIVE,
+tf_none);
+  pop_deferring_access_checks ();
+  if (fn == error_mark_node)
+return NULL_TREE;
+  return fn;
+}
+
 /* Subroutine of synthesized_method_walk.  Update SPEC_P, TRIVIAL_P and
DELETED_P or give an error message MSG with argument ARG.  */
 
Index: cp/semantics.c
===
--- cp/semantics.c  (revision 208474)
+++ cp/semantics.c  (working copy)
@@ -7438,19 +7438,31 @@ retrieve_constexpr_fundef (tree fun)
 static bool
 is_valid_constexpr_fn (tree fun, bool complain)
 {
-  tree parm = FUNCTION_FIRST_USER_PARM (fun);
   bool ret = true;
-  for (; parm != NULL; parm = TREE_CHAIN (parm))
-if (!literal_type_p (TREE_TYPE (parm)))
-  {
-   ret = false;
-   if (complain)
+
+  if (DECL_INHERITED_CTOR_BASE (fun)
+      && TREE_CODE (fun) == TEMPLATE_DECL)
+{
+  ret = false;
+  if (complain)
+   error (inherited constructor %qD is not constexpr,
+  get_inherited_ctor (fun));
+}
+  else
+{
+  for (tree parm = FUNCTION_FIRST_USER_PARM (fun);
+  parm != NULL_TREE; parm = TREE_CHAIN (parm))
+   if (!literal_type_p (TREE_TYPE (parm)))
  {
-   error (invalid type for parameter %d of constexpr 
-  function %q+#D, DECL_PARM_INDEX (parm), fun);
-   explain_non_literal_class (TREE_TYPE (parm));
+   ret = false;
+   if (complain)
+ {
+   error (invalid type for parameter %d of constexpr 
+  function %q+#D, DECL_PARM_INDEX (parm), fun);
+   explain_non_literal_class (TREE_TYPE (parm));
+ }
  }
-  }
+}
 
   if (!DECL_CONSTRUCTOR_P (fun))
 {
Index: testsuite/g++.dg/cpp0x/inh-ctor19.C
===
--- testsuite/g++.dg/cpp0x/inh-ctor19.C (revision 0)
+++ testsuite/g++.dg/cpp0x/inh-ctor19.C (working copy)
@@ -0,0 +1,14 @@
+// PR c++/60389
+// { dg-do compile { target c++11 } }
+
+struct A
+{
+  template<typename...T> A(T...) {}
+};
+
+struct B : A
+{
+  using A::A;   // { dg-error inherited }
+};
+
+constexpr B b;  // { dg-error literal }



Re: [PATCH] Fix error message from -Wcast-qual when casting away volatile

2014-03-11 Thread Gerald Pfeifer
On Tue, 11 Mar 2014, Magnus Reftel wrote:
 Currently, casting away volatile from a pointer makes -Wcast-qual
 claim that __attribute__((noreturn)) was cast away (see bugzilla
 55383). The attached patch, originally written by Manuel López-Ibáñez
 and updated to match trunk by me, corrects that. No regressions on
 gcc from applying this patch (as reported by make check) were seen.
 
 2014-03-11 Manuel López-Ibáñez  m...@gcc.gnu.org
 
  PR c/55383
  * c/c-typeck.c: use correct format string in cast-qual warning

I volunteer applying this if approved.

Gerald
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 524a59f..0bfc12b 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -4855,7 +4855,7 @@ handle_warn_cast_qual (location_t loc, tree type, tree otype)
 /* There are qualifiers present in IN_OTYPE that are not present
in IN_TYPE.  */
 warning_at (loc, OPT_Wcast_qual,
-		cast discards %q#v qualifier from pointer target type,
+		cast discards %qv qualifier from pointer target type,
 		discarded);
 
   if (added || discarded)
diff --git a/gcc/testsuite/c-c++-common/Wcast-qual-1.c b/gcc/testsuite/c-c++-common/Wcast-qual-1.c
index 640e4f0..af80438 100644
--- a/gcc/testsuite/c-c++-common/Wcast-qual-1.c
+++ b/gcc/testsuite/c-c++-common/Wcast-qual-1.c
@@ -85,11 +85,11 @@ f3 (void ***bar)
 void
 f4 (void * const **bar)
 {
-  const void ***p9 = (const void ***) bar; /* { dg-warning cast } */
+  const void ***p9 = (const void ***) bar; /* { dg-warning cast discards .const. qualifier  } */
   void * const **p11 = (void * const **) bar;
   void ** const *p13 = (void ** const *) bar; /* { dg-warning cast } */
   const void * const **p15 = (const void * const **) bar; /* { dg-warning cast } */
-  const void ** const *p17 = (const void ** const *) bar; /* { dg-warning cast } */
+  const void ** const *p17 = (const void ** const *) bar; /* { dg-warning cast discards .const. qualifier } */
   void * const * const * p19 = (void * const * const *) bar;
   const void * const * const *p21 = (const void * const * const *) bar;
 }


Re: [PATCH] Fix error message from -Wcast-qual when casting away volatile

2014-03-11 Thread Jakub Jelinek
On Tue, Mar 11, 2014 at 05:10:45PM +0100, Gerald Pfeifer wrote:
 On Tue, 11 Mar 2014, Magnus Reftel wrote:
  Currently, casting away volatile from a pointer makes -Wcast-qual
  claim that __attribute__((noreturn)) was cast away (see bugzilla
  55383). The attached patch, originally written by Manuel López-Ibáñez
  and updated to match trunk by me, corrects that. No regressions on
  gcc from applying this patch (as reported by make check) were seen.
  
  2014-03-11 Manuel López-Ibáñez  m...@gcc.gnu.org
  
   PR c/55383
   * c/c-typeck.c: use correct format string in cast-qual warning
 
 I volunteer applying this if approved.

The ChangeLog is incorrect (there should be two spaces before Manuel and
no c/ prefix for the filename).

I'll defer review to Joseph or Jason.

Jakub


Re: [C++ Patch/RFC] PR 60389

2014-03-11 Thread Jason Merrill

OK.

Jason


[patch] fix libstdc++/60499

2014-03-11 Thread Jonathan Wakely

This is a 4.9 regression due to a non-uglified name being used in
debug mode containers.

Tested x86_64-linux, normal and debug mode, committed to trunk.


commit 39cc9a9e195aa90027fcc40083b913016b83f0e5
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Mar 11 10:47:52 2014 +

PR libstdc++/60499
* include/debug/forward_list (forward_list::operator=(forward_list)):
Uglify name.
* include/debug/map (map::operator=(map)): Likewise.
* include/debug/multimap (multimap::operator=(multimap)): Likewise.
* include/debug/multiset (multiset::operator=(multiset)): Likewise.
* include/debug/set (set::operator=(set)): Likewise.
* include/debug/unordered_map
(unordered_map::operator=(unordered_map)): Likewise.
(unordered_multimap::operator=(unordered_multimap)): Likewise.
* include/debug/unordered_set
(unordered_set::operator=(unordered_set)): Likewise.
(unordered_multiset::operator=(unordered_multiset)): Likewise.
* include/debug/vector (vector::operator=(vector)): Likewise.
* testsuite/23_containers/forward_list/debug/60499.cc: New
* testsuite/23_containers/map/debug/60499.cc: New
* testsuite/23_containers/multimap/debug/60499.cc: New
* testsuite/23_containers/multiset/debug/60499.cc: New
* testsuite/23_containers/set/debug/60499.cc: New
* testsuite/23_containers/unordered_map/debug/60499.cc: New
* testsuite/23_containers/unordered_multimap/debug/60499.cc: New
* testsuite/23_containers/unordered_multiset/debug/60499.cc: New
* testsuite/23_containers/unordered_set/debug/60499.cc: New
* testsuite/23_containers/vector/debug/60499.cc: New

diff --git a/libstdc++-v3/include/debug/forward_list 
b/libstdc++-v3/include/debug/forward_list
index f2984c9..12f6d7f 100644
--- a/libstdc++-v3/include/debug/forward_list
+++ b/libstdc++-v3/include/debug/forward_list
@@ -140,10 +140,10 @@ namespace __debug
   noexcept(_Node_alloc_traits::_S_nothrow_move())
   {
__glibcxx_check_self_move_assign(__list);
-   bool xfer_memory = _Node_alloc_traits::_S_propagate_on_move_assign()
+   bool __xfer_memory = _Node_alloc_traits::_S_propagate_on_move_assign()
|| __list.get_allocator() == this-get_allocator();
static_cast_Base(*this) = std::move(__list);
-   if (xfer_memory)
+   if (__xfer_memory)
  this-_M_swap(__list);
else
  this-_M_invalidate_all();
diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h
index bd68c99..fda6ac1 100644
--- a/libstdc++-v3/include/debug/map.h
+++ b/libstdc++-v3/include/debug/map.h
@@ -148,10 +148,10 @@ namespace __debug
   noexcept(_Alloc_traits::_S_nothrow_move())
   {
__glibcxx_check_self_move_assign(__x);
-   bool xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
+   bool __xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
|| __x.get_allocator() == this-get_allocator();
_M_base() = std::move(__x._M_base());
-   if (xfer_memory)
+   if (__xfer_memory)
  this-_M_swap(__x);
else
  this-_M_invalidate_all();
diff --git a/libstdc++-v3/include/debug/multimap.h 
b/libstdc++-v3/include/debug/multimap.h
index fad80cc..4c3a3eb 100644
--- a/libstdc++-v3/include/debug/multimap.h
+++ b/libstdc++-v3/include/debug/multimap.h
@@ -150,10 +150,10 @@ namespace __debug
   noexcept(_Alloc_traits::_S_nothrow_move())
   {
__glibcxx_check_self_move_assign(__x);
-   bool xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
+   bool __xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
|| __x.get_allocator() == this-get_allocator();
_M_base() = std::move(__x._M_base());
-   if (xfer_memory)
+   if (__xfer_memory)
  this-_M_swap(__x);
else
  this-_M_invalidate_all();
diff --git a/libstdc++-v3/include/debug/multiset.h 
b/libstdc++-v3/include/debug/multiset.h
index bd555b3..ae62bf8 100644
--- a/libstdc++-v3/include/debug/multiset.h
+++ b/libstdc++-v3/include/debug/multiset.h
@@ -149,10 +149,10 @@ namespace __debug
   noexcept(_Alloc_traits::_S_nothrow_move())
   {
__glibcxx_check_self_move_assign(__x);
-   bool xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
+   bool __xfer_memory = _Alloc_traits::_S_propagate_on_move_assign()
|| __x.get_allocator() == this-get_allocator();
_M_base() = std::move(__x._M_base());
-   if (xfer_memory)
+   if (__xfer_memory)
  this-_M_swap(__x);
else
  this-_M_invalidate_all();
diff --git a/libstdc++-v3/include/debug/set.h b/libstdc++-v3/include/debug/set.h
index f40ecec..c83e2af 100644
--- a/libstdc++-v3/include/debug/set.h
+++ b/libstdc++-v3/include/debug/set.h
@@ -148,10 +148,10 @@ namespace __debug
   

[jit] Cleanup base_types

2014-03-11 Thread David Malcolm
Committed to branch dmalcolm/jit:

I tracked down the crash in dwarf2out.c mentioned in the commit message
for 6e624036e3f47c6ca4ae2e99974b74aa29f11eca:
base_types wasn't being flushed between invocations, leading to it
containing garbage that might be collected (it's not GTY-labelled), and
duplicate copies of types that appeared to violate reference-counting
checking in the DWARF string pruner, due to multiple copies of the
string __unknown__.

Release it.

With this commit, *almost* all of the test suite runs successfully at
optimization level 3, the exception being a crash in test-linked-list.c
that I've yet to figure out.

gcc/
* dwarf2out.c (dwarf2out_c_finalize): Release base_types.
---
 gcc/ChangeLog.jit | 4 
 gcc/dwarf2out.c   | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.jit b/gcc/ChangeLog.jit
index 0e35180..5c14fcc 100644
--- a/gcc/ChangeLog.jit
+++ b/gcc/ChangeLog.jit
@@ -1,3 +1,7 @@
+2014-03-11  David Malcolm  dmalc...@redhat.com
+
+   * dwarf2out.c (dwarf2out_c_finalize): Release base_types.
+
 2014-03-10  David Malcolm  dmalc...@redhat.com
 
* ipa-reference.c (ipa_init): Move static bool init_p from here
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index e9d230d..fd262f2 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -24326,7 +24326,7 @@ void dwarf2out_c_finalize (void)
   generic_type_instances = NULL;
   frame_pointer_fb_offset = 0;
   frame_pointer_fb_offset_valid = false;
-  //base_types = NULL;
+  base_types.release ();
 }
 
 #include gt-dwarf2out.h
-- 
1.7.11.7



Re: [Build, Driver] Add -lcilkrts for -fcilkplus

2014-03-11 Thread Joseph S. Myers
On Tue, 11 Mar 2014, Tobias Burnus wrote:

 When using Cilk Plus (-fcilkplus), it makes sense to automatically link the
 run-time library (-lcilkrts).
 
 This patch mimics libgomp by adding a .spec file; I am not 100% sure whether
 the .spec file is needed, but the pthread tests in libgomp imply that it makes
 sense. (libgomp also checks for -lrt for the high-performance timers, a check
 which is not required for libcilkrts.)
 
 Bootstrapped on x86-64-gnu-linux.
 OK for the trunk?

The driver changes are OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fix error message from -Wcast-qual when casting away volatile

2014-03-11 Thread Joseph S. Myers
On Tue, 11 Mar 2014, Magnus Reftel wrote:

 Currently, casting away volatile from a pointer makes -Wcast-qual
 claim that __attribute__((noreturn)) was cast away (see bugzilla
 55383). The attached patch, originally written by Manuel López-Ibáñez
 and updated to match trunk by me, corrects that. No regressions on
 gcc from applying this patch (as reported by make check) were seen.

OK (with the ChangeLog fix as noted).

-- 
Joseph S. Myers
jos...@codesourcery.com

[jit] Fix state issue in gcse.c

2014-03-11 Thread David Malcolm
Committed to branch dmalcolm/jit:

Turning optimizations up from 0 to 3 showed a segfault on the 2nd
iteration of test-linked-list.c in get_data_from_adhoc_loc whilst
garbage-collecting.

Investigation revealed the issue to be a CFG from the previous compile
being kept alive by this GC root in gcse.c:
  static GTY(()) rtx test_insn;

This wouldn't in itself be an issue, but one (or more) of the edges had:

  (gdb) p /x e->goto_locus
  $9 = 0x8000

and was thus treated as an ADHOC_LOC.  Hence, this line in the edge_def's
gt_ggc_mx routine:

  8313    tree block = LOCATION_BLOCK (e->goto_locus);

led to a call (via LOCATION_BLOCK) to get_data_from_adhoc_loc:

152 void *
153 get_data_from_adhoc_loc (struct line_maps *set, source_location loc)
154 {
155   linemap_assert (IS_ADHOC_LOC (loc));
156   return set->location_adhoc_data_map.data[loc & 
MAX_SOURCE_LOCATION].data;
157 }

but at this point, the ad-hoc location data from the previous in-process
compile no longer makes sense, and, with:
  (gdb) p set->location_adhoc_data_map
  $5 = {htab = 0x60fbb0, curr_loc = 0, allocated = 0, data = 0x0}

the read through the NULL data segfaults.

gcse.c appears to create test_insn on-demand as part of the implementation of
can_assign_to_reg_without_clobbers_p.

Hence it seems to make most sense to simply clear test_insn in toplev_finalize,
which this commit does.

With this, the whole test suite now runs successfully with optimizations
on, so also turn this up (in harness.h) from 0 to 3.

gcc/
* gcse.c (gcse_c_finalize): New, to clear test_insn between
in-process compiles.
* gcse.h (gcse_c_finalize): New.
* toplev.c: Include gcse.h so that we can...
(toplev_finalize): Call gcse_c_finalize.

gcc/testsuite/
* jit.dg/harness.h (set_options): Increase optimization level from
0 to 3.
---
 gcc/ChangeLog.jit  | 8 
 gcc/gcse.c | 5 +
 gcc/gcse.h | 2 ++
 gcc/testsuite/ChangeLog.jit| 5 +
 gcc/testsuite/jit.dg/harness.h | 2 +-
 gcc/toplev.c   | 2 ++
 6 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.jit b/gcc/ChangeLog.jit
index 5c14fcc..77ac44c 100644
--- a/gcc/ChangeLog.jit
+++ b/gcc/ChangeLog.jit
@@ -1,5 +1,13 @@
 2014-03-11  David Malcolm  dmalc...@redhat.com
 
+   * gcse.c (gcse_c_finalize): New, to clear test_insn between
+   in-process compiles.
+   * gcse.h (gcse_c_finalize): New.
+   * toplev.c: Include gcse.h so that we can...
+   (toplev_finalize): Call gcse_c_finalize.
+
+2014-03-11  David Malcolm  dmalc...@redhat.com
+
* dwarf2out.c (dwarf2out_c_finalize): Release base_types.
 
 2014-03-10  David Malcolm  dmalc...@redhat.com
diff --git a/gcc/gcse.c b/gcc/gcse.c
index bb9ba15..f2409ec 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -4226,4 +4226,9 @@ make_pass_rtl_hoist (gcc::context *ctxt)
   return new pass_rtl_hoist (ctxt);
 }
 
+void gcse_c_finalize (void)
+{
+  test_insn = NULL;
+}
+
 #include gt-gcse.h
diff --git a/gcc/gcse.h b/gcc/gcse.h
index e1dea21..f459be2 100644
--- a/gcc/gcse.h
+++ b/gcc/gcse.h
@@ -39,4 +39,6 @@ extern struct target_gcse *this_target_gcse;
 #define this_target_gcse (default_target_gcse)
 #endif
 
+void gcse_c_finalize (void);
+
 #endif
diff --git a/gcc/testsuite/ChangeLog.jit b/gcc/testsuite/ChangeLog.jit
index ec8af3d..5a84bfd 100644
--- a/gcc/testsuite/ChangeLog.jit
+++ b/gcc/testsuite/ChangeLog.jit
@@ -1,3 +1,8 @@
+2014-03-11  David Malcolm  dmalc...@redhat.com
+
+   * jit.dg/harness.h (set_options): Increase optimization level from
+   0 to 3.
+
 2014-03-07  David Malcolm  dmalc...@redhat.com
 
* jit.dg/test-functions.c (create_test_of_hidden_function): New,
diff --git a/gcc/testsuite/jit.dg/harness.h b/gcc/testsuite/jit.dg/harness.h
index aa98028..e67ac36 100644
--- a/gcc/testsuite/jit.dg/harness.h
+++ b/gcc/testsuite/jit.dg/harness.h
@@ -132,7 +132,7 @@ static void set_options (gcc_jit_context *ctxt, const char 
*argv0)
   gcc_jit_context_set_int_option (
 ctxt,
 GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL,
-0);
+3);
   gcc_jit_context_set_bool_option (
 ctxt,
 GCC_JIT_BOOL_OPTION_DEBUGINFO,
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 9de1c2d..f1ac560 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
 #include pass_manager.h
 #include dwarf2out.h
 #include ipa-reference.h
+#include gcse.h
 
 #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
 #include dbxout.h
@@ -2000,6 +2001,7 @@ void toplev_finalize (void)
   cgraphbuild_c_finalize ();
   cgraphunit_c_finalize ();
   dwarf2out_c_finalize ();
+  gcse_c_finalize ();
   ipa_c_finalize ();
   ipa_reference_c_finalize ();
   predict_c_finalize ();
-- 
1.7.11.7



[patch] Backport fixes for allocator handling in std::vector move assign

2014-03-11 Thread Jonathan Wakely

This patch backports the important parts of three commits from trunk
to the 4.8 branch.

Tested x86_64-linux, committed to the 4.8 branch.

commit 48a2f3af4fcff86695e6f12c1490833f5679ad44
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Mar 11 18:20:47 2014 +

Backport from mainline.

2014-01-09  Jonathan Wakely  jwak...@redhat.com

PR libstdc++/59738
* include/bits/stl_vector.h (vector::_M_move_assign): Restore
support for non-Movable types.

2014-01-08  François Dumont  fdum...@gcc.gnu.org

* include/bits/stl_vector.h (std::vector::_M_move_assign): Pass
*this allocator instance when building temporary vector instance
so that *this allocator does not get moved.
* testsuite/23_containers/vector/allocator/move.cc (test01): Add
check on a vector iterator.
* testsuite/23_containers/vector/allocator/move_assign.cc
(test02): Likewise.
(test03): New, test with a non-propagating allocator.

2013-11-15  Jonathan Wakely  jwakely@gmail.com

* testsuite/23_containers/vector/allocator/move.cc: New

diff --git a/libstdc++-v3/include/bits/stl_vector.h 
b/libstdc++-v3/include/bits/stl_vector.h
index 69c6e27..14de461 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -1361,7 +1361,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   void
   _M_move_assign(vector __x, std::true_type) noexcept
   {
-   const vector __tmp(std::move(*this));
+   vector __tmp(get_allocator());
+   this-_M_impl._M_swap_data(__tmp._M_impl);
this-_M_impl._M_swap_data(__x._M_impl);
if (_Alloc_traits::_S_propagate_on_move_assign())
  std::__alloc_on_move(_M_get_Tp_allocator(),
diff --git a/libstdc++-v3/testsuite/23_containers/vector/allocator/move.cc 
b/libstdc++-v3/testsuite/23_containers/vector/allocator/move.cc
new file mode 100644
index 000..7e62f1e
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/vector/allocator/move.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+// { dg-options -std=gnu++11 }
+
+#include <vector>
+#include <testsuite_hooks.h>
+#include <testsuite_allocator.h>
+
+struct T { int i; };
+
+using __gnu_test::uneq_allocator;
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+  typedef uneq_allocator<T> alloc_type;
+  typedef std::vector<T, alloc_type> test_type;
+  test_type v1(alloc_type(1));
+  v1 = { T() };
+  auto it = v1.begin();
+  test_type v2(std::move(v1));
+  VERIFY(1 == v1.get_allocator().get_personality());
+  VERIFY(1 == v2.get_allocator().get_personality());
+  VERIFY( it == v2.begin() );
+}
+
+void test02()
+{
+  bool test __attribute__((unused)) = true;
+  typedef uneq_allocator<T> alloc_type;
+  typedef std::vector<T, alloc_type> test_type;
+  test_type v1(alloc_type(1));
+  v1 = { T() };
+  test_type v2(std::move(v1), alloc_type(2));
+  VERIFY(1 == v1.get_allocator().get_personality());
+  VERIFY(2 == v2.get_allocator().get_personality());
+}
+
+int main()
+{
+  test01();
+  test02();
+  return 0;
+}
diff --git 
a/libstdc++-v3/testsuite/23_containers/vector/allocator/move_assign.cc 
b/libstdc++-v3/testsuite/23_containers/vector/allocator/move_assign.cc
index 64795d3..42dec6d 100644
--- a/libstdc++-v3/testsuite/23_containers/vector/allocator/move_assign.cc
+++ b/libstdc++-v3/testsuite/23_containers/vector/allocator/move_assign.cc
@@ -46,16 +46,35 @@ void test02()
   typedef std::vectorT, alloc_type test_type;
   test_type v1(alloc_type(1));
   v1.push_back(T());
+  auto it = v1.begin();
   test_type v2(alloc_type(2));
-  v2 = std::move(v1);
   v2.push_back(T());
+  v2 = std::move(v1);
+  VERIFY( it == v2.begin() );
   VERIFY(0 == v1.get_allocator().get_personality());
   VERIFY(1 == v2.get_allocator().get_personality());
 }
 
+void test03()
+{
+  bool test __attribute__((unused)) = true;
+  typedef propagating_allocator<T, false> alloc_type;
+  typedef std::vector<T, alloc_type> test_type;
+  test_type v1(alloc_type(1));
+  v1.push_back(T());
+  auto it = v1.begin();
+  test_type v2(alloc_type(1));
+  v2.push_back(T());
+  v2 = std::move(v1);
+  VERIFY( it == v2.begin() );
+  VERIFY(1 == 

Re: [patch] fix libstdc++/59215

2014-03-11 Thread Jonathan Wakely

On 27/01/14 17:58 +, Jonathan Wakely wrote:

This fixes a tsan warning in shared_ptr by replacing the non-atomic
load with a call to _M_get_use_count() which does a relaxed atomic
load.

Tested x86_64-linux, committed to trunk. Will commit to 4.8 soon too.


Here's the commit to the 4.8 branch.

commit 3be0a51522649213782c2bda37ceaf1454840275
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Mar 11 19:27:53 2014 +

2014-03-11  Jonathan Wakely  jwak...@redhat.com

Backport from mainline.
2014-01-27  Jonathan Wakely  jwak...@redhat.com

PR libstdc++/59215
* include/bits/shared_ptr_base.h
(_Sp_counted_base_S_atomic::_M_add_ref_lock()): Use relaxed atomic
load.

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index 9dcefa2..e661b65 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -233,7 +233,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _M_add_ref_lock()
 {
   // Perform lock-free add-if-not-zero operation.
-  _Atomic_word __count = _M_use_count;
+  _Atomic_word __count = _M_get_use_count();
   do
{
  if (__count == 0)


Re: RFA: New ipa-devirt PATCH for c++/58678 (devirt vs. KDE)

2014-03-11 Thread Jan Hubicka
 This patch fixes the latest 58678 testcase by avoiding speculative
 devirtualization to the destructor of an abstract class, which can
 never succeed: you can't create an object of an abstract class, so
 the pointer must point to an object of a derived class, and the
 derived class necessarily has its own destructor.  Other virtual
 member functions of an abstract class are OK for devirtualization:
 the destructor is the only virtual function that is always
 overridden in every class.
 
 We could also detect an abstract class by searching through the
 vtable for __cxa_pure_virtual, but I figured it was easy enough for
 the front end to set a flag on the BINFO.
 
 Tested x86_64-pc-linux-gnu.  OK for trunk?

 commit b64f52066f3f4cdc9d5a30e2d48aaf6dd5efd3d4
 Author: Jason Merrill ja...@redhat.com
 Date:   Wed Mar 5 11:35:07 2014 -0500
 
   PR c++/58678
 gcc/
   * tree.h (BINFO_ABSTRACT_P): New.
   * ipa-devirt.c (abstract_class_dtor_p): New.
   (likely_target_p): Check it.
 gcc/cp/
   * search.c (get_pure_virtuals): Set BINFO_ABSTRACT_P.
   * tree.c (copy_binfo): Copy it.

Jason, I was looking into this and I think I have a patch that works.  I would
just like to verify that I understand things right.  The first thing I
implemented is to consistently skip dtors of abstract classes: as per the
comment in abstract_class_dtor_p, there is no way to call those by virtual
table pointer.  Unlike your patch, it will e.g. enable better unreachable code
removal, since they will not appear in possible target lists of polymorphic calls.

The second change I made is to move methods that are reachable only
via an abstract class into the part of the list that is in construction,
since obviously we do not have instances of these classes.
I do not think it is too important (and it needs a few changes in the
walk), but it is better to be correct here so we avoid further problems
where ipa-devirt digs out a target that is in fact not possible.

What I would like to verify with you is that I also changed the walk when looking
for destructors to not consider types in construction. I believe there is no way
to get a destructor call via a construction vtable, as we always know the type.
Is that right?

Honza


Re: [PATCH] ARM: Weaker memory barriers

2014-03-11 Thread John Carr

Will Deacon will.dea...@arm.com wrote:

 
 Hi John,
 
 On Tue, Mar 11, 2014 at 02:54:18AM +, John Carr wrote:
  A comment in arm/sync.md notes We should consider issuing a inner
  shareability zone barrier here instead.  Here is my first attempt
  at a patch to emit weaker memory barriers.  Three instructions seem
  to be relevant for user mode code on my Cortex A9 Linux box:
  
  dmb ishst, dmb ish, dmb sy
  
  I believe these correspond to a release barrier, a full barrier
  with respect to other CPUs, and a full barrier that also orders
  relative to I/O.
 
 Not quite; DMB ISHST only orders writes with other writes, so loads can move
 across it in both directions. That means it's not sufficient for releasing a
 lock, for example.

Release in this context doesn't mean lock release.  I understand
it to mean release in the specific context of the C++11 memory model.
(Similarly, if you're arguing standards compliance inline really
means relax the one definition rule for this function.)

I don't see a prohibition on moving non-atomic loads across a release
store.  Can you point to an analysis that shows a full barrier is needed?

If we assume that gcc is used to generate code for processes running
within a single inner shareable domain, then we can start by demoting
dmb sy to dmb ish for the memory barrier with no other change.

If a store-store barrier has no place in the gcc atomic memory model,
that supports my hypothesis that a twisty maze of ifdefs is superior to
a portable attractive nuisance.

 
 shameless plug
 
 I gave a presentation at ELCE about the various ARMv7 barrier options (from
 a kernel perspective):
 
   https://www.youtube.com/watch?v=6ORn6_35kKo
 
 /shameless plug

Conveniently just about the same length as a dryer cycle.

I learned that inner shareable domain is the right one to use
for normal user mode code, and the Linux kernel doesn't care
because it has its own memory model and barrier functions.



Re: Fix PR59586

2014-03-11 Thread Mircea Namolaru
Hi,

I think that  NULL pointer checks should be added for all pointers
must_raw, may_raw etc, not only for the *_no_source ones. 

This will make the function more robust and easier to maintain.
Indeed in the current code only the *_no_source pointers may be NULL, but
this may change in the future so you don't want to base the correctness of
the code on this assumption.

Mircea

- Original Message -
 From: Roman Gareev gareevro...@gmail.com
 To: gcc-patches@gcc.gnu.org
 Cc: Tobias Grosser tob...@grosser.es, mircea namolaru 
 mircea.namol...@inria.fr
 Sent: Monday, March 10, 2014 5:39:47 PM
 Subject: Fix PR59586
 
 This patch fixes PR59586.
 The segfault is caused by NULL arguments passed to compute_deps by
 loop_level_carries_dependences.
 This causes an assignment of NULL values to the no_source parameters
 of compute_deps.
 They are passed to subtract_commutative_associative_deps and dereferenced.
 
 However, these NULL arguments are appropriate for the algorithm used
 in loop_level_carries_dependences. It uses compute_deps
 for finding RAW, WAR and WAW dependences of all basic blocks
 in the body of the given loop. Subsequently, it tries to
 determine presence of these dependences at the given level.
 Therefore it maps the relation of the dependences to the relation
 of the corresponding time-stamps and intersects the result with
 the relation in which all the inputs before the DEPTH occur at the
 same time as the output, and the input at the DEPTH occurs before output.
 If the intersection is not empty, some dependences are carried
 by the DEPTH we currently check and the loop is consequently not parallel.
 
 This patch tries to avoid the problem by adding NULL checking of the
 no_source statements to
 subtract_commutative_associative_deps.
 
 Tested x86_64-unknown-linux-gnu, applying to 4.8.3 and trunk.
 


Re: [RFC] Do not consider volatile asms as optimization barriers #1

2014-03-11 Thread Richard Sandiford
Hans-Peter Nilsson h...@bitrange.com writes:
 On Mon, 3 Mar 2014, Richard Sandiford wrote:
 AIUI:

 Reading back the references don't yield any dissenting
 flash-backs, FWIW.

 So, a (use fp) then a (clobber fp)?  That was probably just too
 weird for me to think of, much like a hypercorrect ending of the
 previous clause. :)

 Thanks for dealing with this, and for not making my initial
 nightmarish interpretation of $SUBJECT come true: Do not
 consider volatile asms as anything we have to consider.
 At least I hope so.  Dig up this horse in 6 months?

Thanks, and to Bernd for the review.  I went ahead and applied it to trunk.

Richard


Re: [patch] fix libstdc++/59680

2014-03-11 Thread Jonathan Wakely

On 9 January 2014 23:39, Jonathan Wakely wrote:

PR libstdc++/59680
* src/c++11/thread.cc (__sleep_for): Fix call to ::sleep.

Tested x86_64-linux, and tested again with a hacked c++config.h to use
::sleep(), committed to trunk.


Also backported to the 4.8 branch now.

commit bdca0a1be8f2f59cba8ad3772c3b2f1d85aba0e8
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Mar 11 19:45:53 2014 +

2014-03-11  Jonathan Wakely  jwak...@redhat.com

Backport from mainline.
2014-01-09  Jonathan Wakely  jwak...@redhat.com

PR libstdc++/59680
* src/c++11/thread.cc (__sleep_for): Fix call to ::sleep.

diff --git a/libstdc++-v3/src/c++11/thread.cc b/libstdc++-v3/src/c++11/thread.cc
index 8d040a7..0351f19 100644
--- a/libstdc++-v3/src/c++11/thread.cc
+++ b/libstdc++-v3/src/c++11/thread.cc
@@ -183,7 +183,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 ::usleep(__us);
   }
 # else
-::sleep(__s.count() + (__ns = 100));
+::sleep(__s.count() + (__ns.count() = 100));
 # endif
 #elif defined(_GLIBCXX_HAVE_WIN32_SLEEP)
 unsigned long ms = __ns.count() / 100;


[PATCH] Fix reassoc of vectors (PR tree-optimization/60502)

2014-03-11 Thread Jakub Jelinek
Hi!

build_low_bits_mask doesn't work for vector types (even TYPE_PRECISION
alone on it is meaningless), but what we actually want is a constant with
all bits set.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2014-03-11  Jakub Jelinek  ja...@redhat.com
Marc Glisse  marc.gli...@inria.fr

PR tree-optimization/60502
* tree-ssa-reassoc.c (eliminate_not_pairs): Use build_all_ones_cst
instead of build_low_bits_mask.

* gcc.c-torture/compile/pr60502.c: New test.

--- gcc/tree-ssa-reassoc.c.jj   2014-03-11 15:47:44.0 +0100
+++ gcc/tree-ssa-reassoc.c  2014-03-11 18:47:53.254946786 +0100
@@ -828,8 +828,7 @@ eliminate_not_pairs (enum tree_code opco
  if (opcode == BIT_AND_EXPR)
oe-op = build_zero_cst (TREE_TYPE (oe-op));
  else if (opcode == BIT_IOR_EXPR)
-   oe-op = build_low_bits_mask (TREE_TYPE (oe-op),
- TYPE_PRECISION (TREE_TYPE (oe-op)));
+   oe-op = build_all_ones_cst (TREE_TYPE (oe-op));
 
  reassociate_stats.ops_eliminated += ops-length () - 1;
  ops-truncate (0);
--- gcc/testsuite/gcc.c-torture/compile/pr60502.c.jj2014-03-11 
18:36:45.341757473 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr60502.c   2014-03-11 
18:35:58.0 +0100
@@ -0,0 +1,18 @@
+/* PR tree-optimization/60502 */
+
+typedef signed char v16i8 __attribute__ ((vector_size (16)));
+typedef unsigned char v16u8 __attribute__ ((vector_size (16)));
+
+void
+foo (v16i8 *x)
+{
+  v16i8 m1 = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 
};
+  *x |= *x ^ m1;
+}
+
+void
+bar (v16u8 *x)
+{
+  v16u8 m1 = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 
};
+  *x |= *x ^ m1;
+}

Jakub


[google/main] Fix arm build broken

2014-03-11 Thread 沈涵
Hi, current google/main fails to build for arm because of duplicated
header file entries in gtyp-input.list.

Fixed by removing the duplication in macro tm_file. This only affects the arm
platform. Tested by successfully building for arm.

Patch below 

--- config.gcc.orig 2014-03-11 15:10:26.849602409 -0700
+++ config.gcc 2014-03-11 15:00:28.855375515 -0700
@@ -1024,9 +1024,8 @@ arm*-*-linux-*) # ARM GNU/Linux with E
  tm_file=$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h arm/arm.h
  # Define multilib configuration for arm-linux-androideabi.
  case ${target} in
  arm*-*-linux-*eabi)
-tm_file=$tm_file arm/bpabi.h arm/linux-eabi.h
 tmake_file=$tmake_file arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi
 # Define multilib configuration for arm-linux-androideabi.
 case ${target} in
 *-androideabi)

Ok for google/main?

Han Shen


Re: [google/main] Fix arm build broken

2014-03-11 Thread Dehao Chen
Looks good to me.

Dehao

On Tue, Mar 11, 2014 at 3:22 PM, Hán Shěn (沈涵) shen...@google.com wrote:
 Hi current google/main fails to build for arm because of duplicated
 head file entries in gtyp-input.list.

 Fixed by removing duplication in macro tm_file. This only affects arm
 platform. Tested by successfully build for arm.

 Patch below 

 --- config.gcc.orig 2014-03-11 15:10:26.849602409 -0700
 +++ config.gcc 2014-03-11 15:00:28.855375515 -0700
 @@ -1024,9 +1024,8 @@ arm*-*-linux-*) # ARM GNU/Linux with E
   tm_file=$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h arm/arm.h
   # Define multilib configuration for arm-linux-androideabi.
   case ${target} in
   arm*-*-linux-*eabi)
 -tm_file=$tm_file arm/bpabi.h arm/linux-eabi.h
  tmake_file=$tmake_file arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi
  # Define multilib configuration for arm-linux-androideabi.
  case ${target} in
  *-androideabi)

 Ok for google/main?

 Han Shen


[PATCH] Fix PR60505

2014-03-11 Thread Cong Hou
This patch is fixing PR60505 in which the vectorizer may produce
unnecessary epilogues.

Bootstrapped and tested on a x86_64 machine.

OK for trunk?


thanks,
Cong


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e1d8666..f98e628 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2014-03-11  Cong Hou  co...@google.com
+
+ PR tree-optimization/60505
+ * tree-vect-loop.c (vect_analyze_loop_2): Check the maximum number
+ of iterations of the loop and see if we should build the epilogue.
+
 2014-03-10  Jakub Jelinek  ja...@redhat.com

  PR ipa/60457
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 41b6875..09ec1c0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-11  Cong Hou  co...@google.com
+
+ PR tree-optimization/60505
+ * gcc.dg/vect/pr60505.c: New test.
+
 2014-03-10  Jakub Jelinek  ja...@redhat.com

  PR ipa/60457
diff --git a/gcc/testsuite/gcc.dg/vect/pr60505.c
b/gcc/testsuite/gcc.dg/vect/pr60505.c
new file mode 100644
index 000..6940513
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr60505.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options -Wall -Werror } */
+
+void foo(char *in, char *out, int num)
+{
+  int i;
+  char ovec[16] = {0};
+
+  for(i = 0; i  num ; ++i)
+out[i] = (ovec[i] = in[i]);
+  out[num] = ovec[num/2];
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index df6ab6f..2156d5f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1625,6 +1625,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   bool ok, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
+  int th;

   /* Find all data references in the loop (which correspond to vdefs/vuses)
  and analyze their evolution in the loop.  Also adjust the minimal
@@ -1769,6 +1770,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)

   /* Decide whether we need to create an epilogue loop to handle
  remaining scalar iterations.  */
+  th = MAX (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND), 1)
+   * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo)) + 1;
+  th = (th / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+   * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)  0)
 {
@@ -1779,7 +1786,9 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 }
   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
|| (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-(unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
+(unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+(unsigned HOST_WIDE_INT)max_stmt_executions_int
+(LOOP_VINFO_LOOP (loop_vinfo))  (unsigned)th))
 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;

   /* If an epilogue loop is required make sure we can create one.  */


[GOOGLE] Writes annotation info in elf section.

2014-03-11 Thread Dehao Chen
During AutoFDO annotation, we want to record the annotation stats into
an elf section, so that we can calculate what percentage of the
profile is annotated, which can be used as an indicator of whether the code
has changed significantly compared with the profiled source.

Bootstrapped and performance test on-going.

OK for google-4_8?

Thanks,
Dehao
Index: gcc/auto-profile.c
===
--- gcc/auto-profile.c  (revision 208283)
+++ gcc/auto-profile.c  (working copy)
@@ -49,6 +49,8 @@ along with GCC; see the file COPYING3.  If not see
 #include l-ipo.h
 #include ipa-utils.h
 #include ipa-inline.h
+#include output.h
+#include dwarf2asm.h
 #include auto-profile.h
 
 /* The following routines implements AutoFDO optimization.
@@ -100,9 +102,6 @@ typedef std::vectorconst char * string_vector;
 /* Map from function name's index in function_name_map to target's
execution count.  */
 typedef std::mapunsigned, gcov_type icall_target_map;
-/* Represent profile count of an inline stack,  profile count is represented as
-   (execution_count, value_profile_histogram).  */
-typedef std::pairgcov_type, icall_target_map count_info;
 
 /* Set of inline_stack. Used to track if the profile is already used to
annotate the program.  */
@@ -112,6 +111,13 @@ typedef std::setinline_stack location_set;
to direct call.  */
 typedef std::setgimple stmt_set;
 
+struct count_info
+{
+  gcov_type count;
+  icall_target_map targets;
+  bool annotated;
+};
+
 struct string_compare
 {
   bool operator() (const char *a, const char *b) const
@@ -154,7 +160,7 @@ class function_instance {
   /* Read the profile and create a function_instance with head count as
  HEAD_COUNT. Recursively read callsites to create nested function_instances
  too. STACK is used to track the recursive creation process.  */
-  static const function_instance *read_function_instance (
+  static function_instance *read_function_instance (
   function_instance_stack *stack, gcov_type head_count);
 
   /* Recursively deallocate all callsites (nested function_instances).  */
@@ -167,8 +173,8 @@ class function_instance {
 
   /* Recursively traverse STACK starting from LEVEL to find the corresponding
  function_instance.  */
-  const function_instance *get_function_instance (const inline_stack stack,
- unsigned level) const;
+  function_instance *get_function_instance (const inline_stack stack,
+   unsigned level);
 
   /* Store the profile info for LOC in INFO. Return TRUE if profile info
  is found.  */
@@ -178,18 +184,23 @@ class function_instance {
   MAP, return the total count for all inlined indirect calls.  */
   gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const;
 
+  /* Total number of counts that is used during annotation.  */
+  gcov_type total_annotated_count () const;
+
+  /* Mark LOC as annotated.  */
+  void mark_annotated (location_t loc);
+
 private:
   function_instance (unsigned name, gcov_type head_count)
   : name_(name), total_count_(0), head_count_(head_count) {}
 
   /* Traverse callsites of the current function_instance to find one at the
  location of LINENO and callee name represented in DECL.  */
-  const function_instance *get_function_instance_by_decl (unsigned lineno,
- tree decl) const;
+  function_instance *get_function_instance_by_decl (unsigned lineno, tree 
decl);
 
   /* Map from callsite decl_lineno (lineno in higher 16 bits, discriminator
  in lower 16 bits) to callee function_instance.  */
-  typedef std::mapunsigned, const function_instance * callsite_map;
+  typedef std::mapunsigned, function_instance * callsite_map;
   /* Map from source location (decl_lineno) to profile (count_info).  */
   typedef std::mapunsigned, count_info position_count_map;
 
@@ -218,30 +229,36 @@ class autofdo_source_profile {
 }
   ~autofdo_source_profile ();
   /* For a given DECL, returns the top-level function_instance.  */
-  const function_instance *get_function_instance_by_decl (tree decl) const;
+  function_instance *get_function_instance_by_decl (tree decl);
   /* Find profile info for a given gimple STMT. If found, and if the location
  of STMT does not exist in ANNOTATED, store the profile info in INFO, and
  return true; otherwise return false.  */
-  bool get_count_info (gimple stmt, count_info *info,
-  const location_set *annotated) const;
+  bool get_count_info (gimple stmt, count_info *info) const;
   /* Find total count of the callee of EDGE.  */
   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
 
   /* Update value profile INFO for STMT from the inlined indirect callsite.
-  Return true if INFO is updated.  */
+ Return true if INFO is updated.  */
   bool update_inlined_ind_target (gimple stmt, count_info *info);
 
+  

Re: [PATCH][AARCH64]PR60034

2014-03-11 Thread Kugan
Ping ?


 
 
 gcc/
 
 2014-03-03  Kugan Vivekanandarajah  kug...@linaro.org
 
   PR target/60034
   * aarch64/aarch64.c (aarch64_classify_address): Fix alignment for
   section anchor.
 
 
 
 gcc/testsuite/
 
 2014-03-03  Kugan Vivekanandarajah  kug...@linaro.org
 
   PR target/60034
   * gcc.target/aarch64/pr60034.c: New file.
 


Re: [patch,libfortran] [4.7/4.8/4.9 Regression] PR38199 missed optimization: I/O performance

2014-03-11 Thread Jerry DeLisle
On 03/09/2014 05:39 PM, Jerry DeLisle wrote:
 Hi all,
 
 This final patch does two things.
 
 First:  In read.c it implements a simple space skipping scheme in read_decimal
 where I found a lot of repeated next_char calls happening. This gives a pretty
 good boost in performance and is applicable in general for reading integers.
 
 Second: I have taken Thomas idea of using LEN_TRIM in unit.c revised it to 
 work
 on formatted READ.  I tried to document the code with comments.  There are
 certain conditions for which one can not shorten the string length for 
 internal
 units. For arrays of characters you can not do this for strings more than 
 rank 1
 and stride 1. Also, you can not do this any time a BLANK='zero' is being 
 used. I
 also skip the optimization if there is any BLANK= specified in the READ. Thats
 conservative.  I could also test for BLANK='NULL' in the DTP structure. I will
 probably do that later.
 
 I have added a helper function which tests for the BZ within a format string
 when a format string is present.  I also check to see if the UNIT has had the
 BLANK status set.  The optimization is skipped for these conditions.
 
Updated patch to resolve issue found during NIST tests.

Regression tested and NIST tested.

OK for trunk?

Regards,

Jerry

Index: read.c
===
--- read.c	(revision 208303)
+++ read.c	(working copy)
@@ -677,7 +677,13 @@ read_decimal (st_parameter_dt *dtp, const fnode *f
 	
   if (c == ' ')
 {
-	  if (dtp-u.p.blank_status == BLANK_NULL) continue;
+	  if (dtp-u.p.blank_status == BLANK_NULL)
+	{
+	  /* Skip spaces.  */
+	  for ( ; w  0; p++, w--)
+		if (*p != ' ') break; 
+	  continue;
+	}
 	  if (dtp-u.p.blank_status == BLANK_ZERO) c = '0';
 }
 
Index: unit.c
===
--- unit.c	(revision 208303)
+++ unit.c	(working copy)
@@ -375,6 +375,38 @@ find_or_create_unit (int n)
 }
 
 
+/* Helper function to test conditions in format string. This
+   is used for optimization. You can't trim out blanks or shorten the
+   string if blank length is significant.  */
+static bool
+is_trim_ok (st_parameter_dt *dtp)
+{
+  /* Check rank and stride.  */
+  if (dtp-internal_unit_desc
+   (GFC_DESCRIPTOR_RANK (dtp-internal_unit_desc)  1
+	  || GFC_DESCRIPTOR_STRIDE(dtp-internal_unit_desc, 0) != 1))
+return false;
+  /* Format strings can not have 'BZ' or '/'.  */
+  if (dtp-common.flags  IOPARM_DT_HAS_FORMAT)
+{
+  char *p = dtp-format;
+  off_t i;
+  if (dtp-common.flags  IOPARM_DT_HAS_BLANK)
+	return false;
+  for (i = 0; i  dtp-format_len; i++)
+	{
+	  if (p[i] == '/') return false;
+	  if (p[i] == 'b' || p[i] == 'B')
+	if (p[i+1] == 'z' || p[i+1] == 'Z')
+	  return false;
+	}
+}
+  if (dtp-u.p.ionml) /* A namelist.  */
+return false;
+  return true;
+}
+
+
 gfc_unit *
 get_internal_unit (st_parameter_dt *dtp)
 {
@@ -402,6 +434,30 @@ get_internal_unit (st_parameter_dt *dtp)
  some other file I/O unit.  */
   iunit-unit_number = -1;
 
+  /* As an optimization, adjust the unit record length to not
+ include trailing blanks. This will not work under certain conditions
+ where trailing blanks have significance.  */
+  if (dtp-u.p.mode == READING  is_trim_ok (dtp))
+{
+  int len = 0;
+  if (dtp-common.unit == 0)
+	{
+	  len = string_len_trim (dtp-internal_unit_len,
+ dtp-internal_unit);
+	  if (len  0)
+	dtp-internal_unit_len = len; 
+	  iunit-recl = dtp-internal_unit_len;
+	}
+  else
+	{
+	  len = string_len_trim_char4 (dtp-internal_unit_len,
+			  (const gfc_char4_t*) dtp-internal_unit);
+	  if (len  0)
+	dtp-internal_unit_len = len;
+	  iunit-recl = dtp-internal_unit_len;
+	}
+}
+
   /* Set up the looping specification from the array descriptor, if any.  */
 
   if (is_array_io (dtp))
@@ -414,27 +470,6 @@ get_internal_unit (st_parameter_dt *dtp)
 
   start_record *= iunit-recl;
 }
-  else
-{
-  /* If we are not processing an array, adjust the unit record length not
-	 to include trailing blanks for list-formatted reads.  */
-  if (dtp-u.p.mode == READING  !(dtp-common.flags  IOPARM_DT_HAS_FORMAT))
-	{
-	  if (dtp-common.unit == 0)
-	{
-	  dtp-internal_unit_len =
-		string_len_trim (dtp-internal_unit_len, dtp-internal_unit);
-	  iunit-recl = dtp-internal_unit_len;
-	}
-	  else
-	{
-	  dtp-internal_unit_len =
-		string_len_trim_char4 (dtp-internal_unit_len,
-   (const gfc_char4_t*) dtp-internal_unit);
-	  iunit-recl = dtp-internal_unit_len;
-	}
-	}
-}
 
   /* Set initial values for unit parameters.  */
   if (dtp-common.unit)


Re: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Joey Ye
4.8 also has this bug. OK to backport?

On Tue, Mar 11, 2014 at 6:59 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Tue, Mar 11, 2014 at 06:48:37PM +0800, Thomas Preud'homme wrote:
 I also added a typedef unsigned uint32_t for when sizeof(unsigned) == 4. I 
 hope it's right.

 In theory you could have __CHAR_BIT__ different from 8 and what you care
 about is that uint32_t has exactly 32 bits, so the check would need to be
   if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
 return 0;

 +  if (fake_swap32 (0x12345678) != 0x78567E12)
 +__builtin_abort ();

 Also, for int16 targets where __UINT32_TYPE__ is supposedly unsigned long,
 I think you would need to use:

   if (fake_swap32 (0x12345678UL) != 0x78567E12UL)
 __builtin_abort ();

 (the C standard guarantees that unsigned long is at least 32-bit and
 unsigned int at least 16-bit).

 Ok with those changes.

 Do you have write access, or will somebody from your coworkers commit it for
 you?  Are you covered by ARM GCC Copyright assignment?

 Jakub


RE: [PATCH] Fix incorrect byte swap detection (PR tree-optimization/60454)

2014-03-11 Thread Thomas Preud'homme
 From: Jakub Jelinek [mailto:ja...@redhat.com]
 
 In theory you could have __CHAR_BIT__ different from 8 and what you care
 about is that uint32_t has exactly 32 bits, so the check would need to be
   if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
 return 0;

I could go with:

In = (0x12 << (__CHAR_BIT__ * 3))
| (0x34 << (__CHAR_BIT__ * 2))
| (0x56 << __CHAR_BIT__)
| 0x78;

and compare with a similarly constructed Out so that I could run the test
whenever sizeof (uint32_t) * __CHAR_BIT__ >= 32, couldn't I?

 
 Also, for int16 targets where __UINT32_TYPE__ is supposedly unsigned long,
 I think you would need to use:
 
   if (fake_swap32 (0x12345678UL) != 0x78567E12UL)
 __builtin_abort ();
 
 (the C standard guarantees that unsigned long is at least 32-bit and
 unsigned int at least 16-bit).

Right. Note to myself: §5.2.4.2.1 in C99. I guess so far I've considered
only some kinds of architecture heterogeneity. Thanks for catching that.

 
 Ok with those changes.
 
 Do you have write access, or will somebody from your coworkers commit it
 for
 you?  Are you covered by ARM GCC Copyright assignment?

Yes and yes.

 
   Jakub

Thanks for the review. See attachment and below to check the version you 
approved.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 748805e..b6d7d93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-07  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * tree-ssa-math-opts.c (find_bswap_1): Fix bswap detection.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* config/microblaze/microblaze.md: Correct ashrsi_reg / lshrsi_reg names
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f3c0c85..04ce403 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-10  Thomas Preud'homme  thomas.preudho...@arm.com
+
+   PR tree-optimization/60454
+   * gcc.c-torture/execute/pr60454.c: New test.
+
 2014-02-23  David Holsgrove david.holsgr...@xilinx.com
 
* gcc/testsuite/gcc.target/microblaze/others/mem_reload.c: New test.
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr60454.c 
b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
new file mode 100644
index 000..ceec45e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr60454.c
@@ -0,0 +1,31 @@
+#ifdef __UINT32_TYPE__
+typedef __UINT32_TYPE__ uint32_t;
+#else
+typedef unsigned uint32_t;
+#endif
+
+#define __fake_const_swab32(x) ((uint32_t)(  \
+(((uint32_t)(x)  (uint32_t)0x00ffUL)  24) |\
+(((uint32_t)(x)  (uint32_t)0xff00UL)   8) |\
+(((uint32_t)(x)  (uint32_t)0x00ffUL)   8) |\
+(((uint32_t)(x)  (uint32_t)0xff00UL)  ) |\
+(((uint32_t)(x)  (uint32_t)0xff00UL)  24)))
+
+/* Previous version of bswap optimization would detect byte swap when none
+   happen. This test aims at catching such wrong detection to avoid
+   regressions.  */
+
+__attribute__ ((noinline, noclone)) uint32_t
+fake_swap32 (uint32_t in)
+{
+  return __fake_const_swab32 (in);
+}
+
+int main(void)
+{
+  if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
+return 0;
+  if (fake_swap32 (0x12345678UL) != 0x78567E12UL)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 8e372ed..9ff857c 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1801,7 +1801,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int 
limit)
 
   if (rhs_class == GIMPLE_BINARY_RHS)
 {
+  int i;
   struct symbolic_number n1, n2;
+  unsigned HOST_WIDEST_INT mask;
   tree source_expr2;
 
   if (code != BIT_IOR_EXPR)
@@ -1827,6 +1829,15 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, 
int limit)
return NULL_TREE;
 
  n-size = n1.size;
+ for (i = 0, mask = 0xff; i  n-size; i++, mask = BITS_PER_UNIT)
+   {
+ unsigned HOST_WIDEST_INT masked1, masked2;
+
+ masked1 = n1.n  mask;
+ masked2 = n2.n  mask;
+ if (masked1  masked2  masked1 != masked2)
+   return NULL_TREE;
+   }
  n-n = n1.n | n2.n;
 
  if (!verify_symbolic_number_p (n, stmt))

Best regards,

Thomas

incorrect_detection_v1.5.diff
Description: Binary data


libgo patch committed: Compile math library with -ffp-contract=off

2014-03-11 Thread Ian Lance Taylor
The bug report http://golang.org/issue/7074 shows that math.Log2(1)
produces the wrong result on Aarch64, because the Go math package is
compiled to use a fused multiply-add instruction.  This patch to the
libgo configure script will use -ffp-contract=off when compiling the
math package on processors other than x86.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu, not that that tests much.
Committed to mainline.

Ian

diff -r 76dbb6f77e3d libgo/configure.ac
--- a/libgo/configure.ac	Tue Mar 11 12:53:06 2014 -0700
+++ b/libgo/configure.ac	Tue Mar 11 21:26:35 2014 -0700
@@ -620,6 +620,8 @@
 MATH_FLAG=
 if test $libgo_cv_c_fancymath = yes; then
   MATH_FLAG=-mfancy-math-387 -funsafe-math-optimizations
+else
+  MATH_FLAG=-ffp-contract=off
 fi
 AC_SUBST(MATH_FLAG)