[PATCH] Get rid of useless -fno-rtti for libubsan

2013-11-27 Thread Yury Gribov

Hi all,

As discussed in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59106 only a 
subset of libubsan should be built with RTTI support. Attached patch 
adds custom build rules for relevant files.


-Y
diff --git a/libsanitizer/ubsan/Makefile.am b/libsanitizer/ubsan/Makefile.am
index e98984a..eaab156 100644
--- a/libsanitizer/ubsan/Makefile.am
+++ b/libsanitizer/ubsan/Makefile.am
@@ -4,7 +4,7 @@ AM_CPPFLAGS = -I $(top_srcdir) -I $(top_srcdir)/include
 gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
 
 DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS 
-AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros
+AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros
 AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 ACLOCAL_AMFLAGS = -I m4
 
@@ -13,11 +13,13 @@ toolexeclib_LTLIBRARIES = libubsan.la
 ubsan_files = \
 	ubsan_diag.cc \
 	ubsan_handlers.cc \
-	ubsan_handlers_cxx.cc \
-	ubsan_type_hash.cc \
 	ubsan_value.cc
 
-libubsan_la_SOURCES = $(ubsan_files) 
+ubsan_cxx_files = \
+	ubsan_handlers_cxx.cc \
+	ubsan_type_hash.cc
+
+libubsan_la_SOURCES = $(ubsan_files) $(ubsan_cxx_files)
 libubsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la 
 if !USING_MAC_INTERPOSE
 libubsan_la_LIBADD += $(top_builddir)/interception/libinterception.la
@@ -25,6 +27,17 @@ endif
 libubsan_la_LIBADD += $(LIBSTDCXX_RAW_CXX_LDFLAGS)
 libubsan_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` -lpthread -ldl
 
+# Use special rules for files/objects that require RTTI support.
+ubsan_handlers_cxx.lo: ubsan_handlers_cxx.cc
+	$(LTCXXCOMPILE) -frtti -c $
+ubsan_handlers_cxx.o: ubsan_handlers_cxx.cc
+	$(CXXCOMPILE) -frtti -c $
+
+ubsan_type_hash.lo: ubsan_type_hash.cc
+	$(LTCXXCOMPILE) -frtti -c $
+ubsan_type_hash.o: ubsan_type_hash.cc
+	$(CXXCOMPILE) -frtti -c $
+
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
 # values defined in terms of make variables, as is the case for CC and
 # friends when we are called from the top level Makefile.
diff --git a/libsanitizer/ubsan/Makefile.in b/libsanitizer/ubsan/Makefile.in
index 6812538..198e31d 100644
--- a/libsanitizer/ubsan/Makefile.in
+++ b/libsanitizer/ubsan/Makefile.in
@@ -81,9 +81,9 @@ am__DEPENDENCIES_1 =
 libubsan_la_DEPENDENCIES =  \
 	$(top_builddir)/sanitizer_common/libsanitizer_common.la \
 	$(am__append_1) $(am__DEPENDENCIES_1)
-am__objects_1 = ubsan_diag.lo ubsan_handlers.lo ubsan_handlers_cxx.lo \
-	ubsan_type_hash.lo ubsan_value.lo
-am_libubsan_la_OBJECTS = $(am__objects_1)
+am__objects_1 = ubsan_diag.lo ubsan_handlers.lo ubsan_value.lo
+am__objects_2 = ubsan_handlers_cxx.lo ubsan_type_hash.lo
+am_libubsan_la_OBJECTS = $(am__objects_1) $(am__objects_2)
 libubsan_la_OBJECTS = $(am_libubsan_la_OBJECTS)
 libubsan_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
@@ -240,7 +240,7 @@ AM_CPPFLAGS = -I $(top_srcdir) -I $(top_srcdir)/include
 # May be used by toolexeclibdir.
 gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
 AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic \
-	-Wno-long-long -fPIC -fno-builtin -fno-exceptions \
+	-Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \
 	-fomit-frame-pointer -funwind-tables -fvisibility=hidden \
 	-Wno-variadic-macros $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 ACLOCAL_AMFLAGS = -I m4
@@ -248,11 +248,13 @@ toolexeclib_LTLIBRARIES = libubsan.la
 ubsan_files = \
 	ubsan_diag.cc \
 	ubsan_handlers.cc \
-	ubsan_handlers_cxx.cc \
-	ubsan_type_hash.cc \
 	ubsan_value.cc
 
-libubsan_la_SOURCES = $(ubsan_files) 
+ubsan_cxx_files = \
+	ubsan_handlers_cxx.cc \
+	ubsan_type_hash.cc
+
+libubsan_la_SOURCES = $(ubsan_files) $(ubsan_cxx_files)
 libubsan_la_LIBADD =  \
 	$(top_builddir)/sanitizer_common/libsanitizer_common.la \
 	$(am__append_1) $(LIBSTDCXX_RAW_CXX_LDFLAGS)
@@ -575,6 +577,17 @@ uninstall-am: uninstall-toolexeclibLTLIBRARIES
 	tags uninstall uninstall-am uninstall-toolexeclibLTLIBRARIES
 
 
+# Use special rules for files/objects that require RTTI support.
+ubsan_handlers_cxx.lo: ubsan_handlers_cxx.cc
+	$(LTCXXCOMPILE) -frtti -c $
+ubsan_handlers_cxx.o: ubsan_handlers_cxx.cc
+	$(CXXCOMPILE) -frtti -c $
+
+ubsan_type_hash.lo: ubsan_type_hash.cc
+	$(LTCXXCOMPILE) -frtti -c $
+ubsan_type_hash.o: ubsan_type_hash.cc
+	$(CXXCOMPILE) -frtti -c $
+
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:


[Patch Ping] Add slim-lto support to gcc's build machinery

2013-11-27 Thread Markus Trippelsdorf
On 2013.11.20 at 15:43 +0100, Markus Trippelsdorf wrote:
 On 2013.11.20 at 14:41 +0100, Paolo Bonzini wrote:
  Note that you need to regenerate all users of libtool.m4.  Please post a
  patch _with_ the regeneration so that whoever applies it won't screw up.

Ping.
Can you please have look, Paolo? And if it is not too much work, it
would be nice if you could also apply the patch.

Thanks.

-- 
Markus


Re: [PATCH] Add signed integer overflow checking to ubsan

2013-11-27 Thread Marek Polacek
On Fri, Nov 22, 2013 at 10:54:16AM +0100, Marek Polacek wrote:
 Hi!
 
 Working virtually out of Pago Pago.
 
 The following is the implementation of the signed integer overflow
 checking for the UndefinedBehaviorSanitizer.  I wrote some of the
 generic bits; Jakub did the i?86 handlind/optabs as well as VRP/fold
 bits.

I'd like to ping this patch.  Here's a rebased version that contains
a fix for miscompilation with -Os -m32.

2013-11-27  Jakub Jelinek  ja...@redhat.com
Marek Polacek  pola...@redhat.com

* opts.c (common_handle_option): Handle
-fsanitize=signed-integer-overflow.
* config/i386/i386.md (addvmode4, subvmode4, mulvmode4,
negvmode3, negvmode3_1): Define expands.
(*addvmode4, *subvmode4, *mulvmode4, *negvmode3): Define
insns.
* sanitizer.def (BUILT_IN_UBSAN_HANDLE_ADD_OVERFLOW,
BUILT_IN_UBSAN_HANDLE_SUB_OVERFLOW,
BUILT_IN_UBSAN_HANDLE_MUL_OVERFLOW,
BUILT_IN_UBSAN_HANDLE_NEGATE_OVERFLOW): Define.
* ubsan.h (PROB_VERY_UNLIKELY, PROB_EVEN, PROB_VERY_LIKELY,
PROB_ALWAYS): Define.
(ubsan_build_overflow_builtin): Declare.
* gimple-fold.c (gimple_fold_stmt_to_constant_1): Add folding of
internal functions.
* ubsan.c (PROB_VERY_UNLIKELY): Don't define here.
(ubsan_build_overflow_builtin): New function.
(instrument_si_overflow): Likewise.
(ubsan_pass): Add signed integer overflow checking.
(gate_ubsan): Enable the pass also when SANITIZE_SI_OVERFLOW.
* flag-types.h (enum sanitize_code): Add SANITIZE_SI_OVERFLOW.
* internal-fn.c: Include ubsan.h and target.h.
(ubsan_expand_si_overflow_addsub_check): New function.
(ubsan_expand_si_overflow_neg_check): Likewise.
(ubsan_expand_si_overflow_mul_check): Likewise.
(expand_UBSAN_CHECK_ADD): Likewise.
(expand_UBSAN_CHECK_SUB): Likewise.
(expand_UBSAN_CHECK_MUL): Likewise.
* fold-const.c (fold_binary_loc): Don't fold A + (-B) - A - B and
(-A) + B - B - A when doing the signed integer overflow checking.
* internal-fn.def (UBSAN_CHECK_ADD, UBSAN_CHECK_SUB, UBSAN_CHECK_MUL):
Define.
* tree-vrp.c (extract_range_basic): Handle internal calls.
* optabs.def (addv4_optab, subv4_optab, mulv4_optab, negv4_optab): New
optabs.
c-family/
* c-gimplify.c (c_gimplify_expr): If doing the integer-overflow
sanitization, call unsigned_type_for only when !TYPE_OVERFLOW_WRAPS.
testsuite/
* c-c++-common/ubsan/overflow-mul-2.c: New test.
* c-c++-common/ubsan/overflow-add-1.c: New test.
* c-c++-common/ubsan/overflow-add-2.c: New test.
* c-c++-common/ubsan/overflow-mul-1.c: New test.
* c-c++-common/ubsan/overflow-sub-1.c: New test.
* c-c++-common/ubsan/overflow-sub-2.c: New test.
* c-c++-common/ubsan/overflow-negate-1.c: New test.

--- gcc/opts.c.mp   2013-11-27 08:46:28.032629413 +0100
+++ gcc/opts.c  2013-11-27 08:46:57.566753291 +0100
@@ -1460,6 +1460,8 @@ common_handle_option (struct gcc_options
  { vla-bound, SANITIZE_VLA, sizeof vla-bound - 1 },
  { return, SANITIZE_RETURN, sizeof return - 1 },
  { null, SANITIZE_NULL, sizeof null - 1 },
+ { signed-integer-overflow, SANITIZE_SI_OVERFLOW,
+   sizeof signed-integer-overflow -1 },
  { NULL, 0, 0 }
};
const char *comma;
--- gcc/config/i386/i386.md.mp  2013-11-27 08:46:27.890628818 +0100
+++ gcc/config/i386/i386.md 2013-11-27 08:46:57.491752976 +0100
@@ -6198,6 +6198,42 @@
   [(set_attr type alu)
(set_attr mode QI)])
 
+(define_mode_attr widerintmode [(QI HI) (HI SI) (SI DI) (DI TI)])
+
+;; Add with jump on overflow.
+(define_expand addvmode4
+  [(parallel [(set (reg:CCO FLAGS_REG)
+  (eq:CCO (plus:widerintmode
+ (sign_extend:widerintmode
+(match_operand:SWI 1 register_operand))
+ (sign_extend:widerintmode
+(match_operand:SWI 2 general_operand)))
+  (sign_extend:widerintmode
+ (plus:SWI (match_dup 1) (match_dup 2)
+ (set (match_operand:SWI 0 register_operand)
+  (plus:SWI (match_dup 1) (match_dup 2)))])
+   (set (pc) (if_then_else
+  (eq (reg:CCO FLAGS_REG) (const_int 0))
+  (label_ref (match_operand 3))
+  (pc)))]
+  )
+
+(define_insn *addvmode4
+  [(set (reg:CCO FLAGS_REG)
+   (eq:CCO (plus:widerintmode
+  (sign_extend:widerintmode
+ (match_operand:SWI 1 nonimmediate_operand %0,0))
+  (sign_extend:widerintmode
+ (match_operand:SWI 2 general_operand g,ri)))
+   (sign_extend:widerintmode
+  (plus:SWI (match_dup 1) 

Re: Patch ping (stage1-ish patches)

2013-11-27 Thread Alexander Ivchenko
Here is the patch series that had been posted in Sep that is aimed to
isolate the Android support from targets that actually don't have that
support (We discussed the need of it with Jakub here
http://gcc.gnu.org/ml/gcc-patches/2013-09/msg00185.html):

http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01420.html


thanks,
Alexander

2013/11/27 Jakub Jelinek ja...@redhat.com:
 On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote:
 In fact, I would suggest that anyone with a pending patch from prior
 to stage1 close that hasn't gotten feedback by midnight Tuesday ping
 their patch.  I'd like to have a sense of everything that is
 outstanding sooner rather than later and wrap up any loose ends as
 quickly as possible.

 Ok, doing that now for my pending patches:

 Masked load/store vectorization:
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01268.html
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01437.html
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01550.html

 Elemental function support (updated version of the earlier patch):
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03197.html

 AddressSanitizer use-after-return instrumentation:
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02070.html

 Use libbacktrace for libsanitizer's symbolization (will need tweaking,
 depending on next libsanitizer merge, whether the corresponding
 sanitizer_common changes are upstreamed or not, and perhaps to compile
 libbacktrace sources again with renamed function names and other tweaks
 - different allocator, only subset of files, etc.; but, there is a P1
 bug for this anyway):
 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02055.html

 Jakub


Re: [RFC] [PATCH V2, AARCH64]: Re: [RFC] [PATCH, AARCH64] Machine descriptions to support stack smashing protection

2013-11-27 Thread Venkataramanan Kumar
Hi Richard,

 I don't think it's good to have long lists of targets on generic tests.
  Can we factor this out into a target-supports option?

I have updated the patch as per your recommendation. Please let me
know if it is fine.

2013-11-26  Venkataramanan Kumar  venkataramanan.ku...@linaro.org
* configure.ac (gcc_cv_libc_provides_tls_ssp): Add test to
check TLS support in target C library for Aarch64.
* configure: Regenerate.
* config.in: Regenerate.
* config/aarch64/aarch64.md (stack_protect_set, stack_protect_test)
(stack_protect_set_mode, stack_protect_test_mode): Add
initial machine description for Stack Smashing Protector.
* config/aarch64/aarch64-linux.h (TARGET_THREAD_SSP_OFFSET): Define.

2013-11-26  Venkataramanan Kumar  venkataramanan.ku...@linaro.org
* lib/target-supports.exp
  (check_effective_target_stack_protection): New procedure.
* g++.dg/fstack-protector-strong.C: Add target check for
  stack protection.
* gcc.dg/fstack-protector-strong.c: Likewise.

regards,
Venkat.


On 26 November 2013 20:23, Richard Earnshaw rearn...@arm.com wrote:
 On 26/11/13 14:16, Venkataramanan Kumar wrote:
 Index: gcc/testsuite/gcc.dg/fstack-protector-strong.c
 ===
 --- gcc/testsuite/gcc.dg/fstack-protector-strong.c(revision 205378)
 +++ gcc/testsuite/gcc.dg/fstack-protector-strong.c(working copy)
 @@ -1,6 +1,6 @@
  /* Test that stack protection is done on chosen functions. */

 -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */
 +/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* 
 aarch64-*-* } } */
  /* { dg-options -O2 -fstack-protector-strong } */


 I don't think it's good to have long lists of targets on generic tests.
  Can we factor this out into a target-supports option?

 R.

Index: gcc/configure.ac
===
--- gcc/configure.ac(revision 205378)
+++ gcc/configure.ac(working copy)
@@ -4896,6 +4896,24 @@
[Define if your target C library provides stack protector support])
 fi
 
+# Test for tls based stack protector support in target C library.
+AC_CACHE_CHECK(TLS stack guard support in target C library,
+   gcc_cv_libc_provides_tls_ssp,
+   [gcc_cv_libc_provides_tls_ssp=no
+   case $target in
+  aarch64-*-linux* | aarch64-*-kfreebsd*-gnu | aarch64-*-knetbsd*-gnu)
+ # glibc 2.19 and later provides TLS access to stack guard canary
+ # currently set for Aarch64.
+ GCC_GLIBC_VERSION_GTE_IFELSE([2], [19], 
[gcc_cv_libc_provides_tls_ssp=yes])
+   ;;
+   *) gcc_cv_libc_provides_tls_ssp=no ;;
+   esac])
+
+if test x$gcc_cv_libc_provides_tls_ssp = xyes; then
+  AC_DEFINE(TARGET_LIBC_PROVIDES_TLS_SSP, 1,
+   [Define if your target C library provides TLS stack protector 
support.])
+fi
+
 # Test for sys/sdt.h on the target.
 GCC_TARGET_TEMPLATE([HAVE_SYS_SDT_H])
 AC_MSG_CHECKING(sys/sdt.h in the target C library)
Index: gcc/configure
===
--- gcc/configure   (revision 205378)
+++ gcc/configure   (working copy)
@@ -27127,6 +27127,35 @@
 
 fi
 
+# Test for tls based stack protector support in target C library.
+{ $as_echo $as_me:${as_lineno-$LINENO}: checking TLS stack guard support in 
target C library 5
+$as_echo_n checking TLS stack guard support in target C library...  6; }
+if test ${gcc_cv_libc_provides_tls_ssp+set} = set; then :
+  $as_echo_n (cached)  6
+else
+  gcc_cv_libc_provides_tls_ssp=no
+   case $target in
+  aarch64-*-linux* | aarch64-*-kfreebsd*-gnu | aarch64-*-knetbsd*-gnu)
+ # glibc 2.19 and later provides TLS access to stack guard canary
+ # currently set for Aarch64.
+
+if test $glibc_version_major -gt 2 \
+  || ( test $glibc_version_major -eq 2  test $glibc_version_minor -ge 19 ); 
then :
+  gcc_cv_libc_provides_tls_ssp=yes
+fi
+   ;;
+   *) gcc_cv_libc_provides_tls_ssp=no ;;
+   esac
+fi
+{ $as_echo $as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_libc_provides_tls_ssp 5
+$as_echo $gcc_cv_libc_provides_tls_ssp 6; }
+
+if test x$gcc_cv_libc_provides_tls_ssp = xyes; then
+
+$as_echo #define TARGET_LIBC_PROVIDES_TLS_SSP 1 confdefs.h
+
+fi
+
 # Test for sys/sdt.h on the target.
 
 { $as_echo $as_me:${as_lineno-$LINENO}: checking sys/sdt.h in the target C 
library 5
Index: gcc/testsuite/lib/target-supports.exp
===
--- gcc/testsuite/lib/target-supports.exp   (revision 205378)
+++ gcc/testsuite/lib/target-supports.exp   (working copy)
@@ -808,6 +808,17 @@
 } -fstack-protector]
 }
 
+# Return 1 the target supports stack protection.
+proc check_effective_target_stack_protection { } {
+if { [istarget i?86-*-*]
+ || [istarget x86_64-*-*]
+ || [istarget aarch64-*-*]
+ || [istarget s390x-*-*] } {
+   

Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.

2013-11-27 Thread Tobias Burnus

Am 27.11.2013 08:22, schrieb Sergey Ostanevich:

Done.



Thanks for fixing Richard's and Jakub's comments and parts of mine.


+have the same meaning as described in @option{fvect-cost-model} and by
+default a cost model defined with @option{fvect-cost-model} is used.
As mentioned before, pleae add a hyphen before fvect (i.e. 
@option{fvect-cost-model}  - @option{-fvect-cost-model})


Regarding a test case: I still think it would be useful to have one, but I somehow seemed 
to have messed up with my previous one - I fail to get it not to vectorize with omp 
simd due to cost reasons. Thus, I don't have one.

Tobias

/* { dg-require-effective-target vect_int } */
/* { dg-additional-options -fopenmp-simd -Wopenmp-simd -mno-sse2 } */

/* NOTE: Without -mno-sse2, the loop is vectorized.  */

#include stdarg.h
#include ../../tree-vect.h

#define N 32

struct t{
  int k[N];
  int l; 
};
  
struct s{
  char a;	/* aligned */
  char b[N-1];  /* unaligned (offset 1B) */
  char c[N];/* aligned (offset NB) */
  struct t d;   /* aligned (offset 2NB) */
  struct t e;   /* unaligned (offset 2N+4N+4 B) */
};
 
struct s tmp = { 1 };

int main1 ()
{  
  int i;

  /* unaligned */
#pragma omp simd
  for (i = 0; i  N/2; i++)
{
  tmp.b[i] = 5; /* { dg-warning vectorization did not happen for a simd loop } */
}

  /* check results:  */
  for (i = 0; i N/2; i++)
{
  if (tmp.b[i] != 5)
abort ();
}

  return 0;
}

int main (void)
{ 
  check_vect ();
  
  return main1 ();
} 

/* { dg-final { scan-tree-dump-times loop vectorized 0 vect } } */
/* { dg-final { scan-tree-dump-times vectorization not profitable 1 vect } } */
/* ! dg-final ! cleanup-tree-dump vect ! ! */
/* { dg-require-effective-target vect_int } */
/* { dg-additional-options -fopenmp-simd -Wopenmp-simd -fsimd-cost-model=unlimited } */

#include stdarg.h
#include ../../tree-vect.h

#define N 32

struct t{
  int k[N];
  int l; 
};
  
struct s{
  char a;	/* aligned */
  char b[N-1];  /* unaligned (offset 1B) */
  char c[N];/* aligned (offset NB) */
  struct t d;   /* aligned (offset 2NB) */
  struct t e;   /* unaligned (offset 2N+4N+4 B) */
};
 
struct s tmp = { 1 };

int main1 ()
{  
  int i;

  /* unaligned */
#pragma omp simd
  for (i = 0; i  N/2; i++)
{
  tmp.b[i] = 5; /* dg-warning vectorization did not happen for a simd loop */
}

  /* check results:  */
  for (i = 0; i N/2; i++)
{
  if (tmp.b[i] != 5)
abort ();
}

  return 0;
}

int main (void)
{ 
  check_vect ();
  
  return main1 ();
} 

/* { dg-final { scan-tree-dump-times loop vectorized 1 vect } } */
/* { dg-final { scan-tree-dump-times vectorization not profitable 0 vect } } */
/* { dg-final { cleanup-tree-dump vect } } */


[PING] [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c

2013-11-27 Thread Zhenqiang Chen
Ping?

Thanks!
-Zhenqiang

 -Original Message-
 From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
 ow...@gcc.gnu.org] On Behalf Of Zhenqiang Chen
 Sent: Friday, November 08, 2013 10:51 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Ramana Radhakrishnan
 Subject: [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c
 
 Hi,
 
 lp1243022.c will fail with options: -mfpu=neon -mfloat-abi=hard.
 
 Logs show it does not generate auto-incremental instruction in pass
 auto_inc_dec. In this case, the check of REG_INC note at subreg2 will be
 invalid. So skip the check for target arm-neon.
 
 All PASS with the following options:
 
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=vfpv3
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=vfpv3
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=vfpv3
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=neon
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=neon
 -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=neon
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=vfpv4
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=vfpv4
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=vfpv4
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=neon
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=neon
 -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=neon
 
 Is it OK?
 
 Thanks!
 -Zhenqiang
 
 testsuite/ChangeLog:
 2013-11-08  Zhenqiang Chen  zhenqiang.c...@linaro.org
 
 * gcc.target/arm/lp1243022.c: Skip target arm-neon.
 
 --- a/gcc/testsuite/gcc.target/arm/lp1243022.c
 +++ b/gcc/testsuite/gcc.target/arm/lp1243022.c
 @@ -1,7 +1,7 @@
  /* { dg-do compile { target arm_thumb2 } } */
  /* { dg-options -O2 -fdump-rtl-subreg2 } */
 
 -/* { dg-final { scan-rtl-dump REG_INC subreg2 } } */
 +/* { dg-final { scan-rtl-dump REG_INC subreg2 { target { !
 arm_neon} } } } */
  /* { dg-final { cleanup-rtl-dump subreg2 } } */  struct device;
typedef
 unsigned int __u32;






Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Eric Botcazou
 I think you are right, this flag is no longer necessary, and removing
 this code path would simplify everything. Therefore I'd like to propose
 to remove the keep_aligning parameter of get_inner_reference as
 a split-out patch.
 
 Boot-strapped (with languages=all,ada,go) and
 regression-tested on x86_64-linux-gnu.

I don't understand how you can commit a patch that changes something only on 
strict-alignment platforms and test it only on x86-64.  This change *must* be 
tested with Ada on a strict-alignment platform, that's the only combination 
for which it is exercised.   If you cannot do that, then please back it out.

More generally speaking, it's not acceptable to make cleanup changes like that 
in the RTL expander without extreme care, which of course starts with proper 
testing.  The patch should not have been approved either for that reason.

-- 
Eric Botcazou


Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)

2013-11-27 Thread Richard Biener
On Mon, 25 Nov 2013, Jakub Jelinek wrote:

 On Fri, Nov 22, 2013 at 11:08:41AM +0100, Richard Biener wrote:
  The expr.c hunk is also ok independently of the patch.
 
 This is committed now.
 
  Ah, here is the stuff moved from.  I suppose the IPA param re-org
  is ok for trunk separately as well.
 
 And this too (without the simdlen field of the adjustment, which turned out
 to be unnecessary).
 
  What's the reason you cannot defer SIMD cloning to LTRANS stage
  as simple IPA pass next to IPA-PTA?
 
 Ok, deferring till after IPA-PTA was easy, just small ipa-cp.c changes
 (look at the attribute rather than simd*clone* fields), passes.def and
 had to tweak ipa_add_new_function which assumed that all new functions
 must be definitions with gimple body.
 
   +   /* Ignore #pragma omp declare simd functions
   +  if they don't have data references in the
   +  call stmt itself.  */
   +   if (j == n
   +!(op
   + (DECL_P (op)
   +|| (REFERENCE_CLASS_P (op)
   + get_base_address (op)
   + continue;
  
  Hmm.  I guess I have an idea now how to better support calls in
  data-ref/dependence analysis.  The above is fine for now - you
  might want to dump sth here if you fail because datarefs in a declare
  simd fn call.
 
 Haven't added any dump here, because there is the:
  
   + }
   + }
   + }
   +   LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
   +   if (dump_enabled_p ())
   + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
   +  not vectorized: loop contains function 
   +  calls or data references that cannot 
   +  be analyzed\n);
 
 which is dumped in that case.  Would another message be useful before that
 (or instead of that)?

Hmm, not sure - we can leave it as-is.

  I'd have expected an unconditional continue here (and leave
  STMT_VINFO_VECTYPE == NULL - fact is that the vector type of
  the argument is determined by its definition and thus may
  be different from what you record here anyway).
 
 Ok, now using STMT_VINFO_VECTYPE = NULL.
 
   +  if (thisarginfo.vectype != NULL_TREE
   +loop_vinfo
   +TREE_CODE (op) == SSA_NAME
   +simple_iv (loop, loop_containing_stmt (stmt), op, iv, false)
   +tree_fits_shwi_p (iv.step))
   + {
   +   thisarginfo.linear_step = tree_to_shwi (iv.step);
  
  Hmm, you should check thisarginfo.dt instead (I assume this case
  is for induction/reduction defs)?  In this case you also should
  use STMT_VINFO_LOOP_PHI_EVOLUTION_PART and not re-analyze via simple_iv.
 
 As discussed on IRC, STMT_VINFO_LOOP_PHI_EVOLUTION_PART can't be used,
 because it can be arbitrary linear function argument, not just an IV itself.
 vect-simd-clone-11.c testcase contains examples.  This patch doesn't avoid
 calling simple_iv again during transform phase, I don't have a failing
 testcase for that yet (but filed PR59288 for the preexisting issue).

You'll eventually run into a testcase - I promise ;)  Anyway, the
solution will be to store the result somewhere (in stmt_vinfo) and
re-use it.  We can fix this with a followup (see my fix for PR59288).

  
   +   thisarginfo.op = iv.base;
   + }
   +  else if (thisarginfo.vectype == NULL_TREE
   + POINTER_TYPE_P (TREE_TYPE (op)))
   + thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
  
  So this is for dt_external defs?
  
  Please switch on thisarginfo.dt here - that more naturally explains
  what you are doing (otherwise this definitely misses a comment).
 
 Done.
 
  Please save the result from the analysis (selecting the simd clone)
  in the stmt_vinfo and skip the analysis during transform phase.
 
 Done.
 
   +   vec_oprnd0
   + = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
   +   build_int_cst (integer_type_node, prec),
   +   build_int_cst (integer_type_node,
   +  (m  (k - 1)) * prec));
  
  Some helpers to build the tree to select a sub-vector would be nice
  (I remember seeing this kind of pattern elsewhere).
 
 I've simplified this to use size_int and bitsize_int for the args
 (as e.g. fold-const.c uses to create BIT_FIELD_REFs), but don't see what
 actually could be put into the helper, besides the BIT_FIELD_REF
 build there is nothing common with other spots and the arguments to that
 call also differ a lot.

Hmm, ok.

  
  For SINGLE_RHS assigns I prefer gimple_build_assign.
 
 Done everywhere.
 
   +  /* Update the exception handling table with the vector stmt if 
   necessary.  */
   +  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
   +

Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.

2013-11-27 Thread Richard Biener
On Wed, 27 Nov 2013, Tobias Burnus wrote:

 Am 27.11.2013 08:22, schrieb Sergey Ostanevich:
  Done.
  
 
 Thanks for fixing Richard's and Jakub's comments and parts of mine.
 
  +have the same meaning as described in @option{fvect-cost-model} and by
  +default a cost model defined with @option{fvect-cost-model} is used.
 As mentioned before, pleae add a hyphen before fvect (i.e.
 @option{fvect-cost-model}  - @option{-fvect-cost-model})
 
 Regarding a test case: I still think it would be useful to have one, but I
 somehow seemed to have messed up with my previous one - I fail to get it not
 to vectorize with omp simd due to cost reasons. Thus, I don't have one.

Ok with that change - no need to re-post before applying.

Thanks,
Richard.


[PATCH, score] Remove unused REG_CLASS_FROM_LETTER define

2013-11-27 Thread Liqin Chen
2013-11-27  Chen Liqin  liqin@gmail.com

* config/score/score.h (REG_CLASS_FROM_LETTER): Delete.


Index: gcc/config/score/score.h
===
--- gcc/config/score/score.h(revision 205384)
+++ gcc/config/score/score.h(working copy)
@@ -395,9 +395,6 @@
 /* The class value for index registers.  */
 #define INDEX_REG_CLASSNO_REGS

-extern enum reg_class score_char_to_class[256];
-#define REG_CLASS_FROM_LETTER(C)   score_char_to_class[(unsigned char) (C)]
-
 /* Addressing modes, and classification of registers for them.  */
 #define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
   score_regno_mode_ok_for_base_p (REGNO, 1)

Regards
--liqin


Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

2013-11-27 Thread Richard Biener
On Fri, 22 Nov 2013, Cong Hou wrote:

 Hi
 
 Currently in GCC vectorization, some loop invariant may be detected
 after aliasing checks, which can be hoisted outside of the loop. The
 current method in GCC may break the information built during the
 analysis phase, causing some crash (see PR59006 and PR58921).
 
 This patch improves the loop invariant hoisting by delaying it until
 all statements are vectorized, thereby keeping all built information.
 But those loop invariant statements won't be vectorized, and if a
 variable is defined by one of those loop invariant, it is treated as
 an external definition.
 
 Bootstrapped and testes on an x86-64 machine.

Hmm.  I'm still thinking that we should handle this during the regular
transform step.

Like with the following incomplete patch.  Missing is adjusting
the rest of the vectorizable_* functions to handle the case where all defs
are dt_external or constant by setting their own STMT_VINFO_DEF_TYPE to
dt_external.  From the gcc.dg/vect/pr58508.c we get only 4 hoists
instead of 8 because of this (I think).

Also gcc.dg/vect/pr52298.c ICEs for yet unanalyzed reason.

I can take over the bug if you like.

Thanks,
Richard.

Index: gcc/tree-vect-data-refs.c
===
*** gcc/tree-vect-data-refs.c   (revision 205435)
--- gcc/tree-vect-data-refs.c   (working copy)
*** again:
*** 3668,3673 
--- 3668,3682 
}
  STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
}
+   else if (loop_vinfo
+   integer_zerop (DR_STEP (dr)))
+   {
+ /* All loads from a non-varying address will be disambiguated
+by data-ref analysis or via a runtime alias check and thus
+they will become invariant.  Force them to be vectorized
+as external.  */
+ STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
+   }
  }
  
/* If we stopped analysis at the first dataref we could not analyze
Index: gcc/tree-vect-loop-manip.c
===
*** gcc/tree-vect-loop-manip.c  (revision 205435)
--- gcc/tree-vect-loop-manip.c  (working copy)
*** vect_loop_versioning (loop_vec_info loop
*** 2269,2275 
  
/* Extract load statements on memrefs with zero-stride accesses.  */
  
!   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
  {
/* In the loop body, we iterate each statement to check if it is a load.
 Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
--- 2269,2275 
  
/* Extract load statements on memrefs with zero-stride accesses.  */
  
!   if (0  LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
  {
/* In the loop body, we iterate each statement to check if it is a load.
 Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
Index: gcc/tree-vect-loop.c
===
*** gcc/tree-vect-loop.c(revision 205435)
--- gcc/tree-vect-loop.c(working copy)
*** vect_transform_loop (loop_vec_info loop_
*** 5995,6000 
--- 5995,6020 
}
}
  
+ /* If the stmt is loop invariant simply move it.  */
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_external_def)
+   {
+ if (dump_enabled_p ())
+   {
+ dump_printf_loc (MSG_NOTE, vect_location,
+  hoisting out of the vectorized loop: );
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
+ dump_printf (MSG_NOTE, \n);
+   }
+ gsi_remove (si, false);
+ if (gimple_vuse (stmt))
+   gimple_set_vuse (stmt, NULL);
+ basic_block new_bb;
+ new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
+stmt);
+ gcc_assert (!new_bb);
+ continue;
+   }
+ 
  /*  vectorize statement  */
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, transform statement.\n);
Index: gcc/tree-vect-stmts.c
===
*** gcc/tree-vect-stmts.c   (revision 205435)
--- gcc/tree-vect-stmts.c   (working copy)
*** vectorizable_operation (gimple stmt, gim
*** 3497,3502 
--- 3497,3503 
vectree vec_oprnds2 = vNULL;
tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+   bool all_ops_external = true;
int vf;
  
if (!STMT_VINFO_RELEVANT_P (stmt_info)  !bb_vinfo)
*** vectorizable_operation (gimple stmt, gim
*** 3557,3562 
--- 3558,3565 
   use not simple.\n);
return false;
  }
+   if (dt[0] != vect_external_def  dt[0] != vect_constant_def)
+ 

Re: [patch] set MULTIARCH_DIRNAME for multilib architectures

2013-11-27 Thread Aurelien Jarno
On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote:
 Am 13.06.2013 11:42, schrieb Richard Sandiford:
  Bernhard Reutner-Fischer rep.dot@gmail.com writes:
  On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com 
  wrote:
  Matthias Klose d...@ubuntu.com writes:
  Index: config/mips/t-linux64
  ===
  --- config/mips/t-linux64(revision 200012)
  +++ config/mips/t-linux64(working copy)
  @@ -24,3 +24,13 @@
   ../lib32$(call 
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
   ../lib$(call 
  if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
   ../lib64$(call
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
  +
  +ifneq (,$(findstring abin32,$(target)))
  +MULTIARCH_DIRNAME = $(call 
  if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT))
  +else
  +ifneq (,$(findstring abi64,$(target)))
  +MULTIARCH_DIRNAME = $(call 
  if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
  +else
  +MULTIARCH_DIRNAME = $(call 
  if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
  +endif
  +endif
 
  findstring seems a bit fragile for a full triple.  I think it would
  be better to have something similar to the current MIPS_SOFT definition:
 
  MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, 
  $(target_cpu_default)) $(filter soft, $(with_float))),soft)
 
  but for ABIs.  It could then also take with_abi into account.
  Maybe something like:
 
  MIPS_ABI = $(or $(with_abi), \
  $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
  $(target_cpu_default)), n32), \
  o32)
 
  (completely untested).
 
  Bikeshedding:
  Doko would know, but ISTR that $(or) did not exist in make-3.80 which is 
  currently the minimum prerequisite, fwiw.
  
  Gah, that's a pity.  Thanks for the catch though.  Maybe firstword
  would be OK instead.
  
  I see I also fell into the usual ABI trap.  It wouldn't have affected
  the use in this patch, but the default ought to be 32 rather than o32.
 
 the define is in tm_defines, not target_cpu_default.
 
 MIPS_ABI = $(or $(with_abi), \
 $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
 $(tm_defines)), n32), \
  32)
 
 However I can't see yet how this should be used. Maybe Aurelian could come up
 with a tested version of this patch?
 

How about the patch below?

It first determines the ABI without using $(or), by looking (in this
order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32
if none of the previous matches.

Then the multiarch path is constructed from the ABI.

Index: gcc/config/mips/t-linux64
===
--- gcc/config/mips/t-linux64   (révision 205437)
+++ gcc/config/mips/t-linux64   (copie de travail)
@@ -24,3 +24,22 @@
../lib32$(call 
if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
../lib64$(call 
if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
+
+ifneq ($(with_abi),)
+MIPS_ABI = $(with_abi)
+endif
+ifeq ($(MIPS_ABI),)
+MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, $(tm_defines))),n32)
+endif
+ifeq ($(MIPS_ABI),)
+MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target)
+endif
+ifeq ($(MIPS_ABI),)
+MIPS_ABI=32
+endif
+
+ifeq ($(MIPS_ABI),32)
+MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
+else
+MULTIARCH_DIRNAME = $(call 
if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT))
+endif

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net


Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Yvan Roux
Ping

On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote:
 Ping.

 On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote:
 Hi,

 this patch fixed an LRA cycling due to secondary reload (Thumb mode).
 Notice that this patch is a prerequisite to turn on LRA by default on
 ARM.  Bootstrapped on a9 and a15 without any regression in the
 testsuite as LRA is off by default and with the regression reported in
 the thread bellow when LRA is on.

 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html

 Thanks,
 Yvan

 2013-11-07  Yvan Roux  yvan.r...@linaro.org

 * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return 
 NO_REGS
 for LRA.


Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p

2013-11-27 Thread Yvan Roux
Ping.

On 19 November 2013 09:52, Yvan Roux yvan.r...@linaro.org wrote:
 yep, all good performance-wise :)

 Great, Thanks Kyrill.

 Ok for trunk ?

 Yvan


Re: RFA: patch to fix PR58785 (an ARM LRA crash)

2013-11-27 Thread Yvan Roux
Ping.

On 20 November 2013 10:22, Yvan Roux yvan.r...@linaro.org wrote:
 Hi,

 as Richard said, only a subset of rclass is allowed to be returned by
 preferred_reload_class.  I've tested the attached patched in Thumb
 mode, on ARMv5, A9 and A9hf and on cross A15 without regression.

 Yvan

 2013-11-20  Yvan Roux  yvan.r...@linaro.org

 PR target/58785
 * config/arm/arm.c (arm_preferred_reload_class): Only return LO_REGS
 when rclass is GENERAL_REGS.


Re: [C++ Patch] PR 58647

2013-11-27 Thread Paolo Carlini

Hi,

On 11/26/2013 08:10 PM, Jason Merrill wrote:

On 11/26/2013 11:43 AM, Paolo Carlini wrote:

We have got a bunch of testcases, for example constexpr-ex4.C - attached
for your convenience - which trigger the assert !really_overloaded_fn
(t) ... What do you suggest?


Aha.  Well, in that case we really can't get a constant value, so I'd 
assert allow_non_constant, set *non_constant_p, and return t.


Actually, I'd do that any time we get a function COMPONENT_REF, since 
that will only ever happen under the checking maybe_constant_value 
call anyway.  So a small expansion of your original patch.

Thus something like the below? Passes testing.

Thanks,
Paolo.


Index: cp/semantics.c
===
--- cp/semantics.c  (revision 205433)
+++ cp/semantics.c  (working copy)
@@ -9601,6 +9601,12 @@ cxx_eval_constant_expression (const constexpr_call
   break;
 
 case COMPONENT_REF:
+  if (is_overloaded_fn (t))
+   {
+ gcc_checking_assert (allow_non_constant);
+ *non_constant_p = true;
+ return t;
+   }
   r = cxx_eval_component_reference (call, t, allow_non_constant, addr,
non_constant_p, overflow_p);
   break;
Index: testsuite/g++.dg/parse/crash66.C
===
--- testsuite/g++.dg/parse/crash66.C(revision 0)
+++ testsuite/g++.dg/parse/crash66.C(working copy)
@@ -0,0 +1,11 @@
+// PR c++/58647
+
+struct A
+{
+  static void foo();
+};
+
+templatetypename void bar()
+{
+  A().foo;
+}


[PATCH] ubsan TLC + PR59306 fix

2013-11-27 Thread Marek Polacek
This was meant only as a TLC, but it also fixes PR59306.  Using
walk_gimple_op was an overkill; gimple_{store,assign_load}_p is
enough.  As a side effect, it also fixes the bug because now we
better restrict what goes into instrument_member_call.

Bootstrapped, ran ubsan testsuite on x86_64-linux, ok for trunk?

2013-11-27  Marek Polacek  pola...@redhat.com

PR sanitizer/59306
* ubsan.c (instrument_null): Use gimple_store_p/gimple_assign_load_p
instead of walk_gimple_op.
(ubsan_pass): Adjust.  Call instrument_null only if SANITIZE_NULL.
testsuite/
* g++.dg/ubsan/pr59306.C:

--- gcc/ubsan.c.mp3 2013-11-27 09:55:23.573252894 +0100
+++ gcc/ubsan.c 2013-11-27 11:15:29.893507768 +0100
@@ -614,24 +614,22 @@ instrument_mem_ref (tree t, gimple_stmt_
   gsi_insert_before (iter, g, GSI_SAME_STMT);
 }
 
-/* Callback function for the pointer instrumentation.  */
+/* Perform the pointer instrumentation.  */
 
-static tree
-instrument_null (tree *tp, int * /*walk_subtree*/, void *data)
+static void
+instrument_null (gimple_stmt_iterator gsi, bool is_lhs)
 {
-  tree t = *tp;
+  gimple stmt = gsi_stmt (gsi);
+  tree t = is_lhs ? gimple_get_lhs (stmt) : gimple_assign_rhs1 (stmt);
+  t = get_base_address (t);
   const enum tree_code code = TREE_CODE (t);
-  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
-
   if (code == MEM_REF
TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME)
-instrument_mem_ref (TREE_OPERAND (t, 0), wi-gsi, wi-is_lhs);
+instrument_mem_ref (TREE_OPERAND (t, 0), gsi, is_lhs);
   else if (code == ADDR_EXPR
POINTER_TYPE_P (TREE_TYPE (t))
TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == METHOD_TYPE)
-instrument_member_call (wi-gsi);
-
-  return NULL_TREE;
+instrument_member_call (gsi);
 }
 
 /* Gate and execute functions for ubsan pass.  */
@@ -646,7 +644,6 @@ ubsan_pass (void)
 {
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
{
- struct walk_stmt_info wi;
  gimple stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt) || gimple_clobber_p (stmt))
{
@@ -654,9 +651,14 @@ ubsan_pass (void)
  continue;
}
 
- memset (wi, 0, sizeof (wi));
- wi.gsi = gsi;
- walk_gimple_op (stmt, instrument_null, wi);
+ if (flag_sanitize  SANITIZE_NULL)
+   {
+ if (gimple_store_p (stmt))
+   instrument_null (gsi, true);
+ if (gimple_assign_load_p (stmt))
+   instrument_null (gsi, false);
+   }
+
  gsi_next (gsi);
}
 }
--- gcc/testsuite/g++.dg/ubsan/pr59306.C.mp32013-11-27 11:15:17.900456072 
+0100
+++ gcc/testsuite/g++.dg/ubsan/pr59306.C2013-11-27 11:15:01.696387988 
+0100
@@ -0,0 +1,14 @@
+// { dg-do compile }
+// { dg-options -fsanitize=undefined }
+// { dg-skip-if  { *-*-* } { -flto } {  } }
+
+class A {
+  void bar (void (A::*) (int));
+  void foo (int);
+  void B ();
+};
+
+void A::B()
+{
+  bar (A::foo);
+}

Marek


[PATCH] Postpone __LINE__ evaluation to the end of #line directives

2013-11-27 Thread mtewoodbury
From: Max TenEyck Woodbury max+...@mtew.isa-geek.net

Copyright 2013 assigned to the Free Software Foundation.
---
 gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++
 libcpp/directives.c  |  9 -
 libcpp/internal.h|  1 +
 libcpp/macro.c   |  3 +++
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c b/gcc/testsuite/gcc.dg/cpp/line4.c
index 84dbf96..0120a2b 100644
--- a/gcc/testsuite/gcc.dg/cpp/line4.c
+++ b/gcc/testsuite/gcc.dg/cpp/line4.c
@@ -13,7 +13,18 @@ enum { i = __LINE__ };
 enum { j = __LINE__ };
 
 #line 16  /* N.B. the _next_ line is line 16.  */
-
-char array1[i== 44 ? 1 : -1];
-char array2[j== 90 ? 1 : -1];
-char array3[__LINE__ == 19 ? 1 : -1];
+ /* __LINE__ should be 16 
*/
+char array1[i== 44 ? 1 : -1];   /* 17 
*/
+char array2[j== 90 ? 1 : -1];   /* 18 
*/
+char array3[__LINE__ == 19 ? 1 : -1];   /* 19 
*/
+/* 20 
*/
+# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. */
+/* 22 
*/
+char array4[__LINE__ == 23 ? 1: -1];/* 23 
*/
+char array5[__LINE__ == 23 ? -1: 1];/* 24 
*/
+/* 25 
*/
+# line __LINE__ /* N.B. nor shoud a multi-line comment change the fact
+   that the __LINE__ sequence should _not_ change here. */
+/* 28 
*/
+char array6[__LINE__ == 29 ? 1: -1];/* 29 
*/
+char array7[__LINE__ == 27 ? -1: 1];/* 30 
*/
diff --git a/libcpp/directives.c b/libcpp/directives.c
index 65b2034..adb04a5 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -900,7 +900,9 @@ do_line (cpp_reader *pfile)
   bool wrapped;
 
   /* #line commands expand macros.  */
+  ++pfile-state.in_directive;  /* Request special __LINE__ handling. 
*/
   token = cpp_get_token (pfile);
+  --pfile-state.in_directive;  /* Cancle request 
*/
   if (token-type != CPP_NUMBER
   || strtolinenum (token-val.str.text, token-val.str.len,
   new_lineno, wrapped))
@@ -914,7 +916,9 @@ do_line (cpp_reader *pfile)
   return;
 }
 
-  if (CPP_PEDANTIC (pfile)  (new_lineno == 0 || new_lineno  cap || wrapped))
+  if (CPP_PEDANTIC (pfile)  (new_lineno == 0 ||
+  (new_lineno  cap  new_lineno != CUR__LINE__) ||
+  wrapped))
 cpp_error (pfile, CPP_DL_PEDWARN, line number out of range);
   else if (wrapped)
 cpp_error (pfile, CPP_DL_WARNING, line number out of range);
@@ -936,6 +940,9 @@ do_line (cpp_reader *pfile)
 }
 
   skip_rest_of_line (pfile);
+  if ( new_lineno == CUR__LINE__ )  /* Postponed evaluation ? 
*/
+new_lineno = linemap_get_expansion_line (pfile-line_table,
+ pfile-line_table-highest_line);
   _cpp_do_file_change (pfile, LC_RENAME_VERBATIM, new_file, new_lineno,
   map_sysp);
 }
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 5321458..268de86 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -604,6 +604,7 @@ cpp_in_primary_file (cpp_reader *pfile)
 {
   return pfile-line_table-depth == 1;
 }
+#define CUR__LINE__ -1U
 
 /* In macro.c */
 extern void _cpp_free_definition (cpp_hashnode *);
diff --git a/libcpp/macro.c b/libcpp/macro.c
index e359d15..47e41b6 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -309,6 +309,9 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode 
*node)
   /* If __LINE__ is embedded in a macro, it must expand to the
 line of the macro's invocation, not its definition.
 Otherwise things like assert() will not work properly.  */
+  if ( pfile-state.in_directive  1 ) /* In #line directive? 
*/
+number = CUR__LINE__;  /* yes, postpone the lookup... 
*/
+  else
   number = linemap_get_expansion_line (pfile-line_table,
   CPP_OPTION (pfile, traditional)
   ? pfile-line_table-highest_line
-- 
1.8.0.rc0.18.gf84667d



Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 10:58:30AM +0100, Richard Biener wrote:
 On Wed, 27 Nov 2013, Tobias Burnus wrote:
 
  Am 27.11.2013 08:22, schrieb Sergey Ostanevich:
   Done.
   
  
  Thanks for fixing Richard's and Jakub's comments and parts of mine.
  
   +have the same meaning as described in @option{fvect-cost-model} and by
   +default a cost model defined with @option{fvect-cost-model} is used.
  As mentioned before, pleae add a hyphen before fvect (i.e.
  @option{fvect-cost-model}  - @option{-fvect-cost-model})
  
  Regarding a test case: I still think it would be useful to have one, but I
  somehow seemed to have messed up with my previous one - I fail to get it not
  to vectorize with omp simd due to cost reasons. Thus, I don't have one.
 
 Ok with that change - no need to re-post before applying.

Note the start of the ChangeLog is still wrong:

2013-11-25  Sergey Ostanevich  sergos@gmail.com

* c.opt: Introduced a new openmp-simd warning.
* lang.opt: Ditto.

Because:

2013-11-25  Sergey Ostanevich  sergos@gmail.com

* c.opt (Wopenmp-simd): New.

needs to go into the gcc/c-family/ChangeLog and

2013-11-25  Sergey Ostanevich  sergos@gmail.com

* lang.opt (Wopenmp-simd): New.

needs to go into the gcc/fortran/ChangeLog, you can't use ditto there
because it is a different file.  Please fix that up before committing.

Jakub


[PATCH v2] libgcc: AArch64: Check for correct signal insns on BE when unwinding

2013-11-27 Thread Matthew Leach
Hi,

When unwinding the stack, the unwind code checks for two opcodes that
denote a registrations of a signal handler. This is broken on BE as
the opcodes will be in the wrong byte-order as insns are always LE.

Add the correct checks when compiling for AArch64 big endian.

This patch fixes all glibc backtrace tests and causes no other
regressions on glibc.

Please note that I don't have commit access, if this is OK could
someone merge it for me?

Thanks,
Matt Leach

libgcc/
2013-11-26  Matthew Leach  matthew.le...@arm.com

* config/aarch64/linux-unwind.h (aarch64_fallback_frame_state):
Check for correct opcodes on BE.

diff --git a/libgcc/config/aarch64/linux-unwind.h 
b/libgcc/config/aarch64/linux-unwind.h
index fde4d14..8b0d7fe 100644
--- a/libgcc/config/aarch64/linux-unwind.h
+++ b/libgcc/config/aarch64/linux-unwind.h
@@ -25,6 +25,19 @@
 #include signal.h
 #include sys/ucontext.h
 
+
+/* Since insns are always stored LE, on a BE system the opcodes will
+   be loaded byte-reversed.  Therefore, define two sets of opcodes,
+   one for LE and one for BE.  */
+
+#if __AARCH64EB__
+#define MOVZ_X8_8B 0x681180d2
+#define SVC_0  0x01d4
+#else
+#define MOVZ_X8_8B 0xd2801168
+#define SVC_0  0xd401
+#endif
+
 #define MD_FALLBACK_FRAME_STATE_FOR aarch64_fallback_frame_state
 
 static _Unwind_Reason_Code
@@ -55,7 +68,7 @@ aarch64_fallback_frame_state (struct _Unwind_Context *context,
  0xd2801168 movz x8, #0x8b
  0xd401 svc  0x0
*/
-  if (pc[0] != 0xd2801168 || pc[1] != 0xd401)
+  if (pc[0] != MOVZ_X8_8B || pc[1] != SVC_0)
 {
   return _URC_END_OF_STACK;
 }
-- 
1.7.9.5




Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives

2013-11-27 Thread Marek Polacek
On Wed, Nov 27, 2013 at 05:29:22AM -0500, mtewoodb...@gmail.com wrote:
 From: Max TenEyck Woodbury max+...@mtew.isa-geek.net

This patch is badly missing a description.  You also want to mention
the PR number, if this fixes a bug.  I guess this is to fix PR58687.
 
 Copyright 2013 assigned to the Free Software Foundation.
 ---
  gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++
  libcpp/directives.c  |  9 -
  libcpp/internal.h|  1 +
  libcpp/macro.c   |  3 +++
  4 files changed, 27 insertions(+), 5 deletions(-)

You didn't attach a ChangeLog entry.

The patch has numerous formatting issues, see below.

 diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c 
 b/gcc/testsuite/gcc.dg/cpp/line4.c
 index 84dbf96..0120a2b 100644
 --- a/gcc/testsuite/gcc.dg/cpp/line4.c
 +++ b/gcc/testsuite/gcc.dg/cpp/line4.c
 @@ -13,7 +13,18 @@ enum { i = __LINE__ };
  enum { j = __LINE__ };
  
  #line 16  /* N.B. the _next_ line is line 16.  */
 -
 -char array1[i== 44 ? 1 : -1];
 -char array2[j== 90 ? 1 : -1];
 -char array3[__LINE__ == 19 ? 1 : -1];
 + /* __LINE__ should be 
 16 */
 +char array1[i== 44 ? 1 : -1];   /* 
 17 */
 +char array2[j== 90 ? 1 : -1];   /* 
 18 */
 +char array3[__LINE__ == 19 ? 1 : -1];   /* 
 19 */
 +/* 
 20 */
 +# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. */

Two spaces after '.'.

 +# line __LINE__ /* N.B. nor shoud a multi-line comment change the fact

should

 diff --git a/libcpp/directives.c b/libcpp/directives.c
 index 65b2034..adb04a5 100644
 --- a/libcpp/directives.c
 +++ b/libcpp/directives.c
 @@ -900,7 +900,9 @@ do_line (cpp_reader *pfile)
bool wrapped;
  
/* #line commands expand macros.  */
 +  ++pfile-state.in_directive;  /* Request special __LINE__ 
 handling. */

Don't put the comments next to the statements, instead put them above
the statements.

token = cpp_get_token (pfile);
 +  --pfile-state.in_directive;  /* Cancle 
 request */

Cancel

 -  if (CPP_PEDANTIC (pfile)  (new_lineno == 0 || new_lineno  cap || 
 wrapped))
 +  if (CPP_PEDANTIC (pfile)  (new_lineno == 0 ||
 +  (new_lineno  cap  new_lineno != CUR__LINE__) ||
 +  wrapped))

|| operator shouldn't end the line, put it on the start of the next
line.  See existing code for how it should look like.

  cpp_error (pfile, CPP_DL_PEDWARN, line number out of range);
else if (wrapped)
  cpp_error (pfile, CPP_DL_WARNING, line number out of range);
 @@ -936,6 +940,9 @@ do_line (cpp_reader *pfile)
  }
  
skip_rest_of_line (pfile);
 +  if ( new_lineno == CUR__LINE__ )  /* Postponed evaluation 
 ? */
 +new_lineno = linemap_get_expansion_line (pfile-line_table,
 + 
 pfile-line_table-highest_line);

No space after '(' and before ')'.  No space before '?'.

 +#define CUR__LINE__ -1U

The trailing underscores look weird to me.  Thanks,

Marek


Re: Some wide-int review comments

2013-11-27 Thread Richard Biener
On Tue, Nov 26, 2013 at 5:33 PM, Kenneth Zadeck
zad...@naturalbridge.com wrote:
 Richi,

 patch ping

Ok.

Thanks,
Richard.

 also two more pieces of information.With further testing, this seems to
 fix

 Tests that now work, but didn't before:
 ===
 ext/random/hypergeometric_distribution/operators/values.cc (test for excess
 errors)

 New tests that PASS:

 ext/random/hypergeometric_distribution/operators/values.cc execution test
 

 also, the corresponding frag for fold-const.c on the wide-int branch will
 look like
 
 Index: gcc/fold-const.c
 ===
 --- gcc/fold-const.c(revision 205224)
 +++ gcc/fold-const.c(working copy)
 @@ -1030,51 +1030,51 @@ int_const_binop_1 (enum tree_code code,

  case TRUNC_DIV_EXPR:
  case EXACT_DIV_EXPR:
 -  res = wi::div_trunc (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::div_trunc (arg1, arg2, sign, overflow);
break;

  case FLOOR_DIV_EXPR:
 -  res = wi::div_floor (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::div_floor (arg1, arg2, sign, overflow);
break;

  case CEIL_DIV_EXPR:
 -  res = wi::div_ceil (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::div_ceil (arg1, arg2, sign, overflow);
break;

  case ROUND_DIV_EXPR:
 -  res = wi::div_round (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::div_round (arg1, arg2, sign, overflow);
break;

  case TRUNC_MOD_EXPR:
 -  res = wi::mod_trunc (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::mod_trunc (arg1, arg2, sign, overflow);
break;

  case FLOOR_MOD_EXPR:
 -  res = wi::mod_floor (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::mod_floor (arg1, arg2, sign, overflow);
break;

  case CEIL_MOD_EXPR:
 -  res = wi::mod_ceil (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::mod_ceil (arg1, arg2, sign, overflow);
break;

  case ROUND_MOD_EXPR:
 -  res = wi::mod_round (arg1, arg2, sign, overflow);
 -  if (overflow)
 +  if (arg2 == 0)
  return NULL_TREE;
 +  res = wi::mod_round (arg1, arg2, sign, overflow);
break;

  case MIN_EXPR:

 

 On 11/20/2013 06:31 PM, Kenneth Zadeck wrote:

 On 11/13/2013 04:59 AM, Richard Biener wrote:

 On Tue, Nov 12, 2013 at 5:43 PM, Kenneth Zadeck
 zad...@naturalbridge.com wrote:

 On 11/12/2013 11:27 AM, Joseph S. Myers wrote:

 On Tue, 12 Nov 2013, Kenneth Zadeck wrote:

 Richi,

 i am having a little trouble putting this back the way that you want.
 The
 issue is rem.
 what is supposed to happen for INT_MIN % -1?

 I would assume because i am failing the last case of
 gcc.dg/c90-const-expr-8.c
 that INT_MIN %-1 should not overflow even if INT_MIN / -1 does.
 however,

 Given the conclusion in C11 that a%b should be considered undefined if
 a/b
 is not representable, I think it's reasonable to say INT_MIN % -1
 *should*
 be considered to overflow (for all C standard versions) (and bug 30484
 is
 only a bug for -fwrapv).

 however, my local question is what do we want the api to be
 int-const-binop-1?The existing behavior seems to be that at least
 for
 common modes this function silently returns 0 and it is up to the front
 ends
 to put their own spin on it.

 For wide-int you create 1:1 the behavior of current trunk (if a change of
 behavior in TImode is not tested in the testsuite then you can ignore
 that).
 Whatever change you do to semantics of functions you do separately
 from wide-int (preferably first on trunk, or at your choice after the
 wide-int
 merge).

 For this case in question I'd say a % -1 should return 0, but for
 INT_MIN % -1 that 0 should have TREE_OVERFLOW set (and
 thus you need to adjust that c90-const-expr-8.c testcase).

 Richard.

 kenny

 richi,
 I have done this exactly as you suggested.   bootstrapped and regression
 tested on x86-64.

 2013-11-20  Kenneth Zadeck  zad...@naturalbridge.com

 * fold-const.c
 (int_const_binop_1): Make INT_MIN % -1 return 0 with the overflow
 bit set.


 2013-11-20  Kenneth Zadeck  zad...@naturalbridge.com

 * gcc.dg/c90-const-expr-8.c: Look for overflow on INT_MIN % -1.
 * gcc.dg/c99-const-expr-8.c: Look for overflow on INT_MIN % -1.

 ok to commit?

 kenny




RE: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Bernd Edlinger
Hi,

On 27 Nov 2013 10:43:59, Eric Botcazou wrote:

 I think you are right, this flag is no longer necessary, and removing
 this code path would simplify everything. Therefore I'd like to propose
 to remove the keep_aligning parameter of get_inner_reference as
 a split-out patch.

 Boot-strapped (with languages=all,ada,go) and
 regression-tested on x86_64-linux-gnu.

 I don't understand how you can commit a patch that changes something only on
 strict-alignment platforms and test it only on x86-64. This change *must* be
 tested with Ada on a strict-alignment platform, that's the only combination

Well, I did that. Apologies for not mentioning that.

 for which it is exercised. If you cannot do that, then please back it out.

 More generally speaking, it's not acceptable to make cleanup changes like that
 in the RTL expander without extreme care, which of course starts with proper
 testing. The patch should not have been approved either for that reason.

 --
 Eric Botcazou

The change on the ada interface is actually not critical, because all 
invocations
of get_inner_reference there used keep_aligning == false, as did the majority of
all other invocations.

What changes with that patch, is that get_inner_reference(, true) could 
return
a VIEW_CONVERT_EXPR, which is now obsolete.

If it is causing any trouble, I can revert that change of course.

Thanks
Bernd.

Re: [PATCH] ubsan TLC + PR59306 fix

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 11:29:21AM +0100, Marek Polacek wrote:
 This was meant only as a TLC, but it also fixes PR59306.  Using
 walk_gimple_op was an overkill; gimple_{store,assign_load}_p is
 enough.  As a side effect, it also fixes the bug because now we
 better restrict what goes into instrument_member_call.
 
 Bootstrapped, ran ubsan testsuite on x86_64-linux, ok for trunk?

Ok, thanks.

 2013-11-27  Marek Polacek  pola...@redhat.com
 
   PR sanitizer/59306
   * ubsan.c (instrument_null): Use gimple_store_p/gimple_assign_load_p
   instead of walk_gimple_op.
   (ubsan_pass): Adjust.  Call instrument_null only if SANITIZE_NULL.
 testsuite/
   * g++.dg/ubsan/pr59306.C:

Missing  New test. ;)

Jakub


Re: [patch tree-ssa-forwprop]: Add type raising in shift-operations

2013-11-27 Thread Richard Biener
On Tue, Nov 26, 2013 at 10:42 PM, Jeff Law l...@redhat.com wrote:
 On 11/26/13 02:58, Richard Biener wrote:

 What is the rationale behind one for over the other?  Or is it arbitrary?
 I
 can easily make a case for either form based on what we're trying to
 optimize.  In general, it seems to me that


 When (Type) is a widening cast then you have to worry to not introduce
 undefined behavior when y is larger than the number of bits in X.
 When (Type) is a shortening cast then we lose information - because
 then the range we can determine for y from the shift operation will
 be larger.

 So I'm not sure I follow the reasoning to move out the casting when
 possible.

 Assume that we're not introducing undefined behaviour.  Obviously if we're
 introducing undefined behaviour, then the transformation is wrong, plain and
 simple.

 If by moving the cast we can eliminate an ALU operation because the shift
 (in this example) combines with something earlier or later in the IL, then
 we win.  Similarly if by moving the cast we expose redundant casting, then
 we win.

 A narrower range is obviously good, but often just having a narrower range
 won't in and of itself provide an optimization opportunity.




 The goal for canonicalization should be to shorten operations where
 possible.  Thus both cases, sinking and hoisting the cast should
 be applied dependent on validity and whether the shift will be performed
 in a smaller type in the end.

 I disagree, there are going to be times when sinking or hoisting the cast
 allows the shift to combine with other instructions in the IL. There are
 times when the ability to combine with other instructions in the stream
 should be driving these decisions.

 I would buy that as a guiding principle that applies in cases where we don't
 have other optimization opportunities we can expose by hoisting/sinking the
 type casts.  I would even buy that we prefer the narrowest range through
 some point in the compiler (say vrp2), then we allow wider ranges as needed
 to facilitate other optimizations.





 If we're more interested in combining the shift with statements that
 define
 the shift's operands, then the former is going to be a better
 representation
 because we're typecasting the result and thus the typecast doesn't
 interfere
 with the desired optimization.

 In contrast if we are more interested in cases where the shift defines
 operands for some other statement, then the latter form is more
 beneficial
 because we're typecasting the inputs and thus the typecast doesn't
 interfere
 with the desired optimization.


 Always a trade-off which is why combining passes always have to
 consider casts.

 Actually, I would say quite the opposite here.  There's a very clear line
 when we want to go from one representation to the other within the forwprop
 passes.  By doing so, we relieve the pattern matching aspects of that pass
 from needing to concern themselves with the typecasting issues.



 I think the goal of canonicalizing operations to work on the smallest
 mode possible during early GIMPLE passes makes the most sense
 (even if then a cast is not always outside or inside an operation).

 Later (currently during RTL expansion) we can widen operations again
 according to target needs.

 But what you end up missing here is optimization opportunities that expose
 themselves when the casts are moved from inputs to outputs or vice-versa.

Only if the optimization opportunities do not handle the casts themselves.
We for example have the get_unwidened () helper in fold that helps
transforms to look through some casts.  I think we want similar helpers
for the GIMPLE level combining - the combiner knows whether for
an operand with type T it's ok to have a wider or narrower operand
or if it wants an exact T2 operand instead.  Helpers should exist
that get it at that, if possible.

Currently the issue would be how to represent the result of get it at that,
but I'll work on removing the tree-building-and-re-gimplifying way of
middle-end expression creation for the next stage1 so that we can easily
represent multiple-stmts intermediate expressions.  It'll be a
(maybe wrapped in some C++ sugar, maybe then with non-GC
allocation) gimple_seq.

So you do

  op = get_me_the_op_in_type_X (X, gimple_assign_rhs2 (stmt));
  if (!op)
return false;

and now op is just a regular op you can work with, eventually pointing
to a temporary sequence that you'd need to insert.

Note that forwprops combining should really work like fold - combine
from the expression leafs to the expression roots (works currently
by visiting in stmt order).  Now consider (T2)(T1)(a  5) and
((T1)(a  5))  5.  The 2nd form would prefer if we'd have combined
the leaf (T1)(a  5) to ((T1)a  5) while the first would be pessimized
by that.  So - what kind of form do you prefer?  I'd say you can't
tell that from the use(s) but you have to be able to decide without
and thus cannot decide based on may I better be able to 

Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Eric Botcazou
 Well, I did that. Apologies for not mentioning that.

OK, on which strict-alignment platform did you test it with Ada enabled?

 The change on the ada interface is actually not critical, because all
 invocations of get_inner_reference there used keep_aligning == false, as
 did the majority of all other invocations.

Sure, but that's not the point...

 What changes with that patch, is that get_inner_reference(, true) could
 return a VIEW_CONVERT_EXPR, which is now obsolete.

...that's what needs to be properly verified indeed.

 If it is causing any trouble, I can revert that change of course.

You might want to take a look at PR middle-end/17746, it took about 3 months 
almost a decade ago to find something plausible to fix this fundamental issue 
for Ada on strict-aligmnent platforms so I'd rather not go through this again.

-- 
Eric Botcazou


Re: [PATCH] Improve handling of threads which cross over the current loops header

2013-11-27 Thread Richard Biener
On Tue, Nov 26, 2013 at 11:37 PM, Jeff Law l...@redhat.com wrote:
 On 11/26/13 02:26, Richard Biener wrote:

 But only necessary if this threading returned true, no?

 Correct.  Fix for that spinning overnight.



 Also
 how likely did it scramble the loop?  I see that thread_block_1
 already cancels loops in some cases so I wonder what case
 it misses so that you need to cancel the loop unconditonally here.

 If you get into that code, effectively always.  We know we're threading
 around the backedge through a multi-way branch at the top, then to some
 interior node within the loop.  It would depend on the precise structure of
 the CFG within the loop, but basically if you get here, there is a good
 chance you have irreducible loops.

 For a good example of how badly things can get scrambled, look at what we do
 with 54742.  Dump or draw CFGs before/after DOM1.  It's painful to see what
 we do to the loop structure, but removing that switch is such a huge win
 that losing the loop structure has become a non-concern.

Ick ;)  Loop fixup isn't able to recover the original loop here, it
discovers a new one, the one just covering looping through the
default: case of the switch.

The first pass that allows CFG manipulations during loop init
is able to disambiguate the mess DOM left us with and discovers
a loop nest of depth 4.

Nice transform I'd say ;)

Richard.

 Do you have a testcase that ICEs if I remove the cancelling above?

 I'd have to review the PRs to recall if they were ICEs and/or codegen
 failures.

 jeff



Re: wide-int, rs6000

2013-11-27 Thread Richard Biener
On Wed, Nov 27, 2013 at 2:07 AM, Kenneth Zadeck
zad...@naturalbridge.com wrote:
 We will of course measure it but the only thing that is different because of 
 the conversion is that timode integers are packaged differently

Yeah, the slowness is due to generic code modification, not a port
using CONST_WIDE_INT or not.

Richard.



 On Nov 26, 2013, at 6:17 PM, David Edelsohn dje@gmail.com wrote:

 On Tue, Nov 26, 2013 at 5:46 PM, Mike Stump mikest...@comcast.net wrote:

 Ok?

 The revised version of the patch looks okay from a technical
 perspective, but I still am concerned about the compile-time impact. I
 thought that I saw a comment on IRC that wide-int seems to have a 3-5%
 compile time impact, which is a concern. I am concerned about the
 rs6000 port being further slowed with respect to other targets.

 Thanks, David


Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
 Hmm.  I'm still thinking that we should handle this during the regular
 transform step.

I wonder if it can't be done instead just in vectorizable_load,
if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
invariant, just emit the (broadcasted) load not inside of the loop, but on
the loop preheader edge.

 *** gcc/tree-vect-data-refs.c (revision 205435)
 --- gcc/tree-vect-data-refs.c (working copy)
 *** again:
 *** 3668,3673 
 --- 3668,3682 
   }
 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
   }
 +   else if (loop_vinfo
 + integer_zerop (DR_STEP (dr)))
 + {
 +   /* All loads from a non-varying address will be disambiguated
 +  by data-ref analysis or via a runtime alias check and thus
 +  they will become invariant.  Force them to be vectorized
 +  as external.  */
 +   STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
 + }

I think this is unsafe for simd loops.
I'd say:
int a[1024], b[1024];

int foo (void)
{
  int i;
  #pragma omp simd safelen(8)
  for (i = 0; i  1024; i++)
{
  a[i] = i;
  b[i] = a[0];
}
}

is valid testcase, the loop behaves the same if you execute it
sequentially, or vectorize using SIMD (hardware or emulated) instructions
with vectorization factor of 2, 4 or 8, as long as you do all memory
operations (either using scalar insns or simd instructions) in the order
they were written, which I believe the vectorizer right now handles
correctly, but the hoisting this patch wants to perform is not fine,
unless data ref analysis would prove that it can't alias.  For non-simd
loops we of course perform that data ref analysis and either version for
alias, or prove that the drs can't alias, but for simd loops we take as
given that the loop is safe to be vectorized.  It is, but not for hoisting.

So, the above say with emulated SIMD is safe to be executed as:
  for (i = 0; i  1024; i += 8)
{
  int tmp;
  for (tmp = i; tmp  i + 8; tmp++)
a[tmp] = tmp;
  for (tmp = i; tmp  i + 8; tmp++)
b[tmp] = a[0];
}
but not as:
  int tempa[8], tmp;
  /* Hoisted HW or emulated load + splat.  */
  for (tmp = 0; tmp  8; tmp++)
tempa[tmp] = a[0];
  for (i = 0; i  1024; i += 8)
{
  for (tmp = i; tmp  i + 8; tmp++)
a[tmp] = tmp;
  for (tmp = i; tmp  i + 8; tmp++)
b[tmp] = tempa[tmp];
}

Jakub


[testsuite] Properly set ld_library_path in cilk-plus tests

2013-11-27 Thread Rainer Orth
All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests
were FAILing on Solaris 10 and 11/x86:

ld.so.1: c11-atomic-exec-1.exe: fatal: 
/var/gcc/regression/trunk/10-gcc-gas/build/./gcc/libgcc_s.so.1: wrong ELF 
class: ELFCLASS32

ld.so.1: fib.exe: fatal: 
/var/gcc/regression/trunk/10-gcc-gas/build/i386-pc-solaris2.10/./libcilkrts/.libs/libcilkrts.so.5:
 wrong ELF class: ELFCLASS32

This happens because both cilk-plus.exp files override ld_library_path
instead of appending to it.  Fixed as follows by using the customary
method for setting ld_library_path.  Bootstrapped without regressions on
i386-pc-solaris2.1[01] and x86_64-unknown-linux-gnu, installed on
mainline.

Rainer
 

2013-11-26  Rainer Orth  r...@cebitec.uni-bielefeld.de

* gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path.
Call set_ld_library_path_env_vars.
* g++.dg/cilk-plus/cilk-plus.exp: Likewise.

# HG changeset patch
# Parent 293e24349ca8c7aa87bc32718eede5e185bca8c9
Properly set ld_library_path in cilk-plus tests

diff --git a/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp b/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
--- a/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
+++ b/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
@@ -31,7 +31,8 @@ dg-finish
 
 set library_var [get_multilibs]
 # Pointing the ld_library_path to the Cilk Runtime library binaries. 
-set ld_library_path ${library_var}/libcilkrts/.libs
+append ld_library_path :${library_var}/libcilkrts/.libs
+set_ld_library_path_env_vars
 
 global TEST_EXTRA_LIBS
 set TEST_EXTRA_LIBS -L${library_var}/libcilkrts/.libs
diff --git a/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp b/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
--- a/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
+++ b/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
@@ -26,7 +26,8 @@ if { ![check_effective_target_cilkplus] 
 
 set library_var [get_multilibs]
 # Pointing the ld_library_path to the Cilk Runtime library binaries. 
-set ld_library_path ${library_var}/libcilkrts/.libs
+append ld_library_path :${library_var}/libcilkrts/.libs
+set_ld_library_path_env_vars
 
 global TEST_EXTRA_LIBS
 set TEST_EXTRA_LIBS -L${library_var}/libcilkrts/.libs

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Richard Biener
On Wed, Nov 27, 2013 at 10:43 AM, Eric Botcazou ebotca...@adacore.com wrote:
 I think you are right, this flag is no longer necessary, and removing
 this code path would simplify everything. Therefore I'd like to propose
 to remove the keep_aligning parameter of get_inner_reference as
 a split-out patch.

 Boot-strapped (with languages=all,ada,go) and
 regression-tested on x86_64-linux-gnu.

 I don't understand how you can commit a patch that changes something only on
 strict-alignment platforms and test it only on x86-64.  This change *must* be
 tested with Ada on a strict-alignment platform, that's the only combination
 for which it is exercised.   If you cannot do that, then please back it out.

 More generally speaking, it's not acceptable to make cleanup changes like that
 in the RTL expander without extreme care, which of course starts with proper
 testing.  The patch should not have been approved either for that reason.

I'm fine with reverting it for now (you were in CC of the patch submission
but silent on it, I asked for the patch to start simplifying the way
mems are expanded - ultimately to avoid the recursion and mem-attribute
compute by the recursion).

We can come back during stage1.

get_object_alignment should be able to properly handle this case
if you call it on the full reference in the normal_inner_ref: case.
All the weird duplicate code on the VIEW_CONVERT_EXPR case
should IMHO go.

Richard.

 --
 Eric Botcazou


Re: Patch ping (stage1-ish patches)

2013-11-27 Thread Rainer Orth
Jakub Jelinek ja...@redhat.com writes:

 On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote:
 In fact, I would suggest that anyone with a pending patch from prior
 to stage1 close that hasn't gotten feedback by midnight Tuesday ping
 their patch.  I'd like to have a sense of everything that is
 outstanding sooner rather than later and wrap up any loose ends as
 quickly as possible.

On my side, there's

[c++, driver] Add -lrt on Solaris
http://gcc.gnu.org/ml/gcc-patches/2013-05/msg01488.html

resubmitted as

http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00412.html

It's unclear if the more intrusive solution outlined in the second
message (introduce libstdc++.spec) were acceptable in stage3, and I'm
uncertain if I can get it ready in time.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Make the IRA shrink-wrapping preparation also work on ppc64

2013-11-27 Thread Marek Polacek
On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote:
 In fact, I would suggest that anyone with a pending patch from prior
 to stage1 close that hasn't gotten feedback by midnight Tuesday ping
 their patch.  I'd like to have a sense of everything that is
 outstanding sooner rather than later and wrap up any loose ends as
 quickly as possible.

I've pinged http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03447.html
today.

Marek


Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

2013-11-27 Thread Richard Biener
On Wed, 27 Nov 2013, Jakub Jelinek wrote:

 On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
  Hmm.  I'm still thinking that we should handle this during the regular
  transform step.
 
 I wonder if it can't be done instead just in vectorizable_load,
 if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
 invariant, just emit the (broadcasted) load not inside of the loop, but on
 the loop preheader edge.

It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS.  It's just
a missed optimization I even noted when originally implementing
support for invariant loads ...

  *** gcc/tree-vect-data-refs.c   (revision 205435)
  --- gcc/tree-vect-data-refs.c   (working copy)
  *** again:
  *** 3668,3673 
  --- 3668,3682 
  }
STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
  }
  +   else if (loop_vinfo
  +   integer_zerop (DR_STEP (dr)))
  +   {
  + /* All loads from a non-varying address will be disambiguated
  +by data-ref analysis or via a runtime alias check and thus
  +they will become invariant.  Force them to be vectorized
  +as external.  */
  + STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def;
  +   }
 
 I think this is unsafe for simd loops.
 I'd say:
 int a[1024], b[1024];
 
 int foo (void)
 {
   int i;
   #pragma omp simd safelen(8)
   for (i = 0; i  1024; i++)
 {
   a[i] = i;
   b[i] = a[0];
 }
 }
 
 is valid testcase, the loop behaves the same if you execute it
 sequentially, or vectorize using SIMD (hardware or emulated) instructions
 with vectorization factor of 2, 4 or 8, as long as you do all memory
 operations (either using scalar insns or simd instructions) in the order
 they were written, which I believe the vectorizer right now handles
 correctly, but the hoisting this patch wants to perform is not fine,
 unless data ref analysis would prove that it can't alias.  For non-simd
 loops we of course perform that data ref analysis and either version for
 alias, or prove that the drs can't alias, but for simd loops we take as
 given that the loop is safe to be vectorized.  It is, but not for hoisting.

Ick.  I hate this behind-the-back stuff - so safelen doesn't mean
that a[i] and a[0] do not alias.  Note that this will break with
SLP stuff at least as that will re-order reads/writes.  Not sure
how safelen applies to SLP though.  That is

a[i] = i;
b[i] = a[0];
a[i+1] = i+1;
b[i+1] = a[1];

will eventually end up re-ordering reads/writes in non-obvious
ways.

 So, the above say with emulated SIMD is safe to be executed as:
   for (i = 0; i  1024; i += 8)
 {
   int tmp;
   for (tmp = i; tmp  i + 8; tmp++)
   a[tmp] = tmp;
   for (tmp = i; tmp  i + 8; tmp++)
   b[tmp] = a[0];
 }
 but not as:
   int tempa[8], tmp;
   /* Hoisted HW or emulated load + splat.  */
   for (tmp = 0; tmp  8; tmp++)
 tempa[tmp] = a[0];
   for (i = 0; i  1024; i += 8)
 {
   for (tmp = i; tmp  i + 8; tmp++)
   a[tmp] = tmp;
   for (tmp = i; tmp  i + 8; tmp++)
   b[tmp] = tempa[tmp];
 }
 
   Jakub
 
 

-- 
Richard Biener rguent...@suse.de
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendorffer


Re: [patch] set MULTIARCH_DIRNAME for multilib architectures

2013-11-27 Thread Bernhard Reutner-Fischer
On 27 November 2013 11:10, Aurelien Jarno aure...@debian.org wrote:
 On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote:
 Am 13.06.2013 11:42, schrieb Richard Sandiford:
  Bernhard Reutner-Fischer rep.dot@gmail.com writes:
  On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com 
  wrote:
  Matthias Klose d...@ubuntu.com writes:
  Index: config/mips/t-linux64
  ===
  --- config/mips/t-linux64(revision 200012)
  +++ config/mips/t-linux64(working copy)
  @@ -24,3 +24,13 @@
   ../lib32$(call
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
   ../lib$(call 
  if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
   ../lib64$(call
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
  +
  +ifneq (,$(findstring abin32,$(target)))
  +MULTIARCH_DIRNAME = $(call
  if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT))
  +else
  +ifneq (,$(findstring abi64,$(target)))
  +MULTIARCH_DIRNAME = $(call
  if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
  +else
  +MULTIARCH_DIRNAME = $(call
  if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
  +endif
  +endif
 
  findstring seems a bit fragile for a full triple.  I think it would
  be better to have something similar to the current MIPS_SOFT definition:
 
  MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI,
  $(target_cpu_default)) $(filter soft, $(with_float))),soft)
 
  but for ABIs.  It could then also take with_abi into account.
  Maybe something like:
 
  MIPS_ABI = $(or $(with_abi), \
  $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
  $(target_cpu_default)), n32), \
  o32)
 
  (completely untested).
 
  Bikeshedding:
  Doko would know, but ISTR that $(or) did not exist in make-3.80 which is
  currently the minimum prerequisite, fwiw.
 
  Gah, that's a pity.  Thanks for the catch though.  Maybe firstword
  would be OK instead.
 
  I see I also fell into the usual ABI trap.  It wouldn't have affected
  the use in this patch, but the default ought to be 32 rather than o32.

 the define is in tm_defines, not target_cpu_default.

 MIPS_ABI = $(or $(with_abi), \
 $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
 $(tm_defines)), n32), \
  32)

 However I can't see yet how this should be used. Maybe Aurelian could come up
 with a tested version of this patch?


 How about the patch below?

 It first determines the ABI without using $(or), by looking (in this
 order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32
 if none of the previous matches.

 Then the multiarch path is constructed from the ABI.

 Index: gcc/config/mips/t-linux64
 ===
 --- gcc/config/mips/t-linux64   (révision 205437)
 +++ gcc/config/mips/t-linux64   (copie de travail)
 @@ -24,3 +24,22 @@
 ../lib32$(call 
 if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
 ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
 ../lib64$(call 
 if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
 +
 +ifneq ($(with_abi),)
 +MIPS_ABI = $(with_abi)
 +endif
 +ifeq ($(MIPS_ABI),)
 +MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, $(tm_defines))),n32)
 +endif
 +ifeq ($(MIPS_ABI),)
 +MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target)
 +endif
 +ifeq ($(MIPS_ABI),)
 +MIPS_ABI=32
 +endif
 +
 +ifeq ($(MIPS_ABI),32)
 +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
 +else
 +MULTIARCH_DIRNAME = $(call 
 if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT))

hm, shouldn't this be gnueabi with an 'e'  or is that implied for n64 anyway?
just curious..
thanks,

 +endif

 --
 Aurelien Jarno  GPG: 1024D/F1BCDB73
 aurel...@aurel32.net http://www.aurel32.net


RE: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Bernd Edlinger
On Wed, 27 Nov 2013 12:47:19, Richard Biener wrote:

 On Wed, Nov 27, 2013 at 10:43 AM, Eric Botcazou ebotca...@adacore.com wrote:
 I think you are right, this flag is no longer necessary, and removing
 this code path would simplify everything. Therefore I'd like to propose
 to remove the keep_aligning parameter of get_inner_reference as
 a split-out patch.

 Boot-strapped (with languages=all,ada,go) and
 regression-tested on x86_64-linux-gnu.

 I don't understand how you can commit a patch that changes something only on
 strict-alignment platforms and test it only on x86-64. This change *must* be
 tested with Ada on a strict-alignment platform, that's the only combination
 for which it is exercised. If you cannot do that, then please back it out.

 More generally speaking, it's not acceptable to make cleanup changes like 
 that
 in the RTL expander without extreme care, which of course starts with proper
 testing. The patch should not have been approved either for that reason.

 I'm fine with reverting it for now (you were in CC of the patch submission
 but silent on it, I asked for the patch to start simplifying the way
 mems are expanded - ultimately to avoid the recursion and mem-attribute
 compute by the recursion).


Ok, then I'll revert this patch in the evening.

Bernd.

 We can come back during stage1.

 get_object_alignment should be able to properly handle this case
 if you call it on the full reference in the normal_inner_ref: case.
 All the weird duplicate code on the VIEW_CONVERT_EXPR case
 should IMHO go.

 Richard.

 --
 Eric Botcazou  

Re: [patch] set MULTIARCH_DIRNAME for multilib architectures

2013-11-27 Thread Aurelien Jarno
On Wed, Nov 27, 2013 at 12:55:54PM +0100, Bernhard Reutner-Fischer wrote:
 On 27 November 2013 11:10, Aurelien Jarno aure...@debian.org wrote:
  On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote:
  Am 13.06.2013 11:42, schrieb Richard Sandiford:
   Bernhard Reutner-Fischer rep.dot@gmail.com writes:
   On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com 
   wrote:
   Matthias Klose d...@ubuntu.com writes:
   Index: config/mips/t-linux64
   ===
   --- config/mips/t-linux64(revision 200012)
   +++ config/mips/t-linux64(working copy)
   @@ -24,3 +24,13 @@
../lib32$(call
   if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
../lib$(call 
   if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
../lib64$(call
   if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
   +
   +ifneq (,$(findstring abin32,$(target)))
   +MULTIARCH_DIRNAME = $(call
   if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT))
   +else
   +ifneq (,$(findstring abi64,$(target)))
   +MULTIARCH_DIRNAME = $(call
   if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
   +else
   +MULTIARCH_DIRNAME = $(call
   if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
   +endif
   +endif
  
   findstring seems a bit fragile for a full triple.  I think it would
   be better to have something similar to the current MIPS_SOFT 
   definition:
  
   MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI,
   $(target_cpu_default)) $(filter soft, $(with_float))),soft)
  
   but for ABIs.  It could then also take with_abi into account.
   Maybe something like:
  
   MIPS_ABI = $(or $(with_abi), \
   $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
   $(target_cpu_default)), n32), \
   o32)
  
   (completely untested).
  
   Bikeshedding:
   Doko would know, but ISTR that $(or) did not exist in make-3.80 which is
   currently the minimum prerequisite, fwiw.
  
   Gah, that's a pity.  Thanks for the catch though.  Maybe firstword
   would be OK instead.
  
   I see I also fell into the usual ABI trap.  It wouldn't have affected
   the use in this patch, but the default ought to be 32 rather than 
   o32.
 
  the define is in tm_defines, not target_cpu_default.
 
  MIPS_ABI = $(or $(with_abi), \
  $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \
  $(tm_defines)), n32), \
   32)
 
  However I can't see yet how this should be used. Maybe Aurelian could come 
  up
  with a tested version of this patch?
 
 
  How about the patch below?
 
  It first determines the ABI without using $(or), by looking (in this
  order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32
  if none of the previous matches.
 
  Then the multiarch path is constructed from the ABI.
 
  Index: gcc/config/mips/t-linux64
  ===
  --- gcc/config/mips/t-linux64   (révision 205437)
  +++ gcc/config/mips/t-linux64   (copie de travail)
  @@ -24,3 +24,22 @@
  ../lib32$(call 
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
  ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
  ../lib64$(call 
  if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
  +
  +ifneq ($(with_abi),)
  +MIPS_ABI = $(with_abi)
  +endif
  +ifeq ($(MIPS_ABI),)
  +MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, 
  $(tm_defines))),n32)
  +endif
  +ifeq ($(MIPS_ABI),)
  +MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target)
  +endif
  +ifeq ($(MIPS_ABI),)
  +MIPS_ABI=32
  +endif
  +
  +ifeq ($(MIPS_ABI),32)
  +MULTIARCH_DIRNAME = $(call 
  if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT))
  +else
  +MULTIARCH_DIRNAME = $(call 
  if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT))
 
 hm, shouldn't this be gnueabi with an 'e'  or is that implied for n64 anyway?
 just curious..
 thanks,

This is correct, the abis are n32 and 64. MIPS EABI is another ABI which
hasn't really existed in practice.

-- 
Aurelien Jarno  GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Alexey Samsonov
On Mon, Nov 25, 2013 at 7:02 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Mon, Nov 25, 2013 at 06:53:59PM +0400, Alexey Samsonov wrote:
  In GCC, libbacktrace is built as a libtool convenience library only and
  then linked into whatever libraries want to use it.  So indeed, the plan
  is to link libbacktrace.la into libasan.so.1.0.0 and libasan.a
  (and the earlier GCC patch I've posted included corresponding toplevel
  configure/Makefile bits and libsanitizer Makefile/configure changes to make
  it happen).  E.g. libgo.so.* links libbacktrace.la into itself too.
  So, for GCC user experience, things will work automatically.

 Good. Note, however, that you'd need at least two versions of
 libbacktrace - 32-bit and 64-bit.

 In GCC you get that automatically (unless --disable-multilib, but then you
 don't get 32-bit libsanitizer either say on x86_64-linux).

  it means finding some way in the build system/configure to detect
  or request libbacktrace availability (for libraries not included in
  GCC tree GCC's configure typically uses --with-package=path
  or --with-package-lib=path/--with-package-include=path) and just
  link against it and you'd just unpack/configure/build libbacktrace
  on one of your test bots.

 We indeed can add support for detecting presence of libbacktrace
 binaries and headers
 when we configure a build tree. But to just link against it we have
 to either link against it in Clang
 driver (I don't think we should add support for it), or link against
 it in all the places where we build something
 with -fsanitize=foo, and there are quite a few of them.

 Or change the rules for creation of *sanitizer_common.*a, so that you
 link the libbacktrace.la convenience library into it (if configure found
 it).  If you build it using libtool/automake, you get it automatically,
 otherwise you can e.g. using ar unpack libbacktrace.a and note the filenames
 of the object files from it, then add those to the ar command line for
 creation of *sanitizer_common.*a.

This would be messy, especially assuming that we have two (both pretty ugly)
build systems in compiler-rt repository. And the argument about two
versions (32-bit and 64-bit) of
libbacktrace is more serious in this setting. Also, see note below.

 As for redirecting libc calls from libbacktrace to our internal
 versions (or interceptors) -
 if it works fine for our toy tests (which is the case), I suggest to
 wait for the actual problems gcc
 users may discover. If there will be need in compiling a separate
 sanitizer-specific version of libbacktrace,
 or providing an additional layer between libbacktrace and sanitizer
 runtimes, or both, this could probably
 be done in gcc version of the runtime only (w/o modifying any existing
 source files, only adding standalone gcc-specific stuff).

 Sure, worst case you keep it untested in your LLVM copy of libsanitizer
 and we'll just need to fix it up during merges if something breaks.
 If it will be used for GCC (and we have a P1 bug so it is a release blocker
 if llvm-symbolizer is still used), then it will be tested there.

Yes, I think that's what we should do in the short term.
I've submitted (slightly edited) version of your patch to compiler-rt
as r195837, I think
it should work after the next merge to gcc. I've done a minimal
testing of this change
(built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided,
libbacktrace.a linked),
and it kind of worked - libbacktrace functions were properly picked
up, error callbacks was not called,
but the symbolization didn't work either:

Turns out libbacktrace doesn't work with executables produced by LLVM:
emitted compile unit DIEs don't have neither
DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference.
It means that to build a set of address ranges corresponding to each
compile unit, one actually
has to extract all the subprogram DIEs. I treat this as a bug in
libbacktrace (unless it's goal is to work only
for GCC-produced binaries). That's what I was afraid when Ian
suggested us to fork the sources.

-- 
Alexey Samsonov, MSK


Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 12:54:14PM +0100, Richard Biener wrote:
 On Wed, 27 Nov 2013, Jakub Jelinek wrote:
 
  On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
   Hmm.  I'm still thinking that we should handle this during the regular
   transform step.
  
  I wonder if it can't be done instead just in vectorizable_load,
  if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
  invariant, just emit the (broadcasted) load not inside of the loop, but on
  the loop preheader edge.
 
 It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS.  It's just
 a missed optimization I even noted when originally implementing
 support for invariant loads ...

True, but only for non-simd loops, or if we proved it by looking at all
relevant LOOP_VINFO_DDRSs.  But, if it is not a simd loop, and
not !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, wouldn't previous optimizations
hoist the load before the loop already?

 Ick.  I hate this behind-the-back stuff - so safelen doesn't mean
 that a[i] and a[0] do not alias.

My initial understanding of the SIMD loops was also that it allows the
the up to safelen consecutive iterations to be randomly reordered or
intermixed without affecting valid programs, but further mails from Tobias
and others on this topic plus testcases changed my understanding of it.

Note that we don't purge LOOP_VINFO_DDRSs in any way for loop-safelen,
just don't add versioning for aliasor punt if there is some possible (or
proven) aliasing.  Perhaps we could add a bool flag to loop_vinfo which
would tell us whether the loop has no data dependencies at all (i.e.
either for non-safelen is !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, or
with safelen non-zero would be !LOOP_REQUIRES_VERSIONING_FOR_ALIAS).
Then we could hoist if that flag is set or
LOOP_REQUIRES_VERSIONING_FOR_ALIAS (because then the runtime test
checks the dependency).

 Note that this will break with
 SLP stuff at least as that will re-order reads/writes.  Not sure
 how safelen applies to SLP though.  That is
 
 a[i] = i;
 b[i] = a[0];
 a[i+1] = i+1;
 b[i+1] = a[1];
 
 will eventually end up re-ordering reads/writes in non-obvious
 ways.

You mean SLP inside of loop vectorization, right?  Because for normal SLP
outside of loop vectorizer simdlen is ignored and normal data ref is
performed without any bypassing.

Jakub


Re: [PING][GCC ARM]Refine scaled address expression on ARM

2013-11-27 Thread Bin.Cheng
Ping^2

Thanks,
bin

On Fri, Nov 22, 2013 at 3:32 PM, bin.cheng bin.ch...@arm.com wrote:
 PING.
 Hi, there is a patch at
 http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01353.html which slipped away.

 Thanks,
 bin






-- 
Best Regards.


Re: [RFC][LIBGCC][1 of 2] 64 bit divide implementation for processor without hw divide instruction

2013-11-27 Thread Christophe Lyon
On 27 November 2013 06:53, Jeff Law l...@redhat.com wrote:
 On 11/25/13 17:29, Kugan wrote:
[...]
 Thanks for the review. Is this OK for trunk now?

 Yes.  Please install.

 jeff

Thanks, committed on Kugan's behalf as r205444.


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 04:12:33PM +0400, Alexey Samsonov wrote:
  Sure, worst case you keep it untested in your LLVM copy of libsanitizer
  and we'll just need to fix it up during merges if something breaks.
  If it will be used for GCC (and we have a P1 bug so it is a release blocker
  if llvm-symbolizer is still used), then it will be tested there.
 
 Yes, I think that's what we should do in the short term.
 I've submitted (slightly edited) version of your patch to compiler-rt
 as r195837, I think

Thanks, once Kostya merges it in, I'll rework a patch for it (which
hopefully will not need to change anything in the sanitizer_common/
stuff, just Makefiles/configure and other stuff owned by GCC).

 it should work after the next merge to gcc. I've done a minimal
 testing of this change
 (built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided,
 libbacktrace.a linked),
 and it kind of worked - libbacktrace functions were properly picked
 up, error callbacks was not called,
 but the symbolization didn't work either:
 
 Turns out libbacktrace doesn't work with executables produced by LLVM:
 emitted compile unit DIEs don't have neither
 DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference.

I'd call that a LLVM bug, not emitting those attributes on a CU sounds like
a flaw in it.

Jakub


Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

2013-11-27 Thread Richard Biener
On Wed, 27 Nov 2013, Jakub Jelinek wrote:

 On Wed, Nov 27, 2013 at 12:54:14PM +0100, Richard Biener wrote:
  On Wed, 27 Nov 2013, Jakub Jelinek wrote:
  
   On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
Hmm.  I'm still thinking that we should handle this during the regular
transform step.
   
   I wonder if it can't be done instead just in vectorizable_load,
   if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
   invariant, just emit the (broadcasted) load not inside of the loop, but on
   the loop preheader edge.
  
  It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS.  It's just
  a missed optimization I even noted when originally implementing
  support for invariant loads ...
 
 True, but only for non-simd loops, or if we proved it by looking at all
 relevant LOOP_VINFO_DDRSs.  But, if it is not a simd loop, and
 not !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, wouldn't previous optimizations
 hoist the load before the loop already?

Well - there is the case of

int g;
int *p;

  for (;;)
{
  *p = g;
}

where we know in the _vectorized_ loop body that they cannot alias
(because we have at least two vector elements).  But it seems
we lost that optimization at some point as

int g;
void foo (int *p, int n)
{
  int i;
  for (i = 0; i  n; ++i)
*p = g;
}

uses alias versioning.  Bah ;)  Maybe I'm just dreaming that
I implemented that as well.

  Ick.  I hate this behind-the-back stuff - so safelen doesn't mean
  that a[i] and a[0] do not alias.
 
 My initial understanding of the SIMD loops was also that it allows the
 the up to safelen consecutive iterations to be randomly reordered or
 intermixed without affecting valid programs, but further mails from Tobias
 and others on this topic plus testcases changed my understanding of it.
 
 Note that we don't purge LOOP_VINFO_DDRSs in any way for loop-safelen,
 just don't add versioning for aliasor punt if there is some possible (or
 proven) aliasing.  Perhaps we could add a bool flag to loop_vinfo which
 would tell us whether the loop has no data dependencies at all (i.e.
 either for non-safelen is !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, or
 with safelen non-zero would be !LOOP_REQUIRES_VERSIONING_FOR_ALIAS).
 Then we could hoist if that flag is set or
 LOOP_REQUIRES_VERSIONING_FOR_ALIAS (because then the runtime test
 checks the dependency).
 
  Note that this will break with
  SLP stuff at least as that will re-order reads/writes.  Not sure
  how safelen applies to SLP though.  That is
  
  a[i] = i;
  b[i] = a[0];
  a[i+1] = i+1;
  b[i+1] = a[1];
  
  will eventually end up re-ordering reads/writes in non-obvious
  ways.
 
 You mean SLP inside of loop vectorization, right?  Because for normal SLP
 outside of loop vectorizer simdlen is ignored and normal data ref is
 performed without any bypassing.

Yes, SLP inside of a loop.

Richard.


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Alexey Samsonov
On Wed, Nov 27, 2013 at 4:20 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Wed, Nov 27, 2013 at 04:12:33PM +0400, Alexey Samsonov wrote:
  Sure, worst case you keep it untested in your LLVM copy of libsanitizer
  and we'll just need to fix it up during merges if something breaks.
  If it will be used for GCC (and we have a P1 bug so it is a release blocker
  if llvm-symbolizer is still used), then it will be tested there.

 Yes, I think that's what we should do in the short term.
 I've submitted (slightly edited) version of your patch to compiler-rt
 as r195837, I think

 Thanks, once Kostya merges it in, I'll rework a patch for it (which
 hopefully will not need to change anything in the sanitizer_common/
 stuff, just Makefiles/configure and other stuff owned by GCC).

 it should work after the next merge to gcc. I've done a minimal
 testing of this change
 (built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided,
 libbacktrace.a linked),
 and it kind of worked - libbacktrace functions were properly picked
 up, error callbacks was not called,
 but the symbolization didn't work either:

 Turns out libbacktrace doesn't work with executables produced by LLVM:
 emitted compile unit DIEs don't have neither
 DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference.

 I'd call that a LLVM bug, not emitting those attributes on a CU sounds like
 a flaw in it.

LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The
standard tells that compilation unit entries may have attributes
specifying the
address range, but doesn't tell they are obligatory.
DWARF consumers probably shouldn't rely on their presence.

-- 
Alexey Samsonov, MSK


[PING] [PATCH, PR 57748] Check for out of bounds access, Part 2

2013-11-27 Thread Bernd Edlinger
Hi,

ping...

this patch still open: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02291.html

Note: it does, as it is, _not_ depend on the keep_aligning patch.
And it would fix some really nasty wrong code generation issues.


Thanks
Bernd.

 Date: Tue, 19 Nov 2013 15:39:39 +0100

 Hello,


 this is a minor update to my previous version of this patch, (using a boolean 
 expand_reference,
 instead of adding a new expand_modifier enum value):

 I forgot to pass down the expand_reference value at the second expand_expr 
 call inside the
 case VIEW_CONVERT_EXPR. Sorry for the inconvenience.



 @@ -10219,7 +10229,8 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
 }

 if (!op0)
 - op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
 + op0 = expand_expr_real (treeop0, NULL_RTX, VOIDmode, modifier,
 + NULL, expand_reference);

 /* If the input and output modes are both the same, we are done. */
 if (mode == GET_MODE (op0))


 Boot-strapped and regression-tested on X86_64-pc-linux-gnu.

 Ok for trunk?


 Thanks
 Bernd.


 Date: Thu, 7 Nov 2013 13:58:55 +0100

 oops - this time with attachments...


 Hi,

 On Fri, 25 Oct 2013 12:51:13, Richard Biener wrote:

 On Fri, Oct 25, 2013 at 12:02 PM, Bernd Edlinger
 bernd.edlin...@hotmail.de wrote:
 Hi,

 Eh ... even

 register struct { int i; int a[0]; } asm (ebx);

 works. Also with int a[1] but not with a[2]. So just handling trailing
 arrays makes this case regress as well.

 Now I'd call this use questionable ... but we've likely supported that
 for decades and cannot change that now.

 Back to fixing everything in expand.

 Richard.


 Ok, finally you asked for it.

 Here is my previous version of that patch again.

 I have now added a new value EXPAND_REFERENCE to the expand_modifier
 enumeration. It is almost like EXPAND_MEMORY but it does not interfere 
 with
 constant values.

 I have done the same modification to VIEW_CONVERT_EXPR too, because
 this is a possible inner reference, itself. It is however inherently hard 
 to
 test around this code.

 To understand this patch it is good to know what type of object the
 return value tem of get_inner_reference can be.

 From the program logic at get_inner_reference it is clear that the
 return value may *not* be BIT_FIELD_REF, COMPONENT_REF, ARRAY_REF,
 ARRAY_RANGE_REF, REALPART_EXPR, IMAGPART_EXPR. The result may
 be VIEW_CONVERT_EXPR only on a STRICT_ALIGNMENT target. This is probably
 further restricted because exp is gimplified.

 Usually the result will be a MEM_REF or a SSA_NAME of the memory where
 the structure is to be found.

 When you look at where EXPAND_MEMORY is handled you see it is 
 special-cased
 in TARGET_MEM_REF, MEM_REF, ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF,
 ARRAY_RANGE_REF.

 At TARGET_MEM_REF, MEM_REF, VIEW_CONVERT_EXPR, it should be the
 same if EXPAND_MEMORY, EXPAND_WRITE or EXPAND_REFERENCE is given:
 If it is an unaligned memory, we just return the unaligned reference.

 This was missing for VIEW_CONVERT_EXPR, and unfortunately I have no test 
 case,
 because it is only a problem for STRICT_ALIGNMENT targets, and even there 
 it will
 certainly be really hard to find test cases that exercise this code.

 In ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF
 we do not have to touch the handling of the outer modifier. However we 
 pass
 EXPAND_REFERENCE to the inner object, which should not be a recursive
 use of any ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF.

 TARGET_MEM_REF, MEM_REF and VIEW_CONVERT_EXPR know how to handle
 EXPAND_REFERENCE, anything else handles it like EXPAND_NORMAL.


 Boot-strapped and regression-tested on x86_64-linux-gnu
 OK for trunk?

 You point to a weak spot in expansion - that it handles creating
 the base MEM to offset with handled components by recursing
 into the case that handles bare MEM_REFs. This makes the
 bare MEM_REF handling code somewhat awkward (it's the
 one to assign mem-attrs which are later adjusted for example).

 Maybe a better appropach than adding yet another expand
 modifier would be to split out the base MEM expansion part
 out of the bare MEM_REF handling code so we can call that
 instead of recursing.

 In this light - instead of a new expand modifier don't you want
 an actual flag that specifies we are coming from a call that
 wants to expand a base? That is, allow EXPAND_SUM
 but with the recursion flag set?


 I think you are right. After some thought, I start to like that idea.

 This way we have at least much more flexibility, how to handle the inner
 references correctly, and if I change only the interfaces of 
 expand_expr_real/real_1
 that will not be used at too many places, either.

 Finally I think the recursion into the VIEW_CONVERT_EXPR case
 is only there because of the keep_aligning flag of get_inner_reference
 which should be obsolete now that we properly handle its effects
 in get_object_alignment. So you wouldn't need to adjust this path
 if we finally can get rid of that.


 I 

Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Eric Botcazou
 I'm fine with reverting it for now (you were in CC of the patch submission
 but silent on it, I asked for the patch to start simplifying the way
 mems are expanded - ultimately to avoid the recursion and mem-attribute
 compute by the recursion).

Because I'm totally lost in this thread and its many sub-threads.

 We can come back during stage1.

Sure, let's do that instead and not enter stage #3 with hazardous changes.

 get_object_alignment should be able to properly handle this case
 if you call it on the full reference in the normal_inner_ref: case.

How exactly?  Once you flatten everything with get_inner_reference at the 
beginning, the TYPE_ALIGN_OK flag on the VIEW_CONVERT_EXPR is lost.

 All the weird duplicate code on the VIEW_CONVERT_EXPR case
 should IMHO go.

What has changed since 2004 exactly?  If you do a grep for TYPE_ALIGN_OK on 
4.1 and 4.9 trees, you get exactly the same 4 occurrences in the middle-end.

-- 
Eric Botcazou


Patch ping (stage1-ish patches)

2013-11-27 Thread Eric Botcazou
 In fact, I would suggest that anyone with a pending patch from prior to
 stage1 close that hasn't gotten feedback by midnight Tuesday ping their
 patch.  I'd like to have a sense of everything that is outstanding
 sooner rather than later and wrap up any loose ends as quickly as possible.

Improve debug info for small structures (2)
  http://gcc.gnu.org/ml/gcc-patches/2013-10/msg02007.html

-- 
Eric Botcazou


Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Richard Biener
On Wed, Nov 27, 2013 at 1:29 PM, Eric Botcazou ebotca...@adacore.com wrote:
 I'm fine with reverting it for now (you were in CC of the patch submission
 but silent on it, I asked for the patch to start simplifying the way
 mems are expanded - ultimately to avoid the recursion and mem-attribute
 compute by the recursion).

 Because I'm totally lost in this thread and its many sub-threads.

 We can come back during stage1.

 Sure, let's do that instead and not enter stage #3 with hazardous changes.

 get_object_alignment should be able to properly handle this case
 if you call it on the full reference in the normal_inner_ref: case.

 How exactly?  Once you flatten everything with get_inner_reference at the
 beginning, the TYPE_ALIGN_OK flag on the VIEW_CONVERT_EXPR is lost.

Well, I want

   tem = get_inner_reference (from, ...);
   op = expand_expr (tem);
...

   set_mem_attrs (op, from);

instead of setting mem_attrs from 'tem' by some obfuscated means of
recursing through multiple levels of expanding the memory reference.

That is, completely refactor this stuff as it has become so non-obvious
what happens.

 All the weird duplicate code on the VIEW_CONVERT_EXPR case
 should IMHO go.

 What has changed since 2004 exactly?  If you do a grep for TYPE_ALIGN_OK on
 4.1 and 4.9 trees, you get exactly the same 4 occurrences in the middle-end.

Ah, get_object_alignment used keep_aligning ...

Richard.

 --
 Eric Botcazou


Re: [PATCH] Remove keep_aligning from get_inner_reference

2013-11-27 Thread Eric Botcazou
 Ah, get_object_alignment used keep_aligning ...

Yes, the patch contains the rather explicit hunks:

Index: gcc/builtins.c
===
--- gcc/builtins.c  (revision 204101)
+++ gcc/builtins.c  (working copy)
@@ -315,7 +315,7 @@ get_object_alignment_2 (tree exp, unsigned int *al
   /* Get the innermost object and the constant (bitpos) and possibly
  variable (offset) offset of the access.  */
   exp = get_inner_reference (exp, bitsize, bitpos, offset,
-mode, unsignedp, volatilep, true);
+mode, unsignedp, volatilep);
 
   /* Extract alignment information from the innermost object and
  possibly adjust bitpos and offset.  */
@@ -346,10 +346,6 @@ get_object_alignment_2 (tree exp, unsigned int *al
   align = DECL_ALIGN (exp);
   known_alignment = true;
 }
-  else if (TREE_CODE (exp) == VIEW_CONVERT_EXPR)
-{
-  align = TYPE_ALIGN (TREE_TYPE (exp));
-}
   else if (TREE_CODE (exp) == INDIRECT_REF
   || TREE_CODE (exp) == MEM_REF
   || TREE_CODE (exp) == TARGET_MEM_REF)

-- 
Eric Botcazou


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 04:31:48PM +0400, Alexey Samsonov wrote:
 LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The
 standard tells that compilation unit entries may have attributes
 specifying the
 address range, but doesn't tell they are obligatory.
 DWARF consumers probably shouldn't rely on their presence.

DWARF is generally full of may have and almost no must have.  If the
DWARF consumer crashes on absence of some attribute/die etc., that would
be of course a consumer's fault.  But not being able to symbolize something
because the provided DWARF info omits important stuff is not wrong.
BTW, libbacktrace also uses symbol table (and for backtrace_syminfo
uses that exclusively), so even if it can't symbolize using DWARF info,
it should at least symbolize using symbol table (of course inlining info
isn't available in that case, nor tail call frames (but libbacktrace doesn't
support those yet, only GDB does so far)).

Jakub


Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Alexey Samsonov
On Wed, Nov 27, 2013 at 4:51 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Wed, Nov 27, 2013 at 04:31:48PM +0400, Alexey Samsonov wrote:
 LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The
 standard tells that compilation unit entries may have attributes
 specifying the
 address range, but doesn't tell they are obligatory.
 DWARF consumers probably shouldn't rely on their presence.

 DWARF is generally full of may have and almost no must have.  If the
 DWARF consumer crashes on absence of some attribute/die etc., that would
 be of course a consumer's fault.  But not being able to symbolize something
 because the provided DWARF info omits important stuff is not wrong.

Ok, I agree to call it missing feature instead of bug =)

 BTW, libbacktrace also uses symbol table (and for backtrace_syminfo
 uses that exclusively), so even if it can't symbolize using DWARF info,
 it should at least symbolize using symbol table (of course inlining info
 isn't available in that case, nor tail call frames (but libbacktrace doesn't
 support those yet, only GDB does so far)).

Yes, symbol table symbolization works for me (it has no file/line
info, of course).

-- 
Alexey Samsonov, MSK


Re: [PING^2] [PATCH] PR59063

2013-11-27 Thread Yury Gribov
 I don't like the unconditional -lrt added for -static-lib*san (note, 
you need it for both -static-lib{a,t}san).


Makes sense.

 Perhaps it is time for libsanitizer.spec filled in during configure 
of libsanitizer

 that the spec would source in?

Draft patch is attached, let's see if I understood your recommendation 
correctly. Some obvious quirks:
1) I didn't add link_libubsan/link_liblsan because they seem to be happy 
with default libs from %(link_sanitizer).

2) I left LIBASAN_EARLY_SPEC/LIBASAN_SPEC logic in gnu-user.h and gcc.c
because they rely on LD_STATIC_OPTION and HAVE_LD_STATIC_DYNAMIC which 
are defined in gcc/configure

and thus not available in libsanitizer/configure.

-Y


diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h
index 157e147..60eb30d 100644
--- a/gcc/config/gnu-user.h
+++ b/gcc/config/gnu-user.h
@@ -126,19 +126,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   LD_STATIC_OPTION  --whole-archive -ltsan --no-whole-archive  \
   LD_DYNAMIC_OPTION }}%{!static-libtsan:-ltsan}
 #endif
-
-/* Additional libraries needed by -static-libasan.  */
-#undef STATIC_LIBASAN_LIBS
-#define STATIC_LIBASAN_LIBS -ldl -lpthread
-
-/* Additional libraries needed by -static-libtsan.  */
-#undef STATIC_LIBTSAN_LIBS
-#define STATIC_LIBTSAN_LIBS -ldl -lpthread
-
-/* Additional libraries needed by -static-liblsan.  */
-#undef STATIC_LIBLSAN_LIBS
-#define STATIC_LIBLSAN_LIBS -ldl -lpthread
-
-/* Additional libraries needed by -static-libubsan.  */
-#undef STATIC_LIBUBSAN_LIBS
-#define STATIC_LIBUBSAN_LIBS -ldl -lpthread
diff --git a/gcc/gcc.c b/gcc/gcc.c
index 4edf677..f18ac46 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -535,20 +535,16 @@ proper position among the other output files.  */
 #define STACK_SPLIT_SPEC  %{fsplit-stack: --wrap=pthread_create}
 
 #ifndef LIBASAN_SPEC
-#ifdef STATIC_LIBASAN_LIBS
-#define ADD_STATIC_LIBASAN_LIBS \
-   %{static-libasan: STATIC_LIBASAN_LIBS }
-#else
-#define ADD_STATIC_LIBASAN_LIBS
-#endif
+#define STATIC_LIBASAN_LIBS \
+   %{static-libasan:%:include(libsanitizer.spec)%(link_libasan) %(link_sanitizer)}
 #ifdef LIBASAN_EARLY_SPEC
-#define LIBASAN_SPEC ADD_STATIC_LIBASAN_LIBS
+#define LIBASAN_SPEC STATIC_LIBASAN_LIBS
 #elif defined(HAVE_LD_STATIC_DYNAMIC)
 #define LIBASAN_SPEC %{static-libasan: LD_STATIC_OPTION \
 		 } -lasan %{static-libasan: LD_DYNAMIC_OPTION } \
-		 ADD_STATIC_LIBASAN_LIBS
+		 STATIC_LIBASAN_LIBS
 #else
-#define LIBASAN_SPEC -lasan ADD_STATIC_LIBASAN_LIBS
+#define LIBASAN_SPEC -lasan STATIC_LIBASAN_LIBS
 #endif
 #endif
 
@@ -557,20 +553,16 @@ proper position among the other output files.  */
 #endif
 
 #ifndef LIBTSAN_SPEC
-#ifdef STATIC_LIBTSAN_LIBS
-#define ADD_STATIC_LIBTSAN_LIBS \
-   %{static-libtsan: STATIC_LIBTSAN_LIBS }
-#else
-#define ADD_STATIC_LIBTSAN_LIBS
-#endif
+#define STATIC_LIBTSAN_LIBS \
+   %{static-libtsan:%:include(libsanitizer.spec)%(link_libtsan) %(link_sanitizer)}
 #ifdef LIBTSAN_EARLY_SPEC
-#define LIBTSAN_SPEC ADD_STATIC_LIBTSAN_LIBS
+#define LIBTSAN_SPEC STATIC_LIBTSAN_LIBS
 #elif defined(HAVE_LD_STATIC_DYNAMIC)
 #define LIBTSAN_SPEC %{static-libtsan: LD_STATIC_OPTION \
 		 } -ltsan %{static-libtsan: LD_DYNAMIC_OPTION } \
-		 ADD_STATIC_LIBTSAN_LIBS
+		 STATIC_LIBTSAN_LIBS
 #else
-#define LIBTSAN_SPEC -ltsan ADD_STATIC_LIBTSAN_LIBS
+#define LIBTSAN_SPEC -ltsan STATIC_LIBTSAN_LIBS
 #endif
 #endif
 
@@ -579,34 +571,26 @@ proper position among the other output files.  */
 #endif
 
 #ifndef LIBLSAN_SPEC
-#ifdef STATIC_LIBLSAN_LIBS
-#define ADD_STATIC_LIBLSAN_LIBS \
-   %{static-liblsan: STATIC_LIBLSAN_LIBS }
-#else
-#define ADD_STATIC_LIBLSAN_LIBS
-#endif
+#define STATIC_LIBLSAN_LIBS \
+   %{static-liblsan:%:include(libsanitizer.spec) %(link_sanitizer)}
 #ifdef HAVE_LD_STATIC_DYNAMIC
 #define LIBLSAN_SPEC %{!shared:%{static-liblsan: LD_STATIC_OPTION \
 		 } -llsan %{static-liblsan: LD_DYNAMIC_OPTION } \
-		 ADD_STATIC_LIBLSAN_LIBS }
+		 STATIC_LIBLSAN_LIBS }
 #else
-#define LIBLSAN_SPEC %{!shared:-llsan ADD_STATIC_LIBLSAN_LIBS }
+#define LIBLSAN_SPEC %{!shared:-llsan STATIC_LIBLSAN_LIBS }
 #endif
 #endif
 
 #ifndef LIBUBSAN_SPEC
-#ifdef STATIC_LIBUBSAN_LIBS
-#define ADD_STATIC_LIBUBSAN_LIBS \
-   %{static-libubsan: STATIC_LIBUBSAN_LIBS }
-#else
-#define ADD_STATIC_LIBUBSAN_LIBS
-#endif
+#define STATIC_LIBUBSAN_LIBS \
+   %{static-libubsan:%:include(libsanitizer.spec) %(link_sanitizer)}
 #ifdef HAVE_LD_STATIC_DYNAMIC
 #define LIBUBSAN_SPEC %{static-libubsan: LD_STATIC_OPTION \
 		 } -lubsan %{static-libubsan: LD_DYNAMIC_OPTION } \
-		 ADD_STATIC_LIBUBSAN_LIBS
+		 STATIC_LIBUBSAN_LIBS
 #else
-#define LIBUBSAN_SPEC -lubsan ADD_STATIC_LIBUBSAN_LIBS
+#define LIBUBSAN_SPEC -lubsan STATIC_LIBUBSAN_LIBS
 #endif
 #endif
 
diff --git a/gcc/testsuite/c-c++-common/asan/pr59063-1.c b/gcc/testsuite/c-c++-common/asan/pr59063-1.c
index e69de29..a4e01f7 100644
--- a/gcc/testsuite/c-c++-common/asan/pr59063-1.c
+++ b/gcc/testsuite/c-c++-common/asan/pr59063-1.c
@@ 

[v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9

2013-11-27 Thread Rainer Orth
ext/random/hypergeometric_distribution/operators/values.cc fails to
compile on Solaris 9 which lacks full C99 support in libc/libm:

FAIL: ext/random/hypergeometric_distribution/operators/values.cc (test for 
excess errors)

Excess errors:
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis
tribution/operators/values.cc:41:45: error: 'hypergeometric_pdf' was not declare
d in this scope
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis
tribution/operators/values.cc:46:48: error: 'hypergeometric_pdf' was not declare
d in this scope
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis
tribution/operators/values.cc:51:47: error: 'hypergeometric_pdf' was not declare
d in this scope
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7:
 error: void value not ignored as it ought to be
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7:
 error: void value not ignored as it ought to be
/vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7:
 error: void value not ignored as it ought to be

WARNING: ext/random/hypergeometric_distribution/operators/values.cc compilation 
failed to produce executable


hypergeometric_pdf is defined in testsuite/util/testsuite_random.h
inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally.  Fixed which
the following patch which allows the test to compile and pass on
i386-pc-solaris2.9.  On i386-pc-solaris2.11, it FAILs with an Arithmetic
exception, as it did before the patch:

Program received signal SIGFPE, Arithmetic exception.
[Switching to Thread 1 (LWP 1)]
0x080542ba in std::generate_canonicalunsigned int, 32u, 
std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u, 2567483615u, 
11u, 4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u, 1812433253u  
(__urng=...)
at 
/var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480
3480  return __sum / __tmp;

__tmp is 0 here.

Ok for mainline?

Rainer


2013-11-27  Rainer Orth  r...@cebitec.uni-bielefeld.de

* testsuite/ext/random/hypergeometric_distribution/operators/values.cc
(test01): Wrap in _GLIBCXX_USE_C99_MATH_TR1.

diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
@@ -31,6 +31,7 @@
 void
 test01()
 {
+#if _GLIBCXX_USE_C99_MATH_TR1
   using namespace __gnu_test;
 
   std::mt19937 eng;
@@ -49,6 +50,7 @@ test01()
   auto bhd3 = std::bind(hd3, eng);
   testDiscreteDist(bhd3, [](int k)
 		   { return hypergeometric_pdf(k, 100, 20, 5); });
+#endif
 }
 
 int

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9

2013-11-27 Thread Jonathan Wakely
On 27 November 2013 14:14, Rainer Orth wrote:

 Ok for mainline?

OK, thanks.


Re: [PING] [PATCH, PR 57748] Check for out of bounds access, Part 2

2013-11-27 Thread Richard Biener
On Wed, Nov 27, 2013 at 1:29 PM, Bernd Edlinger
bernd.edlin...@hotmail.de wrote:
 Hi,

 ping...

 this patch still open: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02291.html

 Note: it does, as it is, _not_ depend on the keep_aligning patch.
 And it would fix some really nasty wrong code generation issues.

I'll come back to all these patches once the late features have all
hit stage3 (hopefully next week).

We have some time left to fix this and related bugs.

Thanks,
Richard.


 Thanks
 Bernd.

 Date: Tue, 19 Nov 2013 15:39:39 +0100

 Hello,


 this is a minor update to my previous version of this patch, (using a 
 boolean expand_reference,
 instead of adding a new expand_modifier enum value):

 I forgot to pass down the expand_reference value at the second expand_expr 
 call inside the
 case VIEW_CONVERT_EXPR. Sorry for the inconvenience.



 @@ -10219,7 +10229,8 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
 }

 if (!op0)
 - op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
 + op0 = expand_expr_real (treeop0, NULL_RTX, VOIDmode, modifier,
 + NULL, expand_reference);

 /* If the input and output modes are both the same, we are done. */
 if (mode == GET_MODE (op0))


 Boot-strapped and regression-tested on X86_64-pc-linux-gnu.

 Ok for trunk?


 Thanks
 Bernd.


 Date: Thu, 7 Nov 2013 13:58:55 +0100

 oops - this time with attachments...


 Hi,

 On Fri, 25 Oct 2013 12:51:13, Richard Biener wrote:

 On Fri, Oct 25, 2013 at 12:02 PM, Bernd Edlinger
 bernd.edlin...@hotmail.de wrote:
 Hi,

 Eh ... even

 register struct { int i; int a[0]; } asm (ebx);

 works. Also with int a[1] but not with a[2]. So just handling trailing
 arrays makes this case regress as well.

 Now I'd call this use questionable ... but we've likely supported that
 for decades and cannot change that now.

 Back to fixing everything in expand.

 Richard.


 Ok, finally you asked for it.

 Here is my previous version of that patch again.

 I have now added a new value EXPAND_REFERENCE to the expand_modifier
 enumeration. It is almost like EXPAND_MEMORY but it does not interfere 
 with
 constant values.

 I have done the same modification to VIEW_CONVERT_EXPR too, because
 this is a possible inner reference, itself. It is however inherently 
 hard to
 test around this code.

 To understand this patch it is good to know what type of object the
 return value tem of get_inner_reference can be.

 From the program logic at get_inner_reference it is clear that the
 return value may *not* be BIT_FIELD_REF, COMPONENT_REF, ARRAY_REF,
 ARRAY_RANGE_REF, REALPART_EXPR, IMAGPART_EXPR. The result may
 be VIEW_CONVERT_EXPR only on a STRICT_ALIGNMENT target. This is probably
 further restricted because exp is gimplified.

 Usually the result will be a MEM_REF or a SSA_NAME of the memory where
 the structure is to be found.

 When you look at where EXPAND_MEMORY is handled you see it is 
 special-cased
 in TARGET_MEM_REF, MEM_REF, ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF,
 ARRAY_RANGE_REF.

 At TARGET_MEM_REF, MEM_REF, VIEW_CONVERT_EXPR, it should be the
 same if EXPAND_MEMORY, EXPAND_WRITE or EXPAND_REFERENCE is given:
 If it is an unaligned memory, we just return the unaligned reference.

 This was missing for VIEW_CONVERT_EXPR, and unfortunately I have no test 
 case,
 because it is only a problem for STRICT_ALIGNMENT targets, and even 
 there it will
 certainly be really hard to find test cases that exercise this code.

 In ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF
 we do not have to touch the handling of the outer modifier. However we 
 pass
 EXPAND_REFERENCE to the inner object, which should not be a recursive
 use of any ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF.

 TARGET_MEM_REF, MEM_REF and VIEW_CONVERT_EXPR know how to handle
 EXPAND_REFERENCE, anything else handles it like EXPAND_NORMAL.


 Boot-strapped and regression-tested on x86_64-linux-gnu
 OK for trunk?

 You point to a weak spot in expansion - that it handles creating
 the base MEM to offset with handled components by recursing
 into the case that handles bare MEM_REFs. This makes the
 bare MEM_REF handling code somewhat awkward (it's the
 one to assign mem-attrs which are later adjusted for example).

 Maybe a better appropach than adding yet another expand
 modifier would be to split out the base MEM expansion part
 out of the bare MEM_REF handling code so we can call that
 instead of recursing.

 In this light - instead of a new expand modifier don't you want
 an actual flag that specifies we are coming from a call that
 wants to expand a base? That is, allow EXPAND_SUM
 but with the recursion flag set?


 I think you are right. After some thought, I start to like that idea.

 This way we have at least much more flexibility, how to handle the inner
 references correctly, and if I change only the interfaces of 
 expand_expr_real/real_1
 that will not be used at too many places, either.

 Finally I think the recursion into the 

[PATCH] Fix PR58723

2013-11-27 Thread Richard Biener

This fixes the cgraph machinery wrt internal calls and makes LTO
and the recent OMP work regarding vectorization work (to some extent).

LTO bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-11-27  Richard Biener  rguent...@suse.de

PR middle-end/58723
* cgraphbuild.c (build_cgraph_edges): Do not build edges
for internal calls.
(rebuild_cgraph_edges): Likewise.
* ipa-inline-analysis.c (estimate_function_body_sizes):
Skip internal calls.
* tree-inline.c (estimate_num_insns): Estimate size of internal
calls as 0.
(gimple_expand_calls_inline): Do not try inline-expanding
internal calls.
* lto-streamer-in.c (input_cfg): Stream loop safelen,
force_vect and simduid.
(input_struct_function_base): Stream has_force_vect_loops
and has_simduid_loops.
(input_function): Adjust.
* lto-streamer-out.c (output_cfg): Stream loop safelen,
force_vect and simduid.
(output_struct_function_base): Stream has_force_vect_loops
and has_simduid_loops.

Index: gcc/cgraphbuild.c
===
*** gcc/cgraphbuild.c.orig  2013-11-27 11:01:21.0 +0100
--- gcc/cgraphbuild.c   2013-11-27 11:01:33.767324692 +0100
*** build_cgraph_edges (void)
*** 335,340 
--- 335,342 
  if (decl)
cgraph_create_edge (node, cgraph_get_create_node (decl),
stmt, bb-count, freq);
+ else if (gimple_call_internal_p (stmt))
+   ;
  else
cgraph_create_indirect_edge (node, stmt,
 gimple_call_flags (stmt),
*** rebuild_cgraph_edges (void)
*** 464,469 
--- 466,473 
  if (decl)
cgraph_create_edge (node, cgraph_get_create_node (decl), stmt,
bb-count, freq);
+ else if (gimple_call_internal_p (stmt))
+   ;
  else
cgraph_create_indirect_edge (node, stmt,
 gimple_call_flags (stmt),
Index: gcc/ipa-inline-analysis.c
===
*** gcc/ipa-inline-analysis.c.orig  2013-11-27 11:01:21.0 +0100
--- gcc/ipa-inline-analysis.c   2013-11-27 11:01:33.771324737 +0100
*** estimate_function_body_sizes (struct cgr
*** 2502,2508 
}
  
  
! if (is_gimple_call (stmt))
{
  struct cgraph_edge *edge = cgraph_edge (node, stmt);
  struct inline_edge_summary *es = inline_edge_summary (edge);
--- 2502,2509 
}
  
  
! if (is_gimple_call (stmt)
!  !gimple_call_internal_p (stmt))
{
  struct cgraph_edge *edge = cgraph_edge (node, stmt);
  struct inline_edge_summary *es = inline_edge_summary (edge);
Index: gcc/tree-inline.c
===
*** gcc/tree-inline.c.orig  2013-11-27 11:01:21.0 +0100
--- gcc/tree-inline.c   2013-11-27 11:01:53.957557118 +0100
*** estimate_num_insns (gimple stmt, eni_wei
*** 3801,3812 
  
  case GIMPLE_CALL:
{
!   tree decl = gimple_call_fndecl (stmt);
struct cgraph_node *node = NULL;
  
/* Do not special case builtins where we see the body.
   This just confuse inliner.  */
!   if (!decl || !(node = cgraph_get_node (decl)) || node-definition)
  ;
/* For buitins that are likely expanded to nothing or
   inlined do not account operand costs.  */
--- 3801,3816 
  
  case GIMPLE_CALL:
{
!   tree decl;
struct cgraph_node *node = NULL;
  
/* Do not special case builtins where we see the body.
   This just confuse inliner.  */
!   if (gimple_call_internal_p (stmt))
! return 0;
!   else if (!(decl = gimple_call_fndecl (stmt))
!|| !(node = cgraph_get_node (decl))
!|| node-definition)
  ;
/* For buitins that are likely expanded to nothing or
   inlined do not account operand costs.  */
*** gimple_expand_calls_inline (basic_block
*** 4427,4432 
--- 4431,4437 
gimple stmt = gsi_stmt (gsi);
  
if (is_gimple_call (stmt)
+  !gimple_call_internal_p (stmt)
   expand_call_inline (bb, stmt, id))
return true;
  }
Index: gcc/lto-streamer-in.c
===
*** gcc/lto-streamer-in.c.orig  2013-11-25 10:44:28.0 +0100
--- gcc/lto-streamer-in.c   2013-11-27 11:28:13.006784679 +0100
*** make_new_block (struct function *fn, uns
*** 598,604 
  /* Read the CFG for function FN from input block IB.  */
  

Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9

2013-11-27 Thread Paolo Carlini

Hi,

On 11/27/2013 03:14 PM, Rainer Orth wrote:

hypergeometric_pdf is defined in testsuite/util/testsuite_random.h
inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally.  Fixed which
the following patch which allows the test to compile and pass on
i386-pc-solaris2.9.  On i386-pc-solaris2.11, it FAILs with an Arithmetic
exception, as it did before the patch:

Program received signal SIGFPE, Arithmetic exception.
[Switching to Thread 1 (LWP 1)]
0x080542ba in std::generate_canonicalunsigned int, 32u, 
std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u, 2567483615u, 11u, 
4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u, 1812433253u  (__urng=...)
 at 
/var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480
3480  return __sum / __tmp;

__tmp is 0 here.

Ok for mainline?

Please use // { dg-require-cmath  } instead.

Thanks,
Paolo.


Re: [C++ Patch] PR 58647

2013-11-27 Thread Jason Merrill

On 11/27/2013 05:22 AM, Paolo Carlini wrote:

Thus something like the below? Passes testing.


Yep.  With a comment that we can only get there in checking mode via 
build_non_dependent_expr, because any expression that calls or takes the 
address of the function will have pull a FUNCTION_DECL out of the 
COMPONENT_REF.


Jason



Re: [PATCH] Fix split_live_ranges_for_shrink_wrap (PR rtl-optimization/59166)

2013-11-27 Thread Martin Jambor
On Tue, Nov 26, 2013 at 09:25:40PM +0100, Jakub Jelinek wrote:
 Hi!
 
 The problem on this testcase is that we have
 (debug_insn 30 29 31 7 (var_location:HI D#1 (subreg:HI (reg/v:SI 93 [ p ]) 
 0)) pr59166.c:20 -1
  (nil))
 and split_live_ranges_for_shrink_wrap decides to replace
 SImode pseudo 93 with some other SImode pseudo.  But it
 uses DF_REF_LOC, which is address of the HImode subreg around the pseudo,
 validate_change succeeds on it, because there is no validation inside of
 debug_insns and then we crash during var-tracking because of a mode
 mismatch.

thanks a lot for fixing it, I had no idea that DF_REF_REAL_LOC even
existed.  This change also apparently makes it possible to throw away
the SUBREG check in the same function.  I'll enqueue a patch removing
it for the next stage1.

 
 The following patch uses DF_REF_REAL_LOC instead, so that it looks through
 the subreg and adjusts what it should.  Code inspection showed the same
 issue in find_moveable_pseudos, don't have a testcase for that though.

That is where I copied the substitution from.

Martin

 
 Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
 
 2013-11-26  Jakub Jelinek  ja...@redhat.com
 
   PR rtl-optimization/59166
   * ira.c (find_moveable_pseudos): Use DF_REF_REAL_LOC instead of
   DF_REF_LOC in validate_change call.
   (split_live_ranges_for_shrink_wrap): Likewise.
 
   * gcc.dg/torture/pr59166.c: New test.



Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C

2013-11-27 Thread Aldy Hernandez
Iyer, Balaji V balaji.v.i...@intel.com writes:

  c_finish_omp_declare_simd (c_parser *parser, tree fndecl, tree parms,
  vecc_token clauses)
  {
 +
 +  if (flag_enable_cilkplus
 +   clauses.exists ()  !vec_safe_is_empty (parser-elem_fn_tokens))
 +{
 +  error (%#pragma omp declare simd% cannot be used in the same
 +  function marked as a SIMD-enabled function);
 +  vec_free (parser-elem_fn_tokens);
 +  return;
 +}

I see Cilk Plus elementals are handled as omp declare simd in the above
function.  This function sets an omp declare simd attribute here:

 if (c != NULL_TREE)
c = tree_cons (NULL_TREE, c, NULL_TREE);
  c = build_tree_list (get_identifier (omp declare simd), c);
  TREE_CHAIN (c) = DECL_ATTRIBUTES (fndecl);
  DECL_ATTRIBUTES (fndecl) = c;

but you also need a cilk plus elemental attribute so the rest of the
compiler can differentiate Cilk Plus elementals from omp declare simds.

See simd_clone_clauses_extract():

+  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
+ be cloned have a distinctive artificial label in addition to omp
+ declare simd.  */
+  bool cilk_clone
+= (flag_enable_cilkplus
+lookup_attribute (cilk plus elemental,
+   DECL_ATTRIBUTES (node-decl)));

Also you probably want some kind of test for this, so test for whatever
cilk_elemental is doing.  In trunk, the only use of cilk_elemental is in
ix86_simd_clone_compute_vecsize_and_simdlen(), so come up with some
x86 specific test for cilk_elemental==true.

Aldy


Re: [C++ Patch] PR 58647

2013-11-27 Thread Paolo Carlini

On 11/27/2013 04:35 PM, Jason Merrill wrote:

On 11/27/2013 05:22 AM, Paolo Carlini wrote:

Thus something like the below? Passes testing.


Yep.  With a comment that we can only get there in checking mode via 
build_non_dependent_expr, because any expression that calls or takes 
the address of the function will have pull a FUNCTION_DECL out of the 
COMPONENT_REF.
Agreed, I applied the below. Since the issue doesn't affect release-mode 
for the time being at least I'm not going to fiddle with 4.7/4.8.


Thanks,
Paolo.

///
/cp
2013-11-27  Paolo Carlini  paolo.carl...@oracle.com

PR c++/58647
* semantics.c (cxx_eval_constant_expression, [COMPONENT_REF]):
Handle function COMPONENT_REFs.

/testsuite
2013-11-27  Paolo Carlini  paolo.carl...@oracle.com

PR c++/58647
* g++.dg/parse/crash66.C: New.
Index: cp/semantics.c
===
--- cp/semantics.c  (revision 205448)
+++ cp/semantics.c  (working copy)
@@ -9603,6 +9603,16 @@ cxx_eval_constant_expression (const constexpr_call
   break;
 
 case COMPONENT_REF:
+  if (is_overloaded_fn (t))
+   {
+ /* We can only get here in checking mode via 
+build_non_dependent_expr,  because any expression that
+calls or takes the address of the function will have
+pulled a FUNCTION_DECL out of the COMPONENT_REF.  */
+ gcc_checking_assert (allow_non_constant);
+ *non_constant_p = true;
+ return t;
+   }
   r = cxx_eval_component_reference (call, t, allow_non_constant, addr,
non_constant_p, overflow_p);
   break;
Index: testsuite/g++.dg/parse/crash66.C
===
--- testsuite/g++.dg/parse/crash66.C(revision 0)
+++ testsuite/g++.dg/parse/crash66.C(working copy)
@@ -0,0 +1,11 @@
+// PR c++/58647
+
+struct A
+{
+  static void foo();
+};
+
+templatetypename void bar()
+{
+  A().foo;
+}


Re: RFA: patch to fix PR58785 (an ARM LRA crash)

2013-11-27 Thread Richard Earnshaw
On 20/11/13 09:22, Yvan Roux wrote:
 Hi,
 
 as Richard said, only a subset of rclass is allowed to be returned by
 preferred_reload_class.  I've tested the attached patched in Thumb
 mode, on ARMv5, A9 and A9hf and on cross A15 without regression.
 
 Yvan
 
 2013-11-20  Yvan Roux  yvan.r...@linaro.org
 
 PR target/58785
 * config/arm/arm.c (arm_preferred_reload_class): Only return LO_REGS
 when rclass is GENERAL_REGS.
 
 
 preferred_reload.diff
 
 
 diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
 index 5c53440..63f10bd 100644
 --- a/gcc/config/arm/arm.c
 +++ b/gcc/config/arm/arm.c
 @@ -6882,10 +6882,7 @@ arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, 
 reg_class_t rclass)
  return rclass;
else
  {
 -  if (rclass == GENERAL_REGS
 -   || rclass == HI_REGS
 -   || rclass == NO_REGS
 -   || rclass == STACK_REG)
 +  if (rclass == GENERAL_REGS)
   return LO_REGS;
else
   return rclass;
 

OK.

R.




Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9

2013-11-27 Thread Rainer Orth
Hi Paolo,

 On 11/27/2013 03:14 PM, Rainer Orth wrote:
 hypergeometric_pdf is defined in testsuite/util/testsuite_random.h
 inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally.  Fixed which
 the following patch which allows the test to compile and pass on
 i386-pc-solaris2.9.  On i386-pc-solaris2.11, it FAILs with an Arithmetic
 exception, as it did before the patch:

 Program received signal SIGFPE, Arithmetic exception.
 [Switching to Thread 1 (LWP 1)]
 0x080542ba in std::generate_canonicalunsigned int, 32u,
 std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u,
 2567483615u, 11u, 4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u,
 1812433253u  (__urng=...)
  at
 /var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480
 3480  return __sum / __tmp;

 __tmp is 0 here.

 Ok for mainline?
 Please use // { dg-require-cmath  } instead.

make sense since this renders the test unsupported instead of pass.
Since I'd already committed the previous version based on Jonathan's
approval, I've installed the following on top of it after testing with
the appropriate runtest command.

Rainer


2013-11-27  Rainer Orth  r...@cebitec.uni-bielefeld.de

* testsuite/ext/random/hypergeometric_distribution/operators/values.cc:
Use dg-require-cmath instead.

diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
--- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
+++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc
@@ -1,5 +1,6 @@
 // { dg-options -std=gnu++11 }
 // { dg-require-cstdint  }
+// { dg-require-cmath  }
 //
 // 2013-11-18  Edward M. Smith-Rowland  3dw...@verizon.net
 //
@@ -31,7 +32,6 @@
 void
 test01()
 {
-#if _GLIBCXX_USE_C99_MATH_TR1
   using namespace __gnu_test;
 
   std::mt19937 eng;
@@ -50,7 +50,6 @@ test01()
   auto bhd3 = std::bind(hd3, eng);
   testDiscreteDist(bhd3, [](int k)
 		   { return hypergeometric_pdf(k, 100, 20, 5); });
-#endif
 }
 
 int


-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives

2013-11-27 Thread Joseph S. Myers
On Wed, 27 Nov 2013, mtewoodb...@gmail.com wrote:

 Copyright 2013 assigned to the Free Software Foundation.

FWIW I don't see this in copyright.list yet.  If you sent the paperwork 
(whether paper mail or scans) to the FSF over a week ago and haven't had 
it acknowledged, please chase up ass...@gnu.org, and keep chasing them up 
weekly until it's acknowledged and a maintainer confirms it's now listed 
in copyright.list.

 diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c 
 b/gcc/testsuite/gcc.dg/cpp/line4.c

I think it's best to leave this test as-is and add a new test line9.c 
specifically for this case of #line __LINE__.  Changing existing tests 
unnecessarily complicates establishing whether a newly seen failure is a 
regression or not.

I think it would be worth including tests where the __LINE__ expansion 
(which as you note should be the line number of the line *after* the 
directive) is involved in concatenation or stringized, e.g.

#define xstr(x) #x
#define str(x) xstr(x)
#line 1 str(__LINE__)

which should set __FILE__ to be the stringized number of the line after 
the directive.  (I hope these cases will just work given your patch, so 
this is just a matter of adding more to the testcase.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: wide-int, rtl

2013-11-27 Thread Eric Botcazou
 Richi has asked the we break the wide-int patch so that the individual port
 and front end maintainers can review their parts without have to go through
 the entire patch.This patch covers the first half of the rtl code.

--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -2336,15 +2336,23 @@ hash_rtx_cb (const_rtx x, enum machine_mode mode,
+ (unsigned int) INTVAL (x));
   return hash;
 
+case CONST_WIDE_INT:
+  {
+   int i;
+   for (i = 0; i  CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;

You can write for (int i = 0; ... now and remove the parentheses.


--- a/gcc/cselib.c
+++ b/gcc/cselib.c
@@ -1121,15 +1120,23 @@ cselib_hash_rtx (rtx x, int create, enum machine_mode 
memmode)
   hash += ((unsigned) CONST_INT  7) + INTVAL (x);
   return hash ? hash : (unsigned int) CONST_INT;
 
+case CONST_WIDE_INT:
+  {
+   int i;
+   for (i = 0; i  CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;
+

Likewise.


--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -3298,23 +3268,13 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, 
int precision,
   pow = n + lgup;
   pow2 = n + lgup - precision;
 
-  /* We could handle this with some effort, but this case is much
- better handled directly with a scc insn, so rely on caller using
- that.  */
-  gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);

Why removing it?



--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -722,64 +722,33 @@ convert_modes (enum machine_mode mode, enum machine_mode 
oldmode, rtx x, int uns
   if (mode == oldmode)
 return x;
 
-  /* There is one case that we must handle specially: If we are converting
- a CONST_INT into a mode whose size is twice HOST_BITS_PER_WIDE_INT and
- we are to interpret the constant as unsigned, gen_lowpart will do
- the wrong if the constant appears negative.  What we want to do is
- make the high-order word of the constant zero, not all ones.  */
-
-  if (unsignedp  GET_MODE_CLASS (mode) == MODE_INT
-   GET_MODE_BITSIZE (mode) == HOST_BITS_PER_DOUBLE_INT
-   CONST_INT_P (x)  INTVAL (x)  0)
+  if (CONST_SCALAR_INT_P (x)
+   GET_MODE_CLASS (mode) == MODE_INT)

On a single line.


 {
-  double_int val = double_int::from_uhwi (INTVAL (x));
-
-  /* We need to zero extend VAL.  */
-  if (oldmode != VOIDmode)
-   val = val.zext (GET_MODE_BITSIZE (oldmode));
-
-  return immed_double_int_const (val, mode);
+  /* If the caller did not tell us the old mode, then there is
+not much to do with respect to canonization.  We have to assume
+that all the bits are significant.  */
+  if (GET_MODE_CLASS (oldmode) != MODE_INT)
+   oldmode = MAX_MODE_INT;
+  wide_int w = wide_int::from (std::make_pair (x, oldmode),
+  GET_MODE_PRECISION (mode),
+  unsignedp ? UNSIGNED : SIGNED);
+  return immed_wide_int_const (w, mode);

canonicalization?


@@ -5301,10 +5271,10 @@ store_expr (tree exp, rtx target, int call_param_p, 
bool nontemporal)
   alt_rtl);
 }
 
-  /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not
- the same as that of TARGET, adjust the constant.  This is needed, for
- example, in case it is a CONST_DOUBLE and we want only a word-sized
- value.  */
+  /* If TEMP is a VOIDmode constant and the mode of the type of EXP is
+ not the same as that of TARGET, adjust the constant.  This is
+ needed, for example, in case it is a CONST_DOUBLE or
+ CONST_WIDE_INT and we want only a word-sized value.  */
   if (CONSTANT_P (temp)  GET_MODE (temp) == VOIDmode
TREE_CODE (exp) != ERROR_MARK
GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp)))

Why reformatting the whole comment?


@@ -9481,11 +9459,19 @@ expand_expr_real_1 (tree exp, rtx target, enum 
machine_mode tmode,
   return decl_rtl;
 
 case INTEGER_CST:
-  temp = immed_double_const (TREE_INT_CST_LOW (exp),
-TREE_INT_CST_HIGH (exp), mode);
-
-  return temp;
-
+  {
+   tree type = TREE_TYPE (exp);

Redundant, see the beginning of the function.

+   /* One could argue that GET_MODE_PRECISION (TYPE_MODE (type))
+  should always be the same as TYPE_PRECISION (type).
+  However, it is not.  Since we are converting from tree to
+  rtl, we have to expose this ugly truth here.  */
+   temp = immed_wide_int_const (wide_int::from
+  (exp,
+   GET_MODE_PRECISION (TYPE_MODE (type)),
+   TYPE_SIGN (type)),
+TYPE_MODE (type));
+   return temp;
+  }

I don't really see how one could argue that, given that there are much fewer
modes than possible type precisions, so please rewrite 

a testcase for PR57410

2013-11-27 Thread Vladimir Makarov

Here is the test I've committed as rev. 205451 for

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57410

2013-11-27  Vladimir Makarov  vmaka...@redhat.com

PR rtl-optimization/57410
* gcc.target/i386/pr57410.c: New.

Index: testsuite/gcc.target/i386/pr57410.c
===
--- testsuite/gcc.target/i386/pr57410.c (revision 0)
+++ testsuite/gcc.target/i386/pr57410.c (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options -O -fpeel-loops } */
+
+extern char outbuffer[];
+extern char buffer[];
+
+void foo(int j)
+{
+  unsigned i, fp = fp;
+  for (i = 0; i  6; i++)
+buffer[j++] = outbuffer[fp - i];
+}



Re: [PATCH] Improve handling of threads which cross over the current loops header

2013-11-27 Thread Jeff Law

On 11/27/13 04:28, Richard Biener wrote:


Ick ;)  Loop fixup isn't able to recover the original loop here, it
discovers a new one, the one just covering looping through the
default: case of the switch.
I wouldn't expect it to.  I don't have the transformed CFG in front of 
me right now, but as you probably saw, it's scrambled pretty badly.


Sadly, we found out yesterday that the submitter of that BZ reduced 
coremark too far and the transformation doesn't actually apply to 
coremark.   We're not going to try and extend things at this point in 
the game -- that'll have to be 4.10/5.0 material.


jeff


Re: [PING] [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c

2013-11-27 Thread Jeff Law

On 11/27/13 02:05, Zhenqiang Chen wrote:

Ping?

Thanks for including the actual patch you're pinging, it helps :-)


Hi,


lp1243022.c will fail with options: -mfpu=neon -mfloat-abi=hard.

Logs show it does not generate auto-incremental instruction in pass
auto_inc_dec. In this case, the check of REG_INC note at subreg2 will be
invalid. So skip the check for target arm-neon.

All PASS with the following options:

-mthumb/-mcpu=cortex-a9/-mfloat-abi=hard
-mthumb/-mcpu=cortex-a9/-mfloat-abi=soft
-mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp
-mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=vfpv3
-mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=vfpv3
-mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=vfpv3
-mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=neon
-mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=neon
-mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=neon
-mthumb/-mcpu=cortex-a15/-mfloat-abi=hard
-mthumb/-mcpu=cortex-a15/-mfloat-abi=soft
-mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp
-mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=vfpv4
-mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=vfpv4
-mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=vfpv4
-mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=neon
-mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=neon
-mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=neon

Is it OK?

Thanks!
-Zhenqiang

testsuite/ChangeLog:
2013-11-08  Zhenqiang Chen  zhenqiang.c...@linaro.org

 * gcc.target/arm/lp1243022.c: Skip target arm-neon.
It seems to me you should be xfailing arm-neon, not skipping the test. 
Unless there is some fundamental reason why we can not generate auto-inc 
instructions on the neon.


jeff



Re: [PATCH, score] Remove unused REG_CLASS_FROM_LETTER define

2013-11-27 Thread Jeff Law

On 11/27/13 02:52, Liqin Chen wrote:

2013-11-27  Chen Liqin  liqin@gmail.com

 * config/score/score.h (REG_CLASS_FROM_LETTER): Delete.

Installed on the trunk.  Thanks.
jeff



Re: Some wide-int review comments

2013-11-27 Thread Kenneth Zadeck

committed as revision 205448 to trunk.
committed as revision 205455 to wide-int branch.
On 11/27/2013 05:50 AM, Richard Biener wrote:

On Tue, Nov 26, 2013 at 5:33 PM, Kenneth Zadeck
zad...@naturalbridge.com wrote:

Richi,

patch ping

Ok.

Thanks,
Richard.


also two more pieces of information.With further testing, this seems to
fix

Tests that now work, but didn't before:
===
ext/random/hypergeometric_distribution/operators/values.cc (test for excess
errors)

New tests that PASS:

ext/random/hypergeometric_distribution/operators/values.cc execution test


also, the corresponding frag for fold-const.c on the wide-int branch will
look like

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 205224)
+++ gcc/fold-const.c(working copy)
@@ -1030,51 +1030,51 @@ int_const_binop_1 (enum tree_code code,

  case TRUNC_DIV_EXPR:
  case EXACT_DIV_EXPR:
-  res = wi::div_trunc (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::div_trunc (arg1, arg2, sign, overflow);
break;

  case FLOOR_DIV_EXPR:
-  res = wi::div_floor (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::div_floor (arg1, arg2, sign, overflow);
break;

  case CEIL_DIV_EXPR:
-  res = wi::div_ceil (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::div_ceil (arg1, arg2, sign, overflow);
break;

  case ROUND_DIV_EXPR:
-  res = wi::div_round (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::div_round (arg1, arg2, sign, overflow);
break;

  case TRUNC_MOD_EXPR:
-  res = wi::mod_trunc (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::mod_trunc (arg1, arg2, sign, overflow);
break;

  case FLOOR_MOD_EXPR:
-  res = wi::mod_floor (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::mod_floor (arg1, arg2, sign, overflow);
break;

  case CEIL_MOD_EXPR:
-  res = wi::mod_ceil (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::mod_ceil (arg1, arg2, sign, overflow);
break;

  case ROUND_MOD_EXPR:
-  res = wi::mod_round (arg1, arg2, sign, overflow);
-  if (overflow)
+  if (arg2 == 0)
  return NULL_TREE;
+  res = wi::mod_round (arg1, arg2, sign, overflow);
break;

  case MIN_EXPR:



On 11/20/2013 06:31 PM, Kenneth Zadeck wrote:

On 11/13/2013 04:59 AM, Richard Biener wrote:

On Tue, Nov 12, 2013 at 5:43 PM, Kenneth Zadeck
zad...@naturalbridge.com wrote:

On 11/12/2013 11:27 AM, Joseph S. Myers wrote:

On Tue, 12 Nov 2013, Kenneth Zadeck wrote:


Richi,

i am having a little trouble putting this back the way that you want.
The
issue is rem.
what is supposed to happen for INT_MIN % -1?

I would assume because i am failing the last case of
gcc.dg/c90-const-expr-8.c
that INT_MIN %-1 should not overflow even if INT_MIN / -1 does.
however,

Given the conclusion in C11 that a%b should be considered undefined if
a/b
is not representable, I think it's reasonable to say INT_MIN % -1
*should*
be considered to overflow (for all C standard versions) (and bug 30484
is
only a bug for -fwrapv).


however, my local question is what do we want the api to be
int-const-binop-1?The existing behavior seems to be that at least
for
common modes this function silently returns 0 and it is up to the front
ends
to put their own spin on it.

For wide-int you create 1:1 the behavior of current trunk (if a change of
behavior in TImode is not tested in the testsuite then you can ignore
that).
Whatever change you do to semantics of functions you do separately
from wide-int (preferably first on trunk, or at your choice after the
wide-int
merge).

For this case in question I'd say a % -1 should return 0, but for
INT_MIN % -1 that 0 should have TREE_OVERFLOW set (and
thus you need to adjust that c90-const-expr-8.c testcase).

Richard.


kenny

richi,
I have done this exactly as you suggested.   bootstrapped and regression
tested on x86-64.

2013-11-20  Kenneth Zadeck  zad...@naturalbridge.com

 * fold-const.c
 (int_const_binop_1): Make INT_MIN % -1 return 0 with the overflow
 bit set.


2013-11-20  Kenneth Zadeck  zad...@naturalbridge.com

 * gcc.dg/c90-const-expr-8.c: Look for overflow on INT_MIN % -1.
 * gcc.dg/c99-const-expr-8.c: Look for overflow on INT_MIN % -1.

ok to commit?

kenny



Index: gcc/fold-const.c
===
--- gcc/fold-const.c	(revision 205447)
+++ gcc/fold-const.c	(working copy)
@@ 

Re: [PING]RE: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Jason Merrill

On 11/26/2013 12:23 PM, Iyer, Balaji V wrote:

Did you get a chance to look at my _Cilk_for patch for C?


BTW, I think pinging less than 24 hours after you send the patch is a 
bit excessive.  :)


Jason



Re: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Jason Merrill

On 11/25/2013 11:03 PM, Iyer, Balaji V wrote:

On a broad note, I think there's a lot of OpenMP code you could be 
reusing here rather than writing it all again.  And that way Cilk code 
will benefit from improvements to OpenMP handling, and vice versa.  It 
probably makes sense to turn Cilk_for into an OMP_FOR loop, and then 
gimplify into GIMPLE_OMP_FOR, rather than create a new tree code and 
handle everything at the tree level.  But I don't know the OMP code well 
enough to suggest exactly how that would work.


Finer-grained comments:


+  tree exp = build_new_op (EXPR_LOCATION (op1), code, flags, op0, op1,
+  NULL_TREE, NULL, tf_none);
+  if (exp == error_mark_node)
+exp = build_x_modify_expr (EXPR_LOCATION (op1), op0, code, op1, tf_none);
+  if (exp  exp != error_mark_node)
+return exp;


I thought you were changing this?


+/* Handler for iterator to compute the loop variable.  ADD_OP indicates
+   whether we need a '+' or '-' operation. LOW indicates the starting point
+   and LOOP_VAR is the induction variable.  This functin returns an
+   INIT_EXPR.  */


This comment still doesn't document VAR2.

function


+  tree exp = build_new_op (loc, add_op, 0, low, loop_var, NULL_TREE, 0,
+  tf_none);
+  gcc_assert (exp != error_mark_node);
+  exp = cp_build_modify_expr (var2, INIT_EXPR, exp, tf_warning_or_error);


Looking at online Cilk documentation I see:


The increment expression must add to or subtract from the control variable 
using one of the following supported operations:
+=
-=
++ (prefix or postfix)
-- (prefix or postfix)


From this, I think people would expect the increment to use a 
user-defined operator+=/-=/++/--, but your code above uses operator+/- 
instead.



+   -fcilkplus must be enabled t use %_Cilk_for%);


to


+cp_parser_cilk_for (cp_parser *parser, tree grain)


Please reuse cp_parser_omp_for, like Aldy did for #pragma simd 
(cp_parser_cilk_simd) rather than write yet another for-statement 
parser.  This should reduce the patch size quite a bit.



+case PRAGMA_CILK_GRAINSIZE:
+  if (context == pragma_external)
+   {
+ error_at (pragma_tok-location,
+   %#pragma cilk grainsize% may only be be used inside a 
+   function);
+ break;
+   }
+
+  /* Ignore the pragma if Cilk Plus is not enabled.  */
+  if (flag_enable_cilkplus)
+   {
+ cp_parser_cilk_grainsize (parser, pragma_tok);
+ return true;
+   }
 default:


Do you mean to fall through to the default case if Cilk+ is not enabled?


+   tmp = CILK_FOR_COND (t);
+   if (COMPARISON_CLASS_P (tmp))
+ {
+   tree op0 = RECUR (TREE_OPERAND (tmp, 0));
+   tree op1 = RECUR (TREE_OPERAND (tmp, 1));
+   tmp = build2 (TREE_CODE (tmp), boolean_type_node, op0, op1);
+ }
+   CILK_FOR_COND (stmt) = tmp;


Why not just recur into CILK_FOR_COND?


+   tmp = CILK_FOR_EXPR (t);
+   if (TREE_CODE (tmp) == MODIFY_EXPR)
+ {
+   tree lhs = TREE_OPERAND (tmp, 0);
+   tree rhs = TREE_OPERAND (tmp, 1);
+   lhs = RECUR (lhs);
+   rhs = build2 (TREE_CODE (rhs), TREE_TYPE (lhs),
+ RECUR (TREE_OPERAND (rhs, 0)),
+ RECUR (TREE_OPERAND (rhs, 1)));
+   tmp = build2 (MODIFY_EXPR, void_type_node, lhs, rhs);
+ }
+   else
+ tmp = build2 (TREE_CODE (tmp), void_type_node,
+   RECUR (TREE_OPERAND (tmp, 0)),
+   RECUR (TREE_OPERAND (tmp, 1)));
+   finish_for_expr (tmp, stmt);


And CILK_FOR_EXPR?

Jason



Re: [Patch, RTL] Eliminate redundant vec_select moves.

2013-11-27 Thread Tejas Belagod

Richard Sandiford wrote:

Tejas Belagod tbela...@arm.com writes:

The problem is that one reg rtx can span several hard registers.
E.g. (reg:V4SI 32) might represent one 64-bit register (no. 32),
but it might instead represent two 32-bit registers (nos. 32 and 33).
Obviously the latter's not very likely for vectors this small,
but more likely for larger ones (including on NEON IIRC).

So if we had 2 32-bit registers being treated as a V4HI, it would be:

   --3233--
   msb  lsb
   
   
   
   msb  lsb
   --32--

for big endian and:

   --3332--
   msb  lsb
   
   
   
   msb  lsb
   --32--

for little endian.

Ah, ok, that makes things clearer. Thanks for that.

I can't find any helper function that figures out if we're writing partial or 
full result regs. Would something like


 REGNO (src) == REGNO (dst) 
 HARD_REGNO_NREGS (src) == HARD_REGNO_NREGS (dst) == 1

be a sane check for partial result regs?


Yeah, that should work.  I think a more general alternative would be:

  simplify_subreg_regno (REGNO (src), GET_MODE (src),
 offset, GET_MODE (dst)) == (int) REGNO (dst)

where:

  offset = GET_MODE_UNIT_SIZE (GET_MODE (src)) * INTVAL (XVECEXP (sel, 0))

That offset is the byte offset of the first selected element from the
start of a vector in memory, which is also the way that SUBREG_BYTEs
are counted.  For little-endian it gives the offset of the lsb of the
slice, while for big-endian it gives the offset of the msb (which is
also how SUBREG_BYTEs work).

The simplify_subreg_regno should cope with both single-register vectors
and multi-register vectors.


Sorry for the delayed response to this.

Thanks for the tip. Here's an improved patch that implements the 
simplify_sureg_regno () method of eliminating redundant moves. Regarding the 
test case, I failed to get the ppc back-end to generate RTL pattern that this 
patch checks for. I can easily write a test case for aarch64(big and little 
endian) on these lines


typedef float float32x4_t __attribute__ ((__vector_size__ (16)));

float foo_be (float32x4_t x)
{
  return x[3];
}

float foo_le (float32x4_t x)
{
  return x[0];
}

where I know that the vector indexing will generate a vec_select on the same src 
and dst regs that could be optimized away and hence test it. But I'm struggling 
to get a test case  that the ppc altivec back-end will generate such a 
vec_select for. I see that altivec does not define vec_extract, so a simple 
indexing like this seems to happen via memory. Also, I don't know enough about 
the ppc PCS or architecture to write a test that will check for this 
optimization opportunity on same src and dst hard-registers. Any hints?


This patch has been bootstrapped on x64_64 and regressed on aarch64-none-elf and 
aarch64_be-none-elf.


Thanks for your patience,
Tejas.diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 0cd0c7e..ca25ce5 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1180,6 +1180,22 @@ set_noop_p (const_rtx set)
   dst = SUBREG_REG (dst);
 }
 
+  /* It is a NOOP if destination overlaps with selected src vector
+ elements.  */
+  if (GET_CODE (src) == VEC_SELECT
+   REG_P (XEXP (src, 0))  REG_P (dst)
+   HARD_REGISTER_P (XEXP (src, 0))
+   HARD_REGISTER_P (dst))
+{
+  rtx par = XEXP (src, 1);
+  rtx src0 = XEXP (src, 0);
+  HOST_WIDE_INT offset =
+   GET_MODE_UNIT_SIZE (GET_MODE (src0)) * INTVAL (XVECEXP (par, 0, 0));
+
+  return simplify_subreg_regno (REGNO (src0), GET_MODE (src0),
+   offset, GET_MODE (dst)) == (int)REGNO (dst);
+}
+
   return (REG_P (src)  REG_P (dst)
   REGNO (src) == REGNO (dst));
 }

Re: [PATCH] Fix VRP register_edge_assert_for_1 (PR tree-optimization/59014)

2013-11-27 Thread Jakub Jelinek
On Tue, Nov 26, 2013 at 02:15:58PM -0700, Jeff Law wrote:
 On 11/26/13 13:33, Jakub Jelinek wrote:
 Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
 
 I'll try to create a testcase for 4.8 branch tomorrow.

I've managed to create a testcase which reproduces this on 4.8 too,
so I've committed it to trunk and committed both the patch from yesterday
and this one to 4.8 branch too.

2013-11-27  Jakub Jelinek  ja...@redhat.com

PR tree-optimization/59014
* gcc.c-torture/execute/pr59014-2.c: New test.

--- gcc/testsuite/gcc.c-torture/execute/pr59014-2.c 2013-08-25 
18:20:55.717911035 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr59014-2.c 2013-11-27 
15:31:09.833340947 +0100
@@ -0,0 +1,23 @@
+/* PR tree-optimization/59014 */
+
+__attribute__((noinline, noclone)) long long int
+foo (long long int x, long long int y)
+{
+  if (((int) x | (int) y) != 0)
+return 6;
+  return x + y;
+}
+
+int
+main ()
+{
+  if (sizeof (long long) == sizeof (int))
+return 0;
+  int shift_half = sizeof (int) * __CHAR_BIT__ / 2;
+  long long int x = (3LL  shift_half)  shift_half;
+  long long int y = (5LL  shift_half)  shift_half;
+  long long int z = foo (x, y);
+  if (z != ((8LL  shift_half)  shift_half))
+__builtin_abort ();
+  return 0;
+}


Jakub


Re: [PATCH] Get rid of useless -fno-rtti for libubsan

2013-11-27 Thread Jeff Law

On 11/27/13 01:01, Yury Gribov wrote:

Hi all,

As discussed in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59106 only a
subset of libubsan should be built with RTTI support. Attached patch
adds custom build rules for relevant files.

Wasn't that already checked in?

commit 5e0d610a433356af747fdeda5d8acfe34d3115a9
Author: ygribov ygribov@138bc75d-0d04-0410-961f-82ee72b054a4
Date:   Mon Nov 18 08:03:16 2013 +

libsanitizer:
2013-11-18  Yury Gribov  y.gri...@samsung.com

PR sanitizer/59106
* asan/Makefile.am (AM_CXXFLAGS): Add -fno-rtti.
* interception/Makefile.am (AM_CXXFLAGS): Likewise.
* lsan/Makefile.am (AM_CXXFLAGS): Likewise.
* sanitizer_common/Makefile.am (AM_CXXFLAGS): Likewise.
* tsan/Makefile.am (AM_CXXFLAGS): Likewise.
* asan/Makefile.in: Regenerate.
* interception/Makefile.in: Regenerate.
* tsan/Makefile.in: Regenerate.
* lsan/Makefile.in: Regenerate.
* sanitizer_common/Makefile.in: Regenerate.

gcc/testsuite:
2013-11-18  Yury Gribov  y.gri...@samsung.com

PR sanitizer/59106
* c-c++-common/asan/pr59106.c: New test.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204934 
138bc75d-0d04-0410-96


Jeff



Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Jeff Law

On 11/27/13 03:18, Yvan Roux wrote:

Ping

On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote:

Ping.

On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote:

Hi,

this patch fixed an LRA cycling due to secondary reload (Thumb mode).
Notice that this patch is a prerequisite to turn on LRA by default on
ARM.  Bootstrapped on a9 and a15 without any regression in the
testsuite as LRA is off by default and with the regression reported in
the thread bellow when LRA is on.

http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html

Thanks,
Yvan

2013-11-07  Yvan Roux  yvan.r...@linaro.org

 * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return NO_REGS
 for LRA

?

How can that be correct?

The secondary reload macros/hooks define cases where additional 
registers are needed to reload certain forms of rtl.  I doubt the use of 
LRA completely eliminates the need for secondary reloads.


Jeff



Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p

2013-11-27 Thread Jeff Law

On 11/27/13 03:18, Yvan Roux wrote:

Ping.

On 19 November 2013 09:52, Yvan Roux yvan.r...@linaro.org wrote:

yep, all good performance-wise :)


Great, Thanks Kyrill.

Ok for trunk ?
Please include either the patch you are pinging or at the least a link 
to it in the archives.




jeff


Re: [testsuite] Properly set ld_library_path in cilk-plus tests

2013-11-27 Thread Jeff Law

On 11/27/13 04:39, Rainer Orth wrote:

All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests
were FAILing on Solaris 10 and 11/x86:

ld.so.1: c11-atomic-exec-1.exe: fatal: 
/var/gcc/regression/trunk/10-gcc-gas/build/./gcc/libgcc_s.so.1: wrong ELF 
class: ELFCLASS32

ld.so.1: fib.exe: fatal: 
/var/gcc/regression/trunk/10-gcc-gas/build/i386-pc-solaris2.10/./libcilkrts/.libs/libcilkrts.so.5:
 wrong ELF class: ELFCLASS32

This happens because both cilk-plus.exp files override ld_library_path
instead of appending to it.  Fixed as follows by using the customary
method for setting ld_library_path.  Bootstrapped without regressions on
i386-pc-solaris2.1[01] and x86_64-unknown-linux-gnu, installed on
mainline.

Rainer


2013-11-26  Rainer Orth  r...@cebitec.uni-bielefeld.de

* gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path.
Call set_ld_library_path_env_vars.
* g++.dg/cilk-plus/cilk-plus.exp: Likewise.
Thanks for taking care of this.  You're probably getting more cilk+ 
fallout than most because you do a lot of Solaris work.   Sorry about that.



jeff


RE: [testsuite] Properly set ld_library_path in cilk-plus tests

2013-11-27 Thread Iyer, Balaji V


 -Original Message-
 From: Rainer Orth [mailto:r...@cebitec.uni-bielefeld.de]
 Sent: Wednesday, November 27, 2013 6:39 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Iyer, Balaji V
 Subject: [testsuite] Properly set ld_library_path in cilk-plus tests
 
 All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests
 were FAILing on Solaris 10 and 11/x86:
 
 ld.so.1: c11-atomic-exec-1.exe: fatal: /var/gcc/regression/trunk/10-gcc-
 gas/build/./gcc/libgcc_s.so.1: wrong ELF class: ELFCLASS32
 
 ld.so.1: fib.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/i386-pc-
 solaris2.10/./libcilkrts/.libs/libcilkrts.so.5: wrong ELF class: ELFCLASS32
 
 This happens because both cilk-plus.exp files override ld_library_path
 instead of appending to it.  Fixed as follows by using the customary method
 for setting ld_library_path.  Bootstrapped without regressions on i386-pc-
 solaris2.1[01] and x86_64-unknown-linux-gnu, installed on mainline.
 
   Rainer
 
 
 2013-11-26  Rainer Orth  r...@cebitec.uni-bielefeld.de
 
   * gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path.
   Call set_ld_library_path_env_vars.
   * g++.dg/cilk-plus/cilk-plus.exp: Likewise.

Thanks for catching this! Sorry I didn't catch it sooner. I am just getting 
myself familiar with the DejaGNU framework.

-Balaji V. Iyer.


Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p

2013-11-27 Thread Yvan Roux
 Please include either the patch you are pinging or at the least a link to it
 in the archives.

Ok, sorry for that, here is the patch and Changelog

Yvan


2013-11-17  Yvan Roux  yvan.r...@linaro.org

* config/arm/arm.md (store_minmaxsi): Use only when
optimize_function_for_size_p.
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index e8d5464..da387fb 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3642,7 +3642,7 @@
 [(match_operand:SI 1 s_register_operand r)
  (match_operand:SI 2 s_register_operand r)]))
(clobber (reg:CC CC_REGNUM))]
-  TARGET_32BIT  optimize_insn_for_size_p()
+  TARGET_32BIT  optimize_function_for_size_p (cfun)
   *
   operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
operands[1], operands[2]);


Re: [PING]RE: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Jason Merrill

On 11/27/2013 12:06 PM, Jason Merrill wrote:

On 11/26/2013 12:23 PM, Iyer, Balaji V wrote:

Did you get a chance to look at my _Cilk_for patch for C?


BTW, I think pinging less than 24 hours after you send the patch is a
bit excessive.  :)


Ah, I see, you were pinging the non-C++ parts.

Jason




Re: Patch ping (stage1-ish patches)

2013-11-27 Thread Jeff Law

On 11/27/13 05:30, Eric Botcazou wrote:

In fact, I would suggest that anyone with a pending patch from prior to
stage1 close that hasn't gotten feedback by midnight Tuesday ping their
patch.  I'd like to have a sense of everything that is outstanding
sooner rather than later and wrap up any loose ends as quickly as possible.


Improve debug info for small structures (2)
   http://gcc.gnu.org/ml/gcc-patches/2013-10/msg02007.html

This is fine.   Sorry about the delay.

jeff



Re: Patch ping (stage1-ish patches)

2013-11-27 Thread Jeff Law

On 11/27/13 04:48, Rainer Orth wrote:

Jakub Jelinek ja...@redhat.com writes:


On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote:

In fact, I would suggest that anyone with a pending patch from prior
to stage1 close that hasn't gotten feedback by midnight Tuesday ping
their patch.  I'd like to have a sense of everything that is
outstanding sooner rather than later and wrap up any loose ends as
quickly as possible.


On my side, there's

[c++, driver] Add -lrt on Solaris
http://gcc.gnu.org/ml/gcc-patches/2013-05/msg01488.html

resubmitted as

http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00412.html

It's unclear if the more intrusive solution outlined in the second
message (introduce libstdc++.spec) were acceptable in stage3, and I'm
uncertain if I can get it ready in time.
Well, the short-term hack to g++spec.c along with the corresponding 
change to sol2.h is, OK for the trunk.


As for the more invasive change, I'd let the C++ runtime guys decide if 
its too invasive for stage3.  If you go that route, worst case is it's 
considered too invasive and it goes in during stage1 and you can remove 
the hack-ish solution from this patch.


jeff


Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives

2013-11-27 Thread Max Woodbury

From 6c95593f684c120a0ea7ef6178401283f63250b7 Mon Sep 17 00:00:00 2001
From: Max TenEyck Woodbury max+...@mtew.isa-geek.net
Date: Sun, 24 Nov 2013 09:48:09 -0500
Subject: [PATCH] Postpone __LINE__ evaluation to the end of #line directives
To: gcc-patches@gcc.gnu.org

Copyright 2013 assigned to the Free Software Foundation.
---
 gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++
 libcpp/directives.c  |  9 -
 libcpp/internal.h|  1 +
 libcpp/macro.c   |  3 +++
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c 
b/gcc/testsuite/gcc.dg/cpp/line4.c

index 84dbf96..0120a2b 100644
--- a/gcc/testsuite/gcc.dg/cpp/line4.c
+++ b/gcc/testsuite/gcc.dg/cpp/line4.c
@@ -13,7 +13,18 @@ enum { i = __LINE__ };
 enum { j = __LINE__ };

 #line 16  /* N.B. the _next_ line is line 16.  */
-
-char array1[i== 44 ? 1 : -1];
-char array2[j== 90 ? 1 : -1];
-char array3[__LINE__ == 19 ? 1 : -1];
+ /* __LINE__ should 
be 16 */
+char array1[i== 44 ? 1 : -1]; 
 /* 17 */
+char array2[j== 90 ? 1 : -1]; 
 /* 18 */
+char array3[__LINE__ == 19 ? 1 : -1]; 
 /* 19 */
+ 
 /* 20 */
+# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. 
 */
+ 
 /* 22 */
+char array4[__LINE__ == 23 ? 1: -1]; 
 /* 23 */
+char array5[__LINE__ == 23 ? -1: 1]; 
 /* 24 */
+ 
 /* 25 */
+# line __LINE__ /* N.B. nor should a multi-line comment change the fact 

+   that the __LINE__ sequence should _not_ change here. 
*/
+ 
 /* 28 */
+char array6[__LINE__ == 29 ? 1: -1]; 
 /* 29 */
+char array7[__LINE__ == 27 ? -1: 1]; 
 /* 30 */

diff --git a/libcpp/directives.c b/libcpp/directives.c
index 65b2034..adb04a5 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -900,7 +900,9 @@ do_line (cpp_reader *pfile)
   bool wrapped;

   /* #line commands expand macros.  */
+  ++pfile-state.in_directive;  /* Request special __LINE__ 
handling. */

   token = cpp_get_token (pfile);
+  --pfile-state.in_directive;  /* Cancel 
request */

   if (token-type != CPP_NUMBER
   || strtolinenum (token-val.str.text, token-val.str.len,
   new_lineno, wrapped))
@@ -914,7 +916,9 @@ do_line (cpp_reader *pfile)
   return;
 }

-  if (CPP_PEDANTIC (pfile)  (new_lineno == 0 || new_lineno  cap || 
wrapped))

+  if (CPP_PEDANTIC (pfile)  (new_lineno == 0
+  || (new_lineno  cap  new_lineno != CUR__LINE__)
+  || wrapped))
 cpp_error (pfile, CPP_DL_PEDWARN, line number out of range);
   else if (wrapped)
 cpp_error (pfile, CPP_DL_WARNING, line number out of range);
@@ -936,6 +940,9 @@ do_line (cpp_reader *pfile)
 }

   skip_rest_of_line (pfile);
+  if ( new_lineno == CUR__LINE__ )  /* Postponed 
evaluation ? */

+new_lineno = linemap_get_expansion_line (pfile-line_table,
+ 
pfile-line_table-highest_line);

   _cpp_do_file_change (pfile, LC_RENAME_VERBATIM, new_file, new_lineno,
   map_sysp);
 }
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 5321458..268de86 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -604,6 +604,7 @@ cpp_in_primary_file (cpp_reader *pfile)
 {
   return pfile-line_table-depth == 1;
 }
+#define CUR__LINE__ -1U

 /* In macro.c */
 extern void _cpp_free_definition (cpp_hashnode *);
diff --git a/libcpp/macro.c b/libcpp/macro.c
index e359d15..47e41b6 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -309,6 +309,9 @@ _cpp_builtin_macro_text (cpp_reader *pfile, 
cpp_hashnode *node)

   /* If __LINE__ is embedded in a macro, it must expand to the
 line of the macro's invocation, not its definition.
 Otherwise things like assert() will not work properly.  */
+  if (pfile-state.in_directive  1)   /* In #line 
directive? */
+number = CUR__LINE__;  /* yes, postpone the 
lookup... */

+  else
   number = linemap_get_expansion_line (pfile-line_table,
   CPP_OPTION (pfile, traditional)
   ? 
pfile-line_table-highest_line

--
1.8.0.rc0.18.gf84667d


Re: _Cilk_spawn and _Cilk_sync for C++

2013-11-27 Thread Jason Merrill

On 11/25/2013 10:50 AM, Iyer, Balaji V wrote:

I have fixed this issue.  My function to map the variable's context from the 
spawner to the spawn helper function was going into the lambda function. I made 
it stop by adding a language specific copy_tree_body (basically stop going into 
the lambda function's body for C++ and for the rest of the times just use 
copy_tree_body_r, no code duplicating  is done between the two) that and it 
works fine now.


I doubt it was walking from the enclosing function into the body of the 
lambda function.  Looking at the patch, it seems that what you're 
avoiding is walking into the closure object itself, and adding an entire 
new langhook seems like overkill for that.


I think a better approach would be to add a cp_build_cilk_spawn that 
uses stabilize_call to pre-evaluate the arguments of the call.


Jason



Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Ian Lance Taylor
On Wed, Nov 27, 2013 at 4:31 AM, Alexey Samsonov samso...@google.com wrote:

 LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The
 standard tells that compilation unit entries may have attributes
 specifying the
 address range, but doesn't tell they are obligatory.
 DWARF consumers probably shouldn't rely on their presence.

How are consumers of LLVM debug info expected to determine the address
range of the function?  Is the intent that the consumer should assume
that the function continues from the DW_AT_low_pc address to the next
DW_AT_low_pc address from the debug info?  Or should the consumer look
at the function size from the symbol table?  Or should the consumer
examine all the DIEs looking for address ranges?

Does LLVM support anything like GCC's -freorder-blocks-and-partition
option, and, if so, how does it represent the address ranges in the
debug info?

Ian


Re: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Jason Merrill

On 11/15/2013 02:23 PM, Iyer, Balaji V wrote:

One small thing that I have not done that Jakub and several other have asked me 
before is that, there are no tests in c-c++-common for _Cilk_for. The reason being 
that the syntax between C and C++ implementations are different. In C++, the 
induction variable must be defined in the initializer (e.g. it should start wth 
_Cilk_for (int ii = 0)). In C, this is not allowed (e.g. it should start as 
_Cilk_for (ii = 0; ii  10; ii++)).


That can be handled with #ifdef __cplusplus.

Jason



Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Yvan Roux
 How can that be correct?

 The secondary reload macros/hooks define cases where additional registers
 are needed to reload certain forms of rtl.  I doubt the use of LRA
 completely eliminates the need for secondary reloads.

Vladimir explained me that in that case on arm, secondary reload hook
confuses LRA, and that returning NO_REGS will let LRA deal with
constraints, but I may have badly understand what he said.

Yvan


Re: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Jakub Jelinek
On Wed, Nov 27, 2013 at 12:48:11PM -0500, Jason Merrill wrote:
 On 11/15/2013 02:23 PM, Iyer, Balaji V wrote:
 One small thing that I have not done that Jakub and several other have asked 
 me before is that, there are no tests in c-c++-common for _Cilk_for. The 
 reason being that the syntax between C and C++ implementations are 
 different. In C++, the induction variable must be defined in the initializer 
 (e.g. it should start wth _Cilk_for (int ii = 0)). In C, this is not 
 allowed (e.g. it should start as _Cilk_for (ii = 0; ii  10; ii++)).
 
 That can be handled with #ifdef __cplusplus.

It isn't allowed even in C99?  For OpenMP,
int a[30];

void
foo ()
{
  #pragma omp for
  for (int i = 0; i  30; i++)
a[i] = i;
}
is valid for C99.  So, perhaps you just want
/* { dg-additional-options -std=c99 { target c } } */
in the c-c++-common tests?

Jakub


Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Richard Earnshaw
On 27/11/13 17:49, Yvan Roux wrote:
 How can that be correct?

 The secondary reload macros/hooks define cases where additional registers
 are needed to reload certain forms of rtl.  I doubt the use of LRA
 completely eliminates the need for secondary reloads.
 
 Vladimir explained me that in that case on arm, secondary reload hook
 confuses LRA, and that returning NO_REGS will let LRA deal with
 constraints, but I may have badly understand what he said.
 
 Yvan
 

But wasn't that in the context of PREFERRED_RELOAD_CLASS?  That's a
different beast.

R.



Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Vladimir Makarov

On 11/27/2013, 12:16 PM, Jeff Law wrote:

On 11/27/13 03:18, Yvan Roux wrote:

Ping

On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote:

Ping.

On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote:

Hi,

this patch fixed an LRA cycling due to secondary reload (Thumb mode).
Notice that this patch is a prerequisite to turn on LRA by default on
ARM.  Bootstrapped on a9 and a15 without any regression in the
testsuite as LRA is off by default and with the regression reported in
the thread bellow when LRA is on.

http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html

Thanks,
Yvan

2013-11-07  Yvan Roux  yvan.r...@linaro.org

 * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): 
Return NO_REGS

 for LRA

?

How can that be correct?

The secondary reload macros/hooks define cases where additional 
registers are needed to reload certain forms of rtl.  I doubt the use 
of LRA completely eliminates the need for secondary reloads.
  When I designed LRA I wanted to remove as many hooks as possible 
because I thought insn constraints as major info source are enough. 
Unfortunately I did not succeed fully with secondary reload hooks and 
macros.  It is still needed for some complicated cases but general rule 
to port LRA to a target is to try to switch these hooks off.


  For example, LRA can generate a move of pseudos of classes for which 
hardware has no real insn.  After that looking on the move constraints,  
LRA can generate more move insns and additional pseudos to generate 
moves (loads or stores if additional pseudos got NO_REGS) which 
represent real hardware insns.  In complicated cases when these macros 
are still needed, LRA runs into infinite loop of such move generation if 
the macros are not used.


  I might be return to a project to remove necessity of such hooks and 
macros for LRA but I am not sure when.  I guess I need to write a small 
guidance too how to port LRA.


  Also I found that in many cases as here, the macros although working 
for reload can confuse LRA (because of different reload pass and LRA 
implementation approaches).  So I guess the patch is ok although I can 
not approve it as I am not an arm port maintainer.


Re: [PATCH] _Cilk_for for C and C++

2013-11-27 Thread Aldy Hernandez

On 11/27/13 10:54, Jakub Jelinek wrote:

On Wed, Nov 27, 2013 at 12:48:11PM -0500, Jason Merrill wrote:

On 11/15/2013 02:23 PM, Iyer, Balaji V wrote:

One small thing that I have not done that Jakub and several other have asked me 
before is that, there are no tests in c-c++-common for _Cilk_for. The reason being 
that the syntax between C and C++ implementations are different. In C++, the 
induction variable must be defined in the initializer (e.g. it should start wth 
_Cilk_for (int ii = 0)). In C, this is not allowed (e.g. it should start as 
_Cilk_for (ii = 0; ii  10; ii++)).


That can be handled with #ifdef __cplusplus.


It isn't allowed even in C99?  For OpenMP,
int a[30];

void
foo ()
{
   #pragma omp for
   for (int i = 0; i  30; i++)
 a[i] = i;
}
is valid for C99.  So, perhaps you just want
/* { dg-additional-options -std=c99 { target c } } */
in the c-c++-common tests?

Jakub



Yup, that's what I did for the Cilk Plus pragma simd tests.


Re: Patch ping (stage1-ish patches)

2013-11-27 Thread Jeff Law

On 11/27/13 01:28, Alexander Ivchenko wrote:

Here is the patch series that had been posted in Sep that is aimed to
isolate the Android support from targets that actually don't have that
support (We discussed the need of it with Jakub here
http://gcc.gnu.org/ml/gcc-patches/2013-09/msg00185.html):

http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01420.html

This patch series is fine.

Just a minor typo in patch #4:

+/* IFUNCs are supportted by glibc, but not by uClibc or Bionic.  */
s/supportted/supported/

jeff


Re: wide-int, rtl

2013-11-27 Thread Kenneth Zadeck

Eric,

Let me make one high level comment here and the low level comments will 
be responded to when i fix the patch.


CONST_DOUBLE has two hwis in it.   So in practice, you get 128 bits and 
that is it.a CONST_WIDE_INT has an array of HWIs that has as many 
elements as it needs to represent the value.


If TARGET_SUPPORTS_WIDE_INT == 0 (the default and currently the value 
for every public port except the RS6000), then rtl integer constants 
that do not fit in a CONST_INT are put in a CONST_DOUBLE and there are 
never any CONST_WIDE_INTS created.A platform like this cannot 
correctly represent variables larger than 128 bits. i.e. this is like 
the current trunk except that there are fewer bugs for the 128 bit 
variables.


if TARGET_SUPPORTS_WIDE_INT is not 0, then rtl integer constants are put 
in CONST_WIDE_INT and CONST_DOUBLES only hold floating point values.   
For the last comment that you made, if the target does not support wide 
int, then there is simply no way to represent that constant properly and 
if the target accepts types larger than TImode, then the maintainer 
needs to do the work to use CONST_WIDE_INTs.


This is not a lot a work, but it requires target specific knowledge to 
convert the patterns, predicates and constraints to look for 
CONST_WIDE_INTs rather than CONST_DOUBLE for larger integers.


As far as your comment about const_double_operand, i could add some 
conditional code to make this not accept integers if the TARGET... is 
not 0.   Richard Sandiford wanted my to use that test as infrequently as 
possible.


thanks for your comments.

Kenny

On 11/27/2013 11:24 AM, Eric Botcazou wrote:

Richi has asked the we break the wide-int patch so that the individual port
and front end maintainers can review their parts without have to go through
the entire patch.This patch covers the first half of the rtl code.

--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -2336,15 +2336,23 @@ hash_rtx_cb (const_rtx x, enum machine_mode mode,
 + (unsigned int) INTVAL (x));
return hash;
  
+case CONST_WIDE_INT:

+  {
+   int i;
+   for (i = 0; i  CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;

You can write for (int i = 0; ... now and remove the parentheses.


--- a/gcc/cselib.c
+++ b/gcc/cselib.c
@@ -1121,15 +1120,23 @@ cselib_hash_rtx (rtx x, int create, enum machine_mode
memmode)
hash += ((unsigned) CONST_INT  7) + INTVAL (x);
return hash ? hash : (unsigned int) CONST_INT;
  
+case CONST_WIDE_INT:

+  {
+   int i;
+   for (i = 0; i  CONST_WIDE_INT_NUNITS (x); i++)
+ hash += CONST_WIDE_INT_ELT (x, i);
+  }
+  return hash;
+

Likewise.


--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -3298,23 +3268,13 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n,
int precision,
pow = n + lgup;
pow2 = n + lgup - precision;
  
-  /* We could handle this with some effort, but this case is much

- better handled directly with a scc insn, so rely on caller using
- that.  */
-  gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);

Why removing it?



--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -722,64 +722,33 @@ convert_modes (enum machine_mode mode, enum machine_mode
oldmode, rtx x, int uns
if (mode == oldmode)
  return x;
  
-  /* There is one case that we must handle specially: If we are converting

- a CONST_INT into a mode whose size is twice HOST_BITS_PER_WIDE_INT and
- we are to interpret the constant as unsigned, gen_lowpart will do
- the wrong if the constant appears negative.  What we want to do is
- make the high-order word of the constant zero, not all ones.  */
-
-  if (unsignedp  GET_MODE_CLASS (mode) == MODE_INT
-   GET_MODE_BITSIZE (mode) == HOST_BITS_PER_DOUBLE_INT
-   CONST_INT_P (x)  INTVAL (x)  0)
+  if (CONST_SCALAR_INT_P (x)
+   GET_MODE_CLASS (mode) == MODE_INT)

On a single line.


  {
-  double_int val = double_int::from_uhwi (INTVAL (x));
-
-  /* We need to zero extend VAL.  */
-  if (oldmode != VOIDmode)
-   val = val.zext (GET_MODE_BITSIZE (oldmode));
-
-  return immed_double_int_const (val, mode);
+  /* If the caller did not tell us the old mode, then there is
+not much to do with respect to canonization.  We have to assume
+that all the bits are significant.  */
+  if (GET_MODE_CLASS (oldmode) != MODE_INT)
+   oldmode = MAX_MODE_INT;
+  wide_int w = wide_int::from (std::make_pair (x, oldmode),
+  GET_MODE_PRECISION (mode),
+  unsignedp ? UNSIGNED : SIGNED);
+  return immed_wide_int_const (w, mode);

canonicalization?


@@ -5301,10 +5271,10 @@ store_expr (tree exp, rtx target, int call_param_p,
bool nontemporal)
   alt_rtl);
  }
  
-  /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not

- the same as that of TARGET, adjust the 

Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer

2013-11-27 Thread Alexey Samsonov
On Wed, Nov 27, 2013 at 9:44 PM, Ian Lance Taylor i...@google.com wrote:
 On Wed, Nov 27, 2013 at 4:31 AM, Alexey Samsonov samso...@google.com wrote:

 LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The
 standard tells that compilation unit entries may have attributes
 specifying the
 address range, but doesn't tell they are obligatory.
 DWARF consumers probably shouldn't rely on their presence.

 How are consumers of LLVM debug info expected to determine the address
 range of the function?

Sorry, I wasn't clear enough. GCC emits DW_AT_low_pc and DW_AT_high_pc for
*compile unit* DIE, so the consumer doesn't have to iterate over all
the functions DIEs.
LLVM doesn't emit DW_AT_low_pc/DW_AT_high_pc for compile unit,
but emits them for function DIEs.

I've added some debugging output to libbacktrace and noticed it was
able to build
[address range]-[compile unit] mapping only for sources compiled with GCC, so I
suggested that it doesn't examine all the descendants of compile unit
DIEs to build the ranges.

 Is the intent that the consumer should assume
 that the function continues from the DW_AT_low_pc address to the next
 DW_AT_low_pc address from the debug info?  Or should the consumer look
 at the function size from the symbol table?  Or should the consumer
 examine all the DIEs looking for address ranges?

 Does LLVM support anything like GCC's -freorder-blocks-and-partition
 option, and, if so, how does it represent the address ranges in the
 debug info?

 Ian

-- 
Alexey Samsonov, MSK


Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode

2013-11-27 Thread Yvan Roux
On 27 November 2013 18:58, Vladimir Makarov vmaka...@redhat.com wrote:
 On 11/27/2013, 12:16 PM, Jeff Law wrote:

 On 11/27/13 03:18, Yvan Roux wrote:

 Ping

 On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote:

 Ping.

 On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote:

 Hi,

 this patch fixed an LRA cycling due to secondary reload (Thumb mode).
 Notice that this patch is a prerequisite to turn on LRA by default on
 ARM.  Bootstrapped on a9 and a15 without any regression in the
 testsuite as LRA is off by default and with the regression reported in
 the thread bellow when LRA is on.

 http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html

 Thanks,
 Yvan

 2013-11-07  Yvan Roux  yvan.r...@linaro.org

  * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS):
 Return NO_REGS
  for LRA

 ?

 How can that be correct?

 The secondary reload macros/hooks define cases where additional registers
 are needed to reload certain forms of rtl.  I doubt the use of LRA
 completely eliminates the need for secondary reloads.

   When I designed LRA I wanted to remove as many hooks as possible because I
 thought insn constraints as major info source are enough. Unfortunately I
 did not succeed fully with secondary reload hooks and macros.  It is still
 needed for some complicated cases but general rule to port LRA to a target
 is to try to switch these hooks off.

   For example, LRA can generate a move of pseudos of classes for which
 hardware has no real insn.  After that looking on the move constraints,  LRA
 can generate more move insns and additional pseudos to generate moves (loads
 or stores if additional pseudos got NO_REGS) which represent real hardware
 insns.  In complicated cases when these macros are still needed, LRA runs
 into infinite loop of such move generation if the macros are not used.

   I might be return to a project to remove necessity of such hooks and
 macros for LRA but I am not sure when.  I guess I need to write a small
 guidance too how to port LRA.

   Also I found that in many cases as here, the macros although working for
 reload can confuse LRA (because of different reload pass and LRA
 implementation approaches).  So I guess the patch is ok although I can not
 approve it as I am not an arm port maintainer.

Thanks for the clarification Vladimir, and BTW I just find the same
cycling issue on iWMMXT.

Yvan


Re: [PATCH, i386]: Fix PR56788, _mm_frcz_sd and _mm_frcz_ss ignore their second argument

2013-11-27 Thread Uros Bizjak
On Wed, Nov 27, 2013 at 7:45 AM, Gopalasubramanian, Ganesh
ganesh.gopalasubraman...@amd.com wrote:
 Hopefully someone from AMD will provide tests that are mysteriously missing 
 from XOP testsuite.

 As pointed out by Marc, I added myself to the bug later.
 I was bit confused about the internal insn representation with 
 user-visible function.
 So, couldn't add test then and there. I could have solved that earlier. Sorry 
 for that.

 Attached is the test that checks the (controversial) frcz functions.

 Uros could you please add this to your patch while committing.

Thanks, I have changed the patch slightly to skip runtime test on
non-xop machines.

2013-11-27  Uros Bizjak  ubiz...@gmail.com
Ganesh Gopalasubramanian  ganesh.gopalasubraman...@amd.com

PR target/56788
* gcc.target/i386/xop-frczX.c: New test.

Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: gcc.target/i386/xop-frczX.c
===
--- gcc.target/i386/xop-frczX.c (revision 0)
+++ gcc.target/i386/xop-frczX.c (working copy)
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options -O2 -mxop } */
+
+#include xop-check.h
+
+#include x86intrin.h
+
+void
+check_mm_vmfrcz_sd (__m128d __A, __m128d __B)
+{
+  union128d a, b, c;
+  double d[2];
+
+  a.x = __A;
+  b.x = __B;
+  c.x = _mm_frcz_sd (__A, __B);
+  d[0] = b.a[0] - (int)b.a[0] ;
+  d[1] = a.a[1];
+  if (check_union128d (c, d))
+abort ();
+}
+
+void
+check_mm_vmfrcz_ss (__m128 __A, __m128 __B)
+{
+  union128 a, b, c;
+  float f[4];
+
+  a.x = __A;
+  b.x = __B;
+  c.x = _mm_frcz_ss (__A, __B);
+  f[0] = b.a[0] - (int)b.a[0] ;
+  f[1] = a.a[1];
+  f[2] = a.a[2];
+  f[3] = a.a[3];
+  if (check_union128 (c, f))
+abort ();
+}
+
+static void
+xop_test (void)
+{
+  union128 a, b;
+  union128d c,d;
+  int i;
+
+  for (i = 0; i  4; i++)
+{
+   a.a[i] = i + 3.5;
+   b.a[i] = i + 7.9;
+}
+  for (i = 0; i  2; i++)
+{
+   c.a[i] = i + 3.5;
+   d.a[i] = i + 7.987654321;
+}
+  check_mm_vmfrcz_ss (a.x, b.x);
+  check_mm_vmfrcz_sd (c.x, d.x);
+}


Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p

2013-11-27 Thread Jeff Law

On 11/27/13 10:30, Yvan Roux wrote:

Please include either the patch you are pinging or at the least a link to it
in the archives.


Ok, sorry for that, here is the patch and Changelog

Yvan


2013-11-17  Yvan Roux  yvan.r...@linaro.org

 * config/arm/arm.md (store_minmaxsi): Use only when
 optimize_function_for_size_p.

Thanks.

This is fine for the trunk.  And yes, having an insn's validity change 
based on what block it's in is most definitely bad.


I was a bit concerned have the x86 backend, as I happened to know it 
uses optimize_insn_for_* rather extensively.  But thankfully it's only 
used in splitters, expanders  peephole2 patterns, which should all be safe.


If you wouldn't mind, could you look at md.texi and see if there's a 
reasonable place to put verbage about this issue in the internals 
manual?  It might save someone time in the future :-)


Thanks,
Jeff



RE: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C

2013-11-27 Thread Iyer, Balaji V
HI Aldy and Jakub,
Attached, please find a fixed patch. I have fixed all the changes you 
have mentioned below. Is this OK to install?

Here are the ChangeLog entries:
gcc/ChangeLog
2013-11-27  Balaji V. Iyer  balaji.v.i...@intel.com

* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
Removed a carriage return from the warning string.
* omp-low.c (simd_clone_clauses_extract): Added a check for cilk plus
SIMD-enabled function attributes.

gcc/c/ChangeLog
2013-11-27  Balaji V. Iyer  balaji.v.i...@intel.com

* c-parser.c (struct c_parser::elem_fn_tokens): Added new field.
(c_parser_declaration_or_fndef): Added a check if elem_fn_tokens
field in parser is not empty.  If not-empty, call the function
c_parser_finish_omp_declare_simd.
(c_parser_elem_fn_vectorlength): New function.
(c_parser_elem_fn_expr_list): Likewise.
(c_finish_elem_fn_tokens): Likewise.
(c_parser_attributes): Added a elem_fn_tokens parameter.  Added a
check for vector attribute and if so call c_parser_elem_fn_expr_list.
Also, called c_finish_elem_fn_tokens when Cilk Plus is enabled.
(c_finish_omp_declare_simd): Added a check if elem_fn_tokens in
parser field is non-empty.  If so, parse them as you would parse
the omp declare simd pragma.

gcc/testsuite/ChangeLog
2013-11-27  Balaji V. Iyer  balaji.v.i...@intel.com

* c-c++-common/cilk-plus/EF/ef_test.c: New test.
* c-c++-common/cilk-plus/EF/ef_test2.c: Likewise.
* c-c++-common/cilk-plus/EF/vlength_errors.c: Likewise.
* c-c++-common/cilk-plus/EF/ef_error.c: Likewise.
* c-c++-common/cilk-plus/EF/ef_error2.c: Likewise.
* gcc.dg/cilk-plus/cilk-plus.exp: Added calls for the above tests.


Thanks,

Balaji V. Iyer.

 -Original Message-
 From: Aldy Hernandez [mailto:al...@redhat.com]
 Sent: Wednesday, November 27, 2013 10:52 AM
 To: Iyer, Balaji V
 Cc: Jakub Jelinek; gcc-patches@gcc.gnu.org
 Subject: Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly
 Elemental functions) for C
 
 Iyer, Balaji V balaji.v.i...@intel.com writes:
 
   c_finish_omp_declare_simd (c_parser *parser, tree fndecl, tree parms,
 vecc_token clauses)
   {
  +
  +  if (flag_enable_cilkplus
  +   clauses.exists ()  !vec_safe_is_empty (parser-
 elem_fn_tokens))
  +{
  +  error (%#pragma omp declare simd% cannot be used in the same
  +function marked as a SIMD-enabled function);
  +  vec_free (parser-elem_fn_tokens);
  +  return;
  +}
 
 I see Cilk Plus elementals are handled as omp declare simd in the above
 function.  This function sets an omp declare simd attribute here:
 
  if (c != NULL_TREE)
 c = tree_cons (NULL_TREE, c, NULL_TREE);
   c = build_tree_list (get_identifier (omp declare simd), c);
   TREE_CHAIN (c) = DECL_ATTRIBUTES (fndecl);
   DECL_ATTRIBUTES (fndecl) = c;
 
 but you also need a cilk plus elemental attribute so the rest of the 
 compiler
 can differentiate Cilk Plus elementals from omp declare simds.
 

Fixed.

 See simd_clone_clauses_extract():
 
 +  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
 + be cloned have a distinctive artificial label in addition to omp
 + declare simd.  */
 +  bool cilk_clone
 += (flag_enable_cilkplus
 +lookup_attribute (cilk plus elemental,
 +   DECL_ATTRIBUTES (node-decl)));
 
 Also you probably want some kind of test for this, so test for whatever
 cilk_elemental is doing.  In trunk, the only use of cilk_elemental is in
 ix86_simd_clone_compute_vecsize_and_simdlen(), so come up with some
 x86 specific test for cilk_elemental==true.
 

Fixed. 

 Aldy
Index: gcc/c/c-parser.c
===
--- gcc/c/c-parser.c(revision 205457)
+++ gcc/c/c-parser.c(working copy)
@@ -208,6 +208,12 @@
   /* True if we are in a context where the Objective-C Property attribute
  keywords are valid.  */
   BOOL_BITFIELD objc_property_attr_context : 1;
+
+  /* Cilk Plus specific parser/lexer information.  */
+
+  /* Buffer to hold all the tokens from parsing the vector attribute for the
+ SIMD Enabled functions (formerly known as elemental functions).  */
+  vec c_token, va_gc *elem_fn_tokens;
 } c_parser;
 
 
@@ -1647,7 +1653,8 @@
C_DTR_NORMAL, dummy);
   if (declarator == NULL)
{
- if (omp_declare_simd_clauses.exists ())
+ if (omp_declare_simd_clauses.exists ()
+ || !vec_safe_is_empty (parser-elem_fn_tokens))
c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE,
   omp_declare_simd_clauses);
  c_parser_skip_to_end_of_block_or_statement (parser);
@@ -1734,7 +1741,8 @@
  chainon (postfix_attrs, 

  1   2   >