[PATCH] Get rid of useless -fno-rtti for libubsan
Hi all, As discussed in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59106 only a subset of libubsan should be built with RTTI support. Attached patch adds custom build rules for relevant files. -Y diff --git a/libsanitizer/ubsan/Makefile.am b/libsanitizer/ubsan/Makefile.am index e98984a..eaab156 100644 --- a/libsanitizer/ubsan/Makefile.am +++ b/libsanitizer/ubsan/Makefile.am @@ -4,7 +4,7 @@ AM_CPPFLAGS = -I $(top_srcdir) -I $(top_srcdir)/include gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER) DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros +AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS) ACLOCAL_AMFLAGS = -I m4 @@ -13,11 +13,13 @@ toolexeclib_LTLIBRARIES = libubsan.la ubsan_files = \ ubsan_diag.cc \ ubsan_handlers.cc \ - ubsan_handlers_cxx.cc \ - ubsan_type_hash.cc \ ubsan_value.cc -libubsan_la_SOURCES = $(ubsan_files) +ubsan_cxx_files = \ + ubsan_handlers_cxx.cc \ + ubsan_type_hash.cc + +libubsan_la_SOURCES = $(ubsan_files) $(ubsan_cxx_files) libubsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la if !USING_MAC_INTERPOSE libubsan_la_LIBADD += $(top_builddir)/interception/libinterception.la @@ -25,6 +27,17 @@ endif libubsan_la_LIBADD += $(LIBSTDCXX_RAW_CXX_LDFLAGS) libubsan_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` -lpthread -ldl +# Use special rules for files/objects that require RTTI support. +ubsan_handlers_cxx.lo: ubsan_handlers_cxx.cc + $(LTCXXCOMPILE) -frtti -c $ +ubsan_handlers_cxx.o: ubsan_handlers_cxx.cc + $(CXXCOMPILE) -frtti -c $ + +ubsan_type_hash.lo: ubsan_type_hash.cc + $(LTCXXCOMPILE) -frtti -c $ +ubsan_type_hash.o: ubsan_type_hash.cc + $(CXXCOMPILE) -frtti -c $ + # Work around what appears to be a GNU make bug handling MAKEFLAGS # values defined in terms of make variables, as is the case for CC and # friends when we are called from the top level Makefile. diff --git a/libsanitizer/ubsan/Makefile.in b/libsanitizer/ubsan/Makefile.in index 6812538..198e31d 100644 --- a/libsanitizer/ubsan/Makefile.in +++ b/libsanitizer/ubsan/Makefile.in @@ -81,9 +81,9 @@ am__DEPENDENCIES_1 = libubsan_la_DEPENDENCIES = \ $(top_builddir)/sanitizer_common/libsanitizer_common.la \ $(am__append_1) $(am__DEPENDENCIES_1) -am__objects_1 = ubsan_diag.lo ubsan_handlers.lo ubsan_handlers_cxx.lo \ - ubsan_type_hash.lo ubsan_value.lo -am_libubsan_la_OBJECTS = $(am__objects_1) +am__objects_1 = ubsan_diag.lo ubsan_handlers.lo ubsan_value.lo +am__objects_2 = ubsan_handlers_cxx.lo ubsan_type_hash.lo +am_libubsan_la_OBJECTS = $(am__objects_1) $(am__objects_2) libubsan_la_OBJECTS = $(am_libubsan_la_OBJECTS) libubsan_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -240,7 +240,7 @@ AM_CPPFLAGS = -I $(top_srcdir) -I $(top_srcdir)/include # May be used by toolexeclibdir. gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER) AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic \ - -Wno-long-long -fPIC -fno-builtin -fno-exceptions \ + -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \ -fomit-frame-pointer -funwind-tables -fvisibility=hidden \ -Wno-variadic-macros $(LIBSTDCXX_RAW_CXX_CXXFLAGS) ACLOCAL_AMFLAGS = -I m4 @@ -248,11 +248,13 @@ toolexeclib_LTLIBRARIES = libubsan.la ubsan_files = \ ubsan_diag.cc \ ubsan_handlers.cc \ - ubsan_handlers_cxx.cc \ - ubsan_type_hash.cc \ ubsan_value.cc -libubsan_la_SOURCES = $(ubsan_files) +ubsan_cxx_files = \ + ubsan_handlers_cxx.cc \ + ubsan_type_hash.cc + +libubsan_la_SOURCES = $(ubsan_files) $(ubsan_cxx_files) libubsan_la_LIBADD = \ $(top_builddir)/sanitizer_common/libsanitizer_common.la \ $(am__append_1) $(LIBSTDCXX_RAW_CXX_LDFLAGS) @@ -575,6 +577,17 @@ uninstall-am: uninstall-toolexeclibLTLIBRARIES tags uninstall uninstall-am uninstall-toolexeclibLTLIBRARIES +# Use special rules for files/objects that require RTTI support. +ubsan_handlers_cxx.lo: ubsan_handlers_cxx.cc + $(LTCXXCOMPILE) -frtti -c $ +ubsan_handlers_cxx.o: ubsan_handlers_cxx.cc + $(CXXCOMPILE) -frtti -c $ + +ubsan_type_hash.lo: ubsan_type_hash.cc + $(LTCXXCOMPILE) -frtti -c $ +ubsan_type_hash.o: ubsan_type_hash.cc + $(CXXCOMPILE) -frtti -c $ + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT:
[Patch Ping] Add slim-lto support to gcc's build machinery
On 2013.11.20 at 15:43 +0100, Markus Trippelsdorf wrote: On 2013.11.20 at 14:41 +0100, Paolo Bonzini wrote: Note that you need to regenerate all users of libtool.m4. Please post a patch _with_ the regeneration so that whoever applies it won't screw up. Ping. Can you please have look, Paolo? And if it is not too much work, it would be nice if you could also apply the patch. Thanks. -- Markus
Re: [PATCH] Add signed integer overflow checking to ubsan
On Fri, Nov 22, 2013 at 10:54:16AM +0100, Marek Polacek wrote: Hi! Working virtually out of Pago Pago. The following is the implementation of the signed integer overflow checking for the UndefinedBehaviorSanitizer. I wrote some of the generic bits; Jakub did the i?86 handlind/optabs as well as VRP/fold bits. I'd like to ping this patch. Here's a rebased version that contains a fix for miscompilation with -Os -m32. 2013-11-27 Jakub Jelinek ja...@redhat.com Marek Polacek pola...@redhat.com * opts.c (common_handle_option): Handle -fsanitize=signed-integer-overflow. * config/i386/i386.md (addvmode4, subvmode4, mulvmode4, negvmode3, negvmode3_1): Define expands. (*addvmode4, *subvmode4, *mulvmode4, *negvmode3): Define insns. * sanitizer.def (BUILT_IN_UBSAN_HANDLE_ADD_OVERFLOW, BUILT_IN_UBSAN_HANDLE_SUB_OVERFLOW, BUILT_IN_UBSAN_HANDLE_MUL_OVERFLOW, BUILT_IN_UBSAN_HANDLE_NEGATE_OVERFLOW): Define. * ubsan.h (PROB_VERY_UNLIKELY, PROB_EVEN, PROB_VERY_LIKELY, PROB_ALWAYS): Define. (ubsan_build_overflow_builtin): Declare. * gimple-fold.c (gimple_fold_stmt_to_constant_1): Add folding of internal functions. * ubsan.c (PROB_VERY_UNLIKELY): Don't define here. (ubsan_build_overflow_builtin): New function. (instrument_si_overflow): Likewise. (ubsan_pass): Add signed integer overflow checking. (gate_ubsan): Enable the pass also when SANITIZE_SI_OVERFLOW. * flag-types.h (enum sanitize_code): Add SANITIZE_SI_OVERFLOW. * internal-fn.c: Include ubsan.h and target.h. (ubsan_expand_si_overflow_addsub_check): New function. (ubsan_expand_si_overflow_neg_check): Likewise. (ubsan_expand_si_overflow_mul_check): Likewise. (expand_UBSAN_CHECK_ADD): Likewise. (expand_UBSAN_CHECK_SUB): Likewise. (expand_UBSAN_CHECK_MUL): Likewise. * fold-const.c (fold_binary_loc): Don't fold A + (-B) - A - B and (-A) + B - B - A when doing the signed integer overflow checking. * internal-fn.def (UBSAN_CHECK_ADD, UBSAN_CHECK_SUB, UBSAN_CHECK_MUL): Define. * tree-vrp.c (extract_range_basic): Handle internal calls. * optabs.def (addv4_optab, subv4_optab, mulv4_optab, negv4_optab): New optabs. c-family/ * c-gimplify.c (c_gimplify_expr): If doing the integer-overflow sanitization, call unsigned_type_for only when !TYPE_OVERFLOW_WRAPS. testsuite/ * c-c++-common/ubsan/overflow-mul-2.c: New test. * c-c++-common/ubsan/overflow-add-1.c: New test. * c-c++-common/ubsan/overflow-add-2.c: New test. * c-c++-common/ubsan/overflow-mul-1.c: New test. * c-c++-common/ubsan/overflow-sub-1.c: New test. * c-c++-common/ubsan/overflow-sub-2.c: New test. * c-c++-common/ubsan/overflow-negate-1.c: New test. --- gcc/opts.c.mp 2013-11-27 08:46:28.032629413 +0100 +++ gcc/opts.c 2013-11-27 08:46:57.566753291 +0100 @@ -1460,6 +1460,8 @@ common_handle_option (struct gcc_options { vla-bound, SANITIZE_VLA, sizeof vla-bound - 1 }, { return, SANITIZE_RETURN, sizeof return - 1 }, { null, SANITIZE_NULL, sizeof null - 1 }, + { signed-integer-overflow, SANITIZE_SI_OVERFLOW, + sizeof signed-integer-overflow -1 }, { NULL, 0, 0 } }; const char *comma; --- gcc/config/i386/i386.md.mp 2013-11-27 08:46:27.890628818 +0100 +++ gcc/config/i386/i386.md 2013-11-27 08:46:57.491752976 +0100 @@ -6198,6 +6198,42 @@ [(set_attr type alu) (set_attr mode QI)]) +(define_mode_attr widerintmode [(QI HI) (HI SI) (SI DI) (DI TI)]) + +;; Add with jump on overflow. +(define_expand addvmode4 + [(parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO (plus:widerintmode + (sign_extend:widerintmode +(match_operand:SWI 1 register_operand)) + (sign_extend:widerintmode +(match_operand:SWI 2 general_operand))) + (sign_extend:widerintmode + (plus:SWI (match_dup 1) (match_dup 2) + (set (match_operand:SWI 0 register_operand) + (plus:SWI (match_dup 1) (match_dup 2)))]) + (set (pc) (if_then_else + (eq (reg:CCO FLAGS_REG) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + ) + +(define_insn *addvmode4 + [(set (reg:CCO FLAGS_REG) + (eq:CCO (plus:widerintmode + (sign_extend:widerintmode + (match_operand:SWI 1 nonimmediate_operand %0,0)) + (sign_extend:widerintmode + (match_operand:SWI 2 general_operand g,ri))) + (sign_extend:widerintmode + (plus:SWI (match_dup 1)
Re: Patch ping (stage1-ish patches)
Here is the patch series that had been posted in Sep that is aimed to isolate the Android support from targets that actually don't have that support (We discussed the need of it with Jakub here http://gcc.gnu.org/ml/gcc-patches/2013-09/msg00185.html): http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01420.html thanks, Alexander 2013/11/27 Jakub Jelinek ja...@redhat.com: On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote: In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. Ok, doing that now for my pending patches: Masked load/store vectorization: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01268.html http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01437.html http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01550.html Elemental function support (updated version of the earlier patch): http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03197.html AddressSanitizer use-after-return instrumentation: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02070.html Use libbacktrace for libsanitizer's symbolization (will need tweaking, depending on next libsanitizer merge, whether the corresponding sanitizer_common changes are upstreamed or not, and perhaps to compile libbacktrace sources again with renamed function names and other tweaks - different allocator, only subset of files, etc.; but, there is a P1 bug for this anyway): http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02055.html Jakub
Re: [RFC] [PATCH V2, AARCH64]: Re: [RFC] [PATCH, AARCH64] Machine descriptions to support stack smashing protection
Hi Richard, I don't think it's good to have long lists of targets on generic tests. Can we factor this out into a target-supports option? I have updated the patch as per your recommendation. Please let me know if it is fine. 2013-11-26 Venkataramanan Kumar venkataramanan.ku...@linaro.org * configure.ac (gcc_cv_libc_provides_tls_ssp): Add test to check TLS support in target C library for Aarch64. * configure: Regenerate. * config.in: Regenerate. * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test) (stack_protect_set_mode, stack_protect_test_mode): Add initial machine description for Stack Smashing Protector. * config/aarch64/aarch64-linux.h (TARGET_THREAD_SSP_OFFSET): Define. 2013-11-26 Venkataramanan Kumar venkataramanan.ku...@linaro.org * lib/target-supports.exp (check_effective_target_stack_protection): New procedure. * g++.dg/fstack-protector-strong.C: Add target check for stack protection. * gcc.dg/fstack-protector-strong.c: Likewise. regards, Venkat. On 26 November 2013 20:23, Richard Earnshaw rearn...@arm.com wrote: On 26/11/13 14:16, Venkataramanan Kumar wrote: Index: gcc/testsuite/gcc.dg/fstack-protector-strong.c === --- gcc/testsuite/gcc.dg/fstack-protector-strong.c(revision 205378) +++ gcc/testsuite/gcc.dg/fstack-protector-strong.c(working copy) @@ -1,6 +1,6 @@ /* Test that stack protection is done on chosen functions. */ -/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */ +/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* aarch64-*-* } } */ /* { dg-options -O2 -fstack-protector-strong } */ I don't think it's good to have long lists of targets on generic tests. Can we factor this out into a target-supports option? R. Index: gcc/configure.ac === --- gcc/configure.ac(revision 205378) +++ gcc/configure.ac(working copy) @@ -4896,6 +4896,24 @@ [Define if your target C library provides stack protector support]) fi +# Test for tls based stack protector support in target C library. +AC_CACHE_CHECK(TLS stack guard support in target C library, + gcc_cv_libc_provides_tls_ssp, + [gcc_cv_libc_provides_tls_ssp=no + case $target in + aarch64-*-linux* | aarch64-*-kfreebsd*-gnu | aarch64-*-knetbsd*-gnu) + # glibc 2.19 and later provides TLS access to stack guard canary + # currently set for Aarch64. + GCC_GLIBC_VERSION_GTE_IFELSE([2], [19], [gcc_cv_libc_provides_tls_ssp=yes]) + ;; + *) gcc_cv_libc_provides_tls_ssp=no ;; + esac]) + +if test x$gcc_cv_libc_provides_tls_ssp = xyes; then + AC_DEFINE(TARGET_LIBC_PROVIDES_TLS_SSP, 1, + [Define if your target C library provides TLS stack protector support.]) +fi + # Test for sys/sdt.h on the target. GCC_TARGET_TEMPLATE([HAVE_SYS_SDT_H]) AC_MSG_CHECKING(sys/sdt.h in the target C library) Index: gcc/configure === --- gcc/configure (revision 205378) +++ gcc/configure (working copy) @@ -27127,6 +27127,35 @@ fi +# Test for tls based stack protector support in target C library. +{ $as_echo $as_me:${as_lineno-$LINENO}: checking TLS stack guard support in target C library 5 +$as_echo_n checking TLS stack guard support in target C library... 6; } +if test ${gcc_cv_libc_provides_tls_ssp+set} = set; then : + $as_echo_n (cached) 6 +else + gcc_cv_libc_provides_tls_ssp=no + case $target in + aarch64-*-linux* | aarch64-*-kfreebsd*-gnu | aarch64-*-knetbsd*-gnu) + # glibc 2.19 and later provides TLS access to stack guard canary + # currently set for Aarch64. + +if test $glibc_version_major -gt 2 \ + || ( test $glibc_version_major -eq 2 test $glibc_version_minor -ge 19 ); then : + gcc_cv_libc_provides_tls_ssp=yes +fi + ;; + *) gcc_cv_libc_provides_tls_ssp=no ;; + esac +fi +{ $as_echo $as_me:${as_lineno-$LINENO}: result: $gcc_cv_libc_provides_tls_ssp 5 +$as_echo $gcc_cv_libc_provides_tls_ssp 6; } + +if test x$gcc_cv_libc_provides_tls_ssp = xyes; then + +$as_echo #define TARGET_LIBC_PROVIDES_TLS_SSP 1 confdefs.h + +fi + # Test for sys/sdt.h on the target. { $as_echo $as_me:${as_lineno-$LINENO}: checking sys/sdt.h in the target C library 5 Index: gcc/testsuite/lib/target-supports.exp === --- gcc/testsuite/lib/target-supports.exp (revision 205378) +++ gcc/testsuite/lib/target-supports.exp (working copy) @@ -808,6 +808,17 @@ } -fstack-protector] } +# Return 1 the target supports stack protection. +proc check_effective_target_stack_protection { } { +if { [istarget i?86-*-*] + || [istarget x86_64-*-*] + || [istarget aarch64-*-*] + || [istarget s390x-*-*] } { +
Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.
Am 27.11.2013 08:22, schrieb Sergey Ostanevich: Done. Thanks for fixing Richard's and Jakub's comments and parts of mine. +have the same meaning as described in @option{fvect-cost-model} and by +default a cost model defined with @option{fvect-cost-model} is used. As mentioned before, pleae add a hyphen before fvect (i.e. @option{fvect-cost-model} - @option{-fvect-cost-model}) Regarding a test case: I still think it would be useful to have one, but I somehow seemed to have messed up with my previous one - I fail to get it not to vectorize with omp simd due to cost reasons. Thus, I don't have one. Tobias /* { dg-require-effective-target vect_int } */ /* { dg-additional-options -fopenmp-simd -Wopenmp-simd -mno-sse2 } */ /* NOTE: Without -mno-sse2, the loop is vectorized. */ #include stdarg.h #include ../../tree-vect.h #define N 32 struct t{ int k[N]; int l; }; struct s{ char a; /* aligned */ char b[N-1]; /* unaligned (offset 1B) */ char c[N];/* aligned (offset NB) */ struct t d; /* aligned (offset 2NB) */ struct t e; /* unaligned (offset 2N+4N+4 B) */ }; struct s tmp = { 1 }; int main1 () { int i; /* unaligned */ #pragma omp simd for (i = 0; i N/2; i++) { tmp.b[i] = 5; /* { dg-warning vectorization did not happen for a simd loop } */ } /* check results: */ for (i = 0; i N/2; i++) { if (tmp.b[i] != 5) abort (); } return 0; } int main (void) { check_vect (); return main1 (); } /* { dg-final { scan-tree-dump-times loop vectorized 0 vect } } */ /* { dg-final { scan-tree-dump-times vectorization not profitable 1 vect } } */ /* ! dg-final ! cleanup-tree-dump vect ! ! */ /* { dg-require-effective-target vect_int } */ /* { dg-additional-options -fopenmp-simd -Wopenmp-simd -fsimd-cost-model=unlimited } */ #include stdarg.h #include ../../tree-vect.h #define N 32 struct t{ int k[N]; int l; }; struct s{ char a; /* aligned */ char b[N-1]; /* unaligned (offset 1B) */ char c[N];/* aligned (offset NB) */ struct t d; /* aligned (offset 2NB) */ struct t e; /* unaligned (offset 2N+4N+4 B) */ }; struct s tmp = { 1 }; int main1 () { int i; /* unaligned */ #pragma omp simd for (i = 0; i N/2; i++) { tmp.b[i] = 5; /* dg-warning vectorization did not happen for a simd loop */ } /* check results: */ for (i = 0; i N/2; i++) { if (tmp.b[i] != 5) abort (); } return 0; } int main (void) { check_vect (); return main1 (); } /* { dg-final { scan-tree-dump-times loop vectorized 1 vect } } */ /* { dg-final { scan-tree-dump-times vectorization not profitable 0 vect } } */ /* { dg-final { cleanup-tree-dump vect } } */
[PING] [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c
Ping? Thanks! -Zhenqiang -Original Message- From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches- ow...@gcc.gnu.org] On Behalf Of Zhenqiang Chen Sent: Friday, November 08, 2013 10:51 AM To: gcc-patches@gcc.gnu.org Cc: Ramana Radhakrishnan Subject: [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c Hi, lp1243022.c will fail with options: -mfpu=neon -mfloat-abi=hard. Logs show it does not generate auto-incremental instruction in pass auto_inc_dec. In this case, the check of REG_INC note at subreg2 will be invalid. So skip the check for target arm-neon. All PASS with the following options: -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=neon -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=neon -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=neon Is it OK? Thanks! -Zhenqiang testsuite/ChangeLog: 2013-11-08 Zhenqiang Chen zhenqiang.c...@linaro.org * gcc.target/arm/lp1243022.c: Skip target arm-neon. --- a/gcc/testsuite/gcc.target/arm/lp1243022.c +++ b/gcc/testsuite/gcc.target/arm/lp1243022.c @@ -1,7 +1,7 @@ /* { dg-do compile { target arm_thumb2 } } */ /* { dg-options -O2 -fdump-rtl-subreg2 } */ -/* { dg-final { scan-rtl-dump REG_INC subreg2 } } */ +/* { dg-final { scan-rtl-dump REG_INC subreg2 { target { ! arm_neon} } } } */ /* { dg-final { cleanup-rtl-dump subreg2 } } */ struct device; typedef unsigned int __u32;
Re: [PATCH] Remove keep_aligning from get_inner_reference
I think you are right, this flag is no longer necessary, and removing this code path would simplify everything. Therefore I'd like to propose to remove the keep_aligning parameter of get_inner_reference as a split-out patch. Boot-strapped (with languages=all,ada,go) and regression-tested on x86_64-linux-gnu. I don't understand how you can commit a patch that changes something only on strict-alignment platforms and test it only on x86-64. This change *must* be tested with Ada on a strict-alignment platform, that's the only combination for which it is exercised. If you cannot do that, then please back it out. More generally speaking, it's not acceptable to make cleanup changes like that in the RTL expander without extreme care, which of course starts with proper testing. The patch should not have been approved either for that reason. -- Eric Botcazou
Re: [PATCH] OpenMP #pragma omp declare simd support (take 2)
On Mon, 25 Nov 2013, Jakub Jelinek wrote: On Fri, Nov 22, 2013 at 11:08:41AM +0100, Richard Biener wrote: The expr.c hunk is also ok independently of the patch. This is committed now. Ah, here is the stuff moved from. I suppose the IPA param re-org is ok for trunk separately as well. And this too (without the simdlen field of the adjustment, which turned out to be unnecessary). What's the reason you cannot defer SIMD cloning to LTRANS stage as simple IPA pass next to IPA-PTA? Ok, deferring till after IPA-PTA was easy, just small ipa-cp.c changes (look at the attribute rather than simd*clone* fields), passes.def and had to tweak ipa_add_new_function which assumed that all new functions must be definitions with gimple body. + /* Ignore #pragma omp declare simd functions + if they don't have data references in the + call stmt itself. */ + if (j == n +!(op + (DECL_P (op) +|| (REFERENCE_CLASS_P (op) + get_base_address (op) + continue; Hmm. I guess I have an idea now how to better support calls in data-ref/dependence analysis. The above is fine for now - you might want to dump sth here if you fail because datarefs in a declare simd fn call. Haven't added any dump here, because there is the: + } + } + } + LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs; + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + not vectorized: loop contains function + calls or data references that cannot + be analyzed\n); which is dumped in that case. Would another message be useful before that (or instead of that)? Hmm, not sure - we can leave it as-is. I'd have expected an unconditional continue here (and leave STMT_VINFO_VECTYPE == NULL - fact is that the vector type of the argument is determined by its definition and thus may be different from what you record here anyway). Ok, now using STMT_VINFO_VECTYPE = NULL. + if (thisarginfo.vectype != NULL_TREE +loop_vinfo +TREE_CODE (op) == SSA_NAME +simple_iv (loop, loop_containing_stmt (stmt), op, iv, false) +tree_fits_shwi_p (iv.step)) + { + thisarginfo.linear_step = tree_to_shwi (iv.step); Hmm, you should check thisarginfo.dt instead (I assume this case is for induction/reduction defs)? In this case you also should use STMT_VINFO_LOOP_PHI_EVOLUTION_PART and not re-analyze via simple_iv. As discussed on IRC, STMT_VINFO_LOOP_PHI_EVOLUTION_PART can't be used, because it can be arbitrary linear function argument, not just an IV itself. vect-simd-clone-11.c testcase contains examples. This patch doesn't avoid calling simple_iv again during transform phase, I don't have a failing testcase for that yet (but filed PR59288 for the preexisting issue). You'll eventually run into a testcase - I promise ;) Anyway, the solution will be to store the result somewhere (in stmt_vinfo) and re-use it. We can fix this with a followup (see my fix for PR59288). + thisarginfo.op = iv.base; + } + else if (thisarginfo.vectype == NULL_TREE + POINTER_TYPE_P (TREE_TYPE (op))) + thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; So this is for dt_external defs? Please switch on thisarginfo.dt here - that more naturally explains what you are doing (otherwise this definitely misses a comment). Done. Please save the result from the analysis (selecting the simd clone) in the stmt_vinfo and skip the analysis during transform phase. Done. + vec_oprnd0 + = build3 (BIT_FIELD_REF, atype, vec_oprnd0, + build_int_cst (integer_type_node, prec), + build_int_cst (integer_type_node, + (m (k - 1)) * prec)); Some helpers to build the tree to select a sub-vector would be nice (I remember seeing this kind of pattern elsewhere). I've simplified this to use size_int and bitsize_int for the args (as e.g. fold-const.c uses to create BIT_FIELD_REFs), but don't see what actually could be put into the helper, besides the BIT_FIELD_REF build there is nothing common with other spots and the arguments to that call also differ a lot. Hmm, ok. For SINGLE_RHS assigns I prefer gimple_build_assign. Done everywhere. + /* Update the exception handling table with the vector stmt if necessary. */ + if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt)) +
Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.
On Wed, 27 Nov 2013, Tobias Burnus wrote: Am 27.11.2013 08:22, schrieb Sergey Ostanevich: Done. Thanks for fixing Richard's and Jakub's comments and parts of mine. +have the same meaning as described in @option{fvect-cost-model} and by +default a cost model defined with @option{fvect-cost-model} is used. As mentioned before, pleae add a hyphen before fvect (i.e. @option{fvect-cost-model} - @option{-fvect-cost-model}) Regarding a test case: I still think it would be useful to have one, but I somehow seemed to have messed up with my previous one - I fail to get it not to vectorize with omp simd due to cost reasons. Thus, I don't have one. Ok with that change - no need to re-post before applying. Thanks, Richard.
[PATCH, score] Remove unused REG_CLASS_FROM_LETTER define
2013-11-27 Chen Liqin liqin@gmail.com * config/score/score.h (REG_CLASS_FROM_LETTER): Delete. Index: gcc/config/score/score.h === --- gcc/config/score/score.h(revision 205384) +++ gcc/config/score/score.h(working copy) @@ -395,9 +395,6 @@ /* The class value for index registers. */ #define INDEX_REG_CLASSNO_REGS -extern enum reg_class score_char_to_class[256]; -#define REG_CLASS_FROM_LETTER(C) score_char_to_class[(unsigned char) (C)] - /* Addressing modes, and classification of registers for them. */ #define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ score_regno_mode_ok_for_base_p (REGNO, 1) Regards --liqin
Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.
On Fri, 22 Nov 2013, Cong Hou wrote: Hi Currently in GCC vectorization, some loop invariant may be detected after aliasing checks, which can be hoisted outside of the loop. The current method in GCC may break the information built during the analysis phase, causing some crash (see PR59006 and PR58921). This patch improves the loop invariant hoisting by delaying it until all statements are vectorized, thereby keeping all built information. But those loop invariant statements won't be vectorized, and if a variable is defined by one of those loop invariant, it is treated as an external definition. Bootstrapped and testes on an x86-64 machine. Hmm. I'm still thinking that we should handle this during the regular transform step. Like with the following incomplete patch. Missing is adjusting the rest of the vectorizable_* functions to handle the case where all defs are dt_external or constant by setting their own STMT_VINFO_DEF_TYPE to dt_external. From the gcc.dg/vect/pr58508.c we get only 4 hoists instead of 8 because of this (I think). Also gcc.dg/vect/pr52298.c ICEs for yet unanalyzed reason. I can take over the bug if you like. Thanks, Richard. Index: gcc/tree-vect-data-refs.c === *** gcc/tree-vect-data-refs.c (revision 205435) --- gcc/tree-vect-data-refs.c (working copy) *** again: *** 3668,3673 --- 3668,3682 } STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true; } + else if (loop_vinfo + integer_zerop (DR_STEP (dr))) + { + /* All loads from a non-varying address will be disambiguated +by data-ref analysis or via a runtime alias check and thus +they will become invariant. Force them to be vectorized +as external. */ + STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def; + } } /* If we stopped analysis at the first dataref we could not analyze Index: gcc/tree-vect-loop-manip.c === *** gcc/tree-vect-loop-manip.c (revision 205435) --- gcc/tree-vect-loop-manip.c (working copy) *** vect_loop_versioning (loop_vec_info loop *** 2269,2275 /* Extract load statements on memrefs with zero-stride accesses. */ ! if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) { /* In the loop body, we iterate each statement to check if it is a load. Then we check the DR_STEP of the data reference. If DR_STEP is zero, --- 2269,2275 /* Extract load statements on memrefs with zero-stride accesses. */ ! if (0 LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) { /* In the loop body, we iterate each statement to check if it is a load. Then we check the DR_STEP of the data reference. If DR_STEP is zero, Index: gcc/tree-vect-loop.c === *** gcc/tree-vect-loop.c(revision 205435) --- gcc/tree-vect-loop.c(working copy) *** vect_transform_loop (loop_vec_info loop_ *** 5995,6000 --- 5995,6020 } } + /* If the stmt is loop invariant simply move it. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_external_def) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + hoisting out of the vectorized loop: ); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); + dump_printf (MSG_NOTE, \n); + } + gsi_remove (si, false); + if (gimple_vuse (stmt)) + gimple_set_vuse (stmt, NULL); + basic_block new_bb; + new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (loop), +stmt); + gcc_assert (!new_bb); + continue; + } + /* vectorize statement */ if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, transform statement.\n); Index: gcc/tree-vect-stmts.c === *** gcc/tree-vect-stmts.c (revision 205435) --- gcc/tree-vect-stmts.c (working copy) *** vectorizable_operation (gimple stmt, gim *** 3497,3502 --- 3497,3503 vectree vec_oprnds2 = vNULL; tree vop0, vop1, vop2; bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + bool all_ops_external = true; int vf; if (!STMT_VINFO_RELEVANT_P (stmt_info) !bb_vinfo) *** vectorizable_operation (gimple stmt, gim *** 3557,3562 --- 3558,3565 use not simple.\n); return false; } + if (dt[0] != vect_external_def dt[0] != vect_constant_def) +
Re: [patch] set MULTIARCH_DIRNAME for multilib architectures
On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote: Am 13.06.2013 11:42, schrieb Richard Sandiford: Bernhard Reutner-Fischer rep.dot@gmail.com writes: On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com wrote: Matthias Klose d...@ubuntu.com writes: Index: config/mips/t-linux64 === --- config/mips/t-linux64(revision 200012) +++ config/mips/t-linux64(working copy) @@ -24,3 +24,13 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq (,$(findstring abin32,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) +else +ifneq (,$(findstring abi64,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +endif +endif findstring seems a bit fragile for a full triple. I think it would be better to have something similar to the current MIPS_SOFT definition: MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, $(target_cpu_default)) $(filter soft, $(with_float))),soft) but for ABIs. It could then also take with_abi into account. Maybe something like: MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(target_cpu_default)), n32), \ o32) (completely untested). Bikeshedding: Doko would know, but ISTR that $(or) did not exist in make-3.80 which is currently the minimum prerequisite, fwiw. Gah, that's a pity. Thanks for the catch though. Maybe firstword would be OK instead. I see I also fell into the usual ABI trap. It wouldn't have affected the use in this patch, but the default ought to be 32 rather than o32. the define is in tm_defines, not target_cpu_default. MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(tm_defines)), n32), \ 32) However I can't see yet how this should be used. Maybe Aurelian could come up with a tested version of this patch? How about the patch below? It first determines the ABI without using $(or), by looking (in this order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32 if none of the previous matches. Then the multiarch path is constructed from the ABI. Index: gcc/config/mips/t-linux64 === --- gcc/config/mips/t-linux64 (révision 205437) +++ gcc/config/mips/t-linux64 (copie de travail) @@ -24,3 +24,22 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq ($(with_abi),) +MIPS_ABI = $(with_abi) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, $(tm_defines))),n32) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=32 +endif + +ifeq ($(MIPS_ABI),32) +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT)) +endif -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
Ping On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote: Ping. On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote: Hi, this patch fixed an LRA cycling due to secondary reload (Thumb mode). Notice that this patch is a prerequisite to turn on LRA by default on ARM. Bootstrapped on a9 and a15 without any regression in the testsuite as LRA is off by default and with the regression reported in the thread bellow when LRA is on. http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html Thanks, Yvan 2013-11-07 Yvan Roux yvan.r...@linaro.org * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return NO_REGS for LRA.
Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p
Ping. On 19 November 2013 09:52, Yvan Roux yvan.r...@linaro.org wrote: yep, all good performance-wise :) Great, Thanks Kyrill. Ok for trunk ? Yvan
Re: RFA: patch to fix PR58785 (an ARM LRA crash)
Ping. On 20 November 2013 10:22, Yvan Roux yvan.r...@linaro.org wrote: Hi, as Richard said, only a subset of rclass is allowed to be returned by preferred_reload_class. I've tested the attached patched in Thumb mode, on ARMv5, A9 and A9hf and on cross A15 without regression. Yvan 2013-11-20 Yvan Roux yvan.r...@linaro.org PR target/58785 * config/arm/arm.c (arm_preferred_reload_class): Only return LO_REGS when rclass is GENERAL_REGS.
Re: [C++ Patch] PR 58647
Hi, On 11/26/2013 08:10 PM, Jason Merrill wrote: On 11/26/2013 11:43 AM, Paolo Carlini wrote: We have got a bunch of testcases, for example constexpr-ex4.C - attached for your convenience - which trigger the assert !really_overloaded_fn (t) ... What do you suggest? Aha. Well, in that case we really can't get a constant value, so I'd assert allow_non_constant, set *non_constant_p, and return t. Actually, I'd do that any time we get a function COMPONENT_REF, since that will only ever happen under the checking maybe_constant_value call anyway. So a small expansion of your original patch. Thus something like the below? Passes testing. Thanks, Paolo. Index: cp/semantics.c === --- cp/semantics.c (revision 205433) +++ cp/semantics.c (working copy) @@ -9601,6 +9601,12 @@ cxx_eval_constant_expression (const constexpr_call break; case COMPONENT_REF: + if (is_overloaded_fn (t)) + { + gcc_checking_assert (allow_non_constant); + *non_constant_p = true; + return t; + } r = cxx_eval_component_reference (call, t, allow_non_constant, addr, non_constant_p, overflow_p); break; Index: testsuite/g++.dg/parse/crash66.C === --- testsuite/g++.dg/parse/crash66.C(revision 0) +++ testsuite/g++.dg/parse/crash66.C(working copy) @@ -0,0 +1,11 @@ +// PR c++/58647 + +struct A +{ + static void foo(); +}; + +templatetypename void bar() +{ + A().foo; +}
[PATCH] ubsan TLC + PR59306 fix
This was meant only as a TLC, but it also fixes PR59306. Using walk_gimple_op was an overkill; gimple_{store,assign_load}_p is enough. As a side effect, it also fixes the bug because now we better restrict what goes into instrument_member_call. Bootstrapped, ran ubsan testsuite on x86_64-linux, ok for trunk? 2013-11-27 Marek Polacek pola...@redhat.com PR sanitizer/59306 * ubsan.c (instrument_null): Use gimple_store_p/gimple_assign_load_p instead of walk_gimple_op. (ubsan_pass): Adjust. Call instrument_null only if SANITIZE_NULL. testsuite/ * g++.dg/ubsan/pr59306.C: --- gcc/ubsan.c.mp3 2013-11-27 09:55:23.573252894 +0100 +++ gcc/ubsan.c 2013-11-27 11:15:29.893507768 +0100 @@ -614,24 +614,22 @@ instrument_mem_ref (tree t, gimple_stmt_ gsi_insert_before (iter, g, GSI_SAME_STMT); } -/* Callback function for the pointer instrumentation. */ +/* Perform the pointer instrumentation. */ -static tree -instrument_null (tree *tp, int * /*walk_subtree*/, void *data) +static void +instrument_null (gimple_stmt_iterator gsi, bool is_lhs) { - tree t = *tp; + gimple stmt = gsi_stmt (gsi); + tree t = is_lhs ? gimple_get_lhs (stmt) : gimple_assign_rhs1 (stmt); + t = get_base_address (t); const enum tree_code code = TREE_CODE (t); - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - if (code == MEM_REF TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME) -instrument_mem_ref (TREE_OPERAND (t, 0), wi-gsi, wi-is_lhs); +instrument_mem_ref (TREE_OPERAND (t, 0), gsi, is_lhs); else if (code == ADDR_EXPR POINTER_TYPE_P (TREE_TYPE (t)) TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == METHOD_TYPE) -instrument_member_call (wi-gsi); - - return NULL_TREE; +instrument_member_call (gsi); } /* Gate and execute functions for ubsan pass. */ @@ -646,7 +644,6 @@ ubsan_pass (void) { for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) { - struct walk_stmt_info wi; gimple stmt = gsi_stmt (gsi); if (is_gimple_debug (stmt) || gimple_clobber_p (stmt)) { @@ -654,9 +651,14 @@ ubsan_pass (void) continue; } - memset (wi, 0, sizeof (wi)); - wi.gsi = gsi; - walk_gimple_op (stmt, instrument_null, wi); + if (flag_sanitize SANITIZE_NULL) + { + if (gimple_store_p (stmt)) + instrument_null (gsi, true); + if (gimple_assign_load_p (stmt)) + instrument_null (gsi, false); + } + gsi_next (gsi); } } --- gcc/testsuite/g++.dg/ubsan/pr59306.C.mp32013-11-27 11:15:17.900456072 +0100 +++ gcc/testsuite/g++.dg/ubsan/pr59306.C2013-11-27 11:15:01.696387988 +0100 @@ -0,0 +1,14 @@ +// { dg-do compile } +// { dg-options -fsanitize=undefined } +// { dg-skip-if { *-*-* } { -flto } { } } + +class A { + void bar (void (A::*) (int)); + void foo (int); + void B (); +}; + +void A::B() +{ + bar (A::foo); +} Marek
[PATCH] Postpone __LINE__ evaluation to the end of #line directives
From: Max TenEyck Woodbury max+...@mtew.isa-geek.net Copyright 2013 assigned to the Free Software Foundation. --- gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++ libcpp/directives.c | 9 - libcpp/internal.h| 1 + libcpp/macro.c | 3 +++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c b/gcc/testsuite/gcc.dg/cpp/line4.c index 84dbf96..0120a2b 100644 --- a/gcc/testsuite/gcc.dg/cpp/line4.c +++ b/gcc/testsuite/gcc.dg/cpp/line4.c @@ -13,7 +13,18 @@ enum { i = __LINE__ }; enum { j = __LINE__ }; #line 16 /* N.B. the _next_ line is line 16. */ - -char array1[i== 44 ? 1 : -1]; -char array2[j== 90 ? 1 : -1]; -char array3[__LINE__ == 19 ? 1 : -1]; + /* __LINE__ should be 16 */ +char array1[i== 44 ? 1 : -1]; /* 17 */ +char array2[j== 90 ? 1 : -1]; /* 18 */ +char array3[__LINE__ == 19 ? 1 : -1]; /* 19 */ +/* 20 */ +# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. */ +/* 22 */ +char array4[__LINE__ == 23 ? 1: -1];/* 23 */ +char array5[__LINE__ == 23 ? -1: 1];/* 24 */ +/* 25 */ +# line __LINE__ /* N.B. nor shoud a multi-line comment change the fact + that the __LINE__ sequence should _not_ change here. */ +/* 28 */ +char array6[__LINE__ == 29 ? 1: -1];/* 29 */ +char array7[__LINE__ == 27 ? -1: 1];/* 30 */ diff --git a/libcpp/directives.c b/libcpp/directives.c index 65b2034..adb04a5 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.c @@ -900,7 +900,9 @@ do_line (cpp_reader *pfile) bool wrapped; /* #line commands expand macros. */ + ++pfile-state.in_directive; /* Request special __LINE__ handling. */ token = cpp_get_token (pfile); + --pfile-state.in_directive; /* Cancle request */ if (token-type != CPP_NUMBER || strtolinenum (token-val.str.text, token-val.str.len, new_lineno, wrapped)) @@ -914,7 +916,9 @@ do_line (cpp_reader *pfile) return; } - if (CPP_PEDANTIC (pfile) (new_lineno == 0 || new_lineno cap || wrapped)) + if (CPP_PEDANTIC (pfile) (new_lineno == 0 || + (new_lineno cap new_lineno != CUR__LINE__) || + wrapped)) cpp_error (pfile, CPP_DL_PEDWARN, line number out of range); else if (wrapped) cpp_error (pfile, CPP_DL_WARNING, line number out of range); @@ -936,6 +940,9 @@ do_line (cpp_reader *pfile) } skip_rest_of_line (pfile); + if ( new_lineno == CUR__LINE__ ) /* Postponed evaluation ? */ +new_lineno = linemap_get_expansion_line (pfile-line_table, + pfile-line_table-highest_line); _cpp_do_file_change (pfile, LC_RENAME_VERBATIM, new_file, new_lineno, map_sysp); } diff --git a/libcpp/internal.h b/libcpp/internal.h index 5321458..268de86 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -604,6 +604,7 @@ cpp_in_primary_file (cpp_reader *pfile) { return pfile-line_table-depth == 1; } +#define CUR__LINE__ -1U /* In macro.c */ extern void _cpp_free_definition (cpp_hashnode *); diff --git a/libcpp/macro.c b/libcpp/macro.c index e359d15..47e41b6 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -309,6 +309,9 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node) /* If __LINE__ is embedded in a macro, it must expand to the line of the macro's invocation, not its definition. Otherwise things like assert() will not work properly. */ + if ( pfile-state.in_directive 1 ) /* In #line directive? */ +number = CUR__LINE__; /* yes, postpone the lookup... */ + else number = linemap_get_expansion_line (pfile-line_table, CPP_OPTION (pfile, traditional) ? pfile-line_table-highest_line -- 1.8.0.rc0.18.gf84667d
Re: [gomp4 simd, RFC] Simple fix to override vectorization cost estimation.
On Wed, Nov 27, 2013 at 10:58:30AM +0100, Richard Biener wrote: On Wed, 27 Nov 2013, Tobias Burnus wrote: Am 27.11.2013 08:22, schrieb Sergey Ostanevich: Done. Thanks for fixing Richard's and Jakub's comments and parts of mine. +have the same meaning as described in @option{fvect-cost-model} and by +default a cost model defined with @option{fvect-cost-model} is used. As mentioned before, pleae add a hyphen before fvect (i.e. @option{fvect-cost-model} - @option{-fvect-cost-model}) Regarding a test case: I still think it would be useful to have one, but I somehow seemed to have messed up with my previous one - I fail to get it not to vectorize with omp simd due to cost reasons. Thus, I don't have one. Ok with that change - no need to re-post before applying. Note the start of the ChangeLog is still wrong: 2013-11-25 Sergey Ostanevich sergos@gmail.com * c.opt: Introduced a new openmp-simd warning. * lang.opt: Ditto. Because: 2013-11-25 Sergey Ostanevich sergos@gmail.com * c.opt (Wopenmp-simd): New. needs to go into the gcc/c-family/ChangeLog and 2013-11-25 Sergey Ostanevich sergos@gmail.com * lang.opt (Wopenmp-simd): New. needs to go into the gcc/fortran/ChangeLog, you can't use ditto there because it is a different file. Please fix that up before committing. Jakub
[PATCH v2] libgcc: AArch64: Check for correct signal insns on BE when unwinding
Hi, When unwinding the stack, the unwind code checks for two opcodes that denote a registrations of a signal handler. This is broken on BE as the opcodes will be in the wrong byte-order as insns are always LE. Add the correct checks when compiling for AArch64 big endian. This patch fixes all glibc backtrace tests and causes no other regressions on glibc. Please note that I don't have commit access, if this is OK could someone merge it for me? Thanks, Matt Leach libgcc/ 2013-11-26 Matthew Leach matthew.le...@arm.com * config/aarch64/linux-unwind.h (aarch64_fallback_frame_state): Check for correct opcodes on BE. diff --git a/libgcc/config/aarch64/linux-unwind.h b/libgcc/config/aarch64/linux-unwind.h index fde4d14..8b0d7fe 100644 --- a/libgcc/config/aarch64/linux-unwind.h +++ b/libgcc/config/aarch64/linux-unwind.h @@ -25,6 +25,19 @@ #include signal.h #include sys/ucontext.h + +/* Since insns are always stored LE, on a BE system the opcodes will + be loaded byte-reversed. Therefore, define two sets of opcodes, + one for LE and one for BE. */ + +#if __AARCH64EB__ +#define MOVZ_X8_8B 0x681180d2 +#define SVC_0 0x01d4 +#else +#define MOVZ_X8_8B 0xd2801168 +#define SVC_0 0xd401 +#endif + #define MD_FALLBACK_FRAME_STATE_FOR aarch64_fallback_frame_state static _Unwind_Reason_Code @@ -55,7 +68,7 @@ aarch64_fallback_frame_state (struct _Unwind_Context *context, 0xd2801168 movz x8, #0x8b 0xd401 svc 0x0 */ - if (pc[0] != 0xd2801168 || pc[1] != 0xd401) + if (pc[0] != MOVZ_X8_8B || pc[1] != SVC_0) { return _URC_END_OF_STACK; } -- 1.7.9.5
Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives
On Wed, Nov 27, 2013 at 05:29:22AM -0500, mtewoodb...@gmail.com wrote: From: Max TenEyck Woodbury max+...@mtew.isa-geek.net This patch is badly missing a description. You also want to mention the PR number, if this fixes a bug. I guess this is to fix PR58687. Copyright 2013 assigned to the Free Software Foundation. --- gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++ libcpp/directives.c | 9 - libcpp/internal.h| 1 + libcpp/macro.c | 3 +++ 4 files changed, 27 insertions(+), 5 deletions(-) You didn't attach a ChangeLog entry. The patch has numerous formatting issues, see below. diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c b/gcc/testsuite/gcc.dg/cpp/line4.c index 84dbf96..0120a2b 100644 --- a/gcc/testsuite/gcc.dg/cpp/line4.c +++ b/gcc/testsuite/gcc.dg/cpp/line4.c @@ -13,7 +13,18 @@ enum { i = __LINE__ }; enum { j = __LINE__ }; #line 16 /* N.B. the _next_ line is line 16. */ - -char array1[i== 44 ? 1 : -1]; -char array2[j== 90 ? 1 : -1]; -char array3[__LINE__ == 19 ? 1 : -1]; + /* __LINE__ should be 16 */ +char array1[i== 44 ? 1 : -1]; /* 17 */ +char array2[j== 90 ? 1 : -1]; /* 18 */ +char array3[__LINE__ == 19 ? 1 : -1]; /* 19 */ +/* 20 */ +# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. */ Two spaces after '.'. +# line __LINE__ /* N.B. nor shoud a multi-line comment change the fact should diff --git a/libcpp/directives.c b/libcpp/directives.c index 65b2034..adb04a5 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.c @@ -900,7 +900,9 @@ do_line (cpp_reader *pfile) bool wrapped; /* #line commands expand macros. */ + ++pfile-state.in_directive; /* Request special __LINE__ handling. */ Don't put the comments next to the statements, instead put them above the statements. token = cpp_get_token (pfile); + --pfile-state.in_directive; /* Cancle request */ Cancel - if (CPP_PEDANTIC (pfile) (new_lineno == 0 || new_lineno cap || wrapped)) + if (CPP_PEDANTIC (pfile) (new_lineno == 0 || + (new_lineno cap new_lineno != CUR__LINE__) || + wrapped)) || operator shouldn't end the line, put it on the start of the next line. See existing code for how it should look like. cpp_error (pfile, CPP_DL_PEDWARN, line number out of range); else if (wrapped) cpp_error (pfile, CPP_DL_WARNING, line number out of range); @@ -936,6 +940,9 @@ do_line (cpp_reader *pfile) } skip_rest_of_line (pfile); + if ( new_lineno == CUR__LINE__ ) /* Postponed evaluation ? */ +new_lineno = linemap_get_expansion_line (pfile-line_table, + pfile-line_table-highest_line); No space after '(' and before ')'. No space before '?'. +#define CUR__LINE__ -1U The trailing underscores look weird to me. Thanks, Marek
Re: Some wide-int review comments
On Tue, Nov 26, 2013 at 5:33 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: Richi, patch ping Ok. Thanks, Richard. also two more pieces of information.With further testing, this seems to fix Tests that now work, but didn't before: === ext/random/hypergeometric_distribution/operators/values.cc (test for excess errors) New tests that PASS: ext/random/hypergeometric_distribution/operators/values.cc execution test also, the corresponding frag for fold-const.c on the wide-int branch will look like Index: gcc/fold-const.c === --- gcc/fold-const.c(revision 205224) +++ gcc/fold-const.c(working copy) @@ -1030,51 +1030,51 @@ int_const_binop_1 (enum tree_code code, case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: - res = wi::div_trunc (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_trunc (arg1, arg2, sign, overflow); break; case FLOOR_DIV_EXPR: - res = wi::div_floor (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_floor (arg1, arg2, sign, overflow); break; case CEIL_DIV_EXPR: - res = wi::div_ceil (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_ceil (arg1, arg2, sign, overflow); break; case ROUND_DIV_EXPR: - res = wi::div_round (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_round (arg1, arg2, sign, overflow); break; case TRUNC_MOD_EXPR: - res = wi::mod_trunc (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_trunc (arg1, arg2, sign, overflow); break; case FLOOR_MOD_EXPR: - res = wi::mod_floor (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_floor (arg1, arg2, sign, overflow); break; case CEIL_MOD_EXPR: - res = wi::mod_ceil (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_ceil (arg1, arg2, sign, overflow); break; case ROUND_MOD_EXPR: - res = wi::mod_round (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_round (arg1, arg2, sign, overflow); break; case MIN_EXPR: On 11/20/2013 06:31 PM, Kenneth Zadeck wrote: On 11/13/2013 04:59 AM, Richard Biener wrote: On Tue, Nov 12, 2013 at 5:43 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: On 11/12/2013 11:27 AM, Joseph S. Myers wrote: On Tue, 12 Nov 2013, Kenneth Zadeck wrote: Richi, i am having a little trouble putting this back the way that you want. The issue is rem. what is supposed to happen for INT_MIN % -1? I would assume because i am failing the last case of gcc.dg/c90-const-expr-8.c that INT_MIN %-1 should not overflow even if INT_MIN / -1 does. however, Given the conclusion in C11 that a%b should be considered undefined if a/b is not representable, I think it's reasonable to say INT_MIN % -1 *should* be considered to overflow (for all C standard versions) (and bug 30484 is only a bug for -fwrapv). however, my local question is what do we want the api to be int-const-binop-1?The existing behavior seems to be that at least for common modes this function silently returns 0 and it is up to the front ends to put their own spin on it. For wide-int you create 1:1 the behavior of current trunk (if a change of behavior in TImode is not tested in the testsuite then you can ignore that). Whatever change you do to semantics of functions you do separately from wide-int (preferably first on trunk, or at your choice after the wide-int merge). For this case in question I'd say a % -1 should return 0, but for INT_MIN % -1 that 0 should have TREE_OVERFLOW set (and thus you need to adjust that c90-const-expr-8.c testcase). Richard. kenny richi, I have done this exactly as you suggested. bootstrapped and regression tested on x86-64. 2013-11-20 Kenneth Zadeck zad...@naturalbridge.com * fold-const.c (int_const_binop_1): Make INT_MIN % -1 return 0 with the overflow bit set. 2013-11-20 Kenneth Zadeck zad...@naturalbridge.com * gcc.dg/c90-const-expr-8.c: Look for overflow on INT_MIN % -1. * gcc.dg/c99-const-expr-8.c: Look for overflow on INT_MIN % -1. ok to commit? kenny
RE: [PATCH] Remove keep_aligning from get_inner_reference
Hi, On 27 Nov 2013 10:43:59, Eric Botcazou wrote: I think you are right, this flag is no longer necessary, and removing this code path would simplify everything. Therefore I'd like to propose to remove the keep_aligning parameter of get_inner_reference as a split-out patch. Boot-strapped (with languages=all,ada,go) and regression-tested on x86_64-linux-gnu. I don't understand how you can commit a patch that changes something only on strict-alignment platforms and test it only on x86-64. This change *must* be tested with Ada on a strict-alignment platform, that's the only combination Well, I did that. Apologies for not mentioning that. for which it is exercised. If you cannot do that, then please back it out. More generally speaking, it's not acceptable to make cleanup changes like that in the RTL expander without extreme care, which of course starts with proper testing. The patch should not have been approved either for that reason. -- Eric Botcazou The change on the ada interface is actually not critical, because all invocations of get_inner_reference there used keep_aligning == false, as did the majority of all other invocations. What changes with that patch, is that get_inner_reference(, true) could return a VIEW_CONVERT_EXPR, which is now obsolete. If it is causing any trouble, I can revert that change of course. Thanks Bernd.
Re: [PATCH] ubsan TLC + PR59306 fix
On Wed, Nov 27, 2013 at 11:29:21AM +0100, Marek Polacek wrote: This was meant only as a TLC, but it also fixes PR59306. Using walk_gimple_op was an overkill; gimple_{store,assign_load}_p is enough. As a side effect, it also fixes the bug because now we better restrict what goes into instrument_member_call. Bootstrapped, ran ubsan testsuite on x86_64-linux, ok for trunk? Ok, thanks. 2013-11-27 Marek Polacek pola...@redhat.com PR sanitizer/59306 * ubsan.c (instrument_null): Use gimple_store_p/gimple_assign_load_p instead of walk_gimple_op. (ubsan_pass): Adjust. Call instrument_null only if SANITIZE_NULL. testsuite/ * g++.dg/ubsan/pr59306.C: Missing New test. ;) Jakub
Re: [patch tree-ssa-forwprop]: Add type raising in shift-operations
On Tue, Nov 26, 2013 at 10:42 PM, Jeff Law l...@redhat.com wrote: On 11/26/13 02:58, Richard Biener wrote: What is the rationale behind one for over the other? Or is it arbitrary? I can easily make a case for either form based on what we're trying to optimize. In general, it seems to me that When (Type) is a widening cast then you have to worry to not introduce undefined behavior when y is larger than the number of bits in X. When (Type) is a shortening cast then we lose information - because then the range we can determine for y from the shift operation will be larger. So I'm not sure I follow the reasoning to move out the casting when possible. Assume that we're not introducing undefined behaviour. Obviously if we're introducing undefined behaviour, then the transformation is wrong, plain and simple. If by moving the cast we can eliminate an ALU operation because the shift (in this example) combines with something earlier or later in the IL, then we win. Similarly if by moving the cast we expose redundant casting, then we win. A narrower range is obviously good, but often just having a narrower range won't in and of itself provide an optimization opportunity. The goal for canonicalization should be to shorten operations where possible. Thus both cases, sinking and hoisting the cast should be applied dependent on validity and whether the shift will be performed in a smaller type in the end. I disagree, there are going to be times when sinking or hoisting the cast allows the shift to combine with other instructions in the IL. There are times when the ability to combine with other instructions in the stream should be driving these decisions. I would buy that as a guiding principle that applies in cases where we don't have other optimization opportunities we can expose by hoisting/sinking the type casts. I would even buy that we prefer the narrowest range through some point in the compiler (say vrp2), then we allow wider ranges as needed to facilitate other optimizations. If we're more interested in combining the shift with statements that define the shift's operands, then the former is going to be a better representation because we're typecasting the result and thus the typecast doesn't interfere with the desired optimization. In contrast if we are more interested in cases where the shift defines operands for some other statement, then the latter form is more beneficial because we're typecasting the inputs and thus the typecast doesn't interfere with the desired optimization. Always a trade-off which is why combining passes always have to consider casts. Actually, I would say quite the opposite here. There's a very clear line when we want to go from one representation to the other within the forwprop passes. By doing so, we relieve the pattern matching aspects of that pass from needing to concern themselves with the typecasting issues. I think the goal of canonicalizing operations to work on the smallest mode possible during early GIMPLE passes makes the most sense (even if then a cast is not always outside or inside an operation). Later (currently during RTL expansion) we can widen operations again according to target needs. But what you end up missing here is optimization opportunities that expose themselves when the casts are moved from inputs to outputs or vice-versa. Only if the optimization opportunities do not handle the casts themselves. We for example have the get_unwidened () helper in fold that helps transforms to look through some casts. I think we want similar helpers for the GIMPLE level combining - the combiner knows whether for an operand with type T it's ok to have a wider or narrower operand or if it wants an exact T2 operand instead. Helpers should exist that get it at that, if possible. Currently the issue would be how to represent the result of get it at that, but I'll work on removing the tree-building-and-re-gimplifying way of middle-end expression creation for the next stage1 so that we can easily represent multiple-stmts intermediate expressions. It'll be a (maybe wrapped in some C++ sugar, maybe then with non-GC allocation) gimple_seq. So you do op = get_me_the_op_in_type_X (X, gimple_assign_rhs2 (stmt)); if (!op) return false; and now op is just a regular op you can work with, eventually pointing to a temporary sequence that you'd need to insert. Note that forwprops combining should really work like fold - combine from the expression leafs to the expression roots (works currently by visiting in stmt order). Now consider (T2)(T1)(a 5) and ((T1)(a 5)) 5. The 2nd form would prefer if we'd have combined the leaf (T1)(a 5) to ((T1)a 5) while the first would be pessimized by that. So - what kind of form do you prefer? I'd say you can't tell that from the use(s) but you have to be able to decide without and thus cannot decide based on may I better be able to
Re: [PATCH] Remove keep_aligning from get_inner_reference
Well, I did that. Apologies for not mentioning that. OK, on which strict-alignment platform did you test it with Ada enabled? The change on the ada interface is actually not critical, because all invocations of get_inner_reference there used keep_aligning == false, as did the majority of all other invocations. Sure, but that's not the point... What changes with that patch, is that get_inner_reference(, true) could return a VIEW_CONVERT_EXPR, which is now obsolete. ...that's what needs to be properly verified indeed. If it is causing any trouble, I can revert that change of course. You might want to take a look at PR middle-end/17746, it took about 3 months almost a decade ago to find something plausible to fix this fundamental issue for Ada on strict-aligmnent platforms so I'd rather not go through this again. -- Eric Botcazou
Re: [PATCH] Improve handling of threads which cross over the current loops header
On Tue, Nov 26, 2013 at 11:37 PM, Jeff Law l...@redhat.com wrote: On 11/26/13 02:26, Richard Biener wrote: But only necessary if this threading returned true, no? Correct. Fix for that spinning overnight. Also how likely did it scramble the loop? I see that thread_block_1 already cancels loops in some cases so I wonder what case it misses so that you need to cancel the loop unconditonally here. If you get into that code, effectively always. We know we're threading around the backedge through a multi-way branch at the top, then to some interior node within the loop. It would depend on the precise structure of the CFG within the loop, but basically if you get here, there is a good chance you have irreducible loops. For a good example of how badly things can get scrambled, look at what we do with 54742. Dump or draw CFGs before/after DOM1. It's painful to see what we do to the loop structure, but removing that switch is such a huge win that losing the loop structure has become a non-concern. Ick ;) Loop fixup isn't able to recover the original loop here, it discovers a new one, the one just covering looping through the default: case of the switch. The first pass that allows CFG manipulations during loop init is able to disambiguate the mess DOM left us with and discovers a loop nest of depth 4. Nice transform I'd say ;) Richard. Do you have a testcase that ICEs if I remove the cancelling above? I'd have to review the PRs to recall if they were ICEs and/or codegen failures. jeff
Re: wide-int, rs6000
On Wed, Nov 27, 2013 at 2:07 AM, Kenneth Zadeck zad...@naturalbridge.com wrote: We will of course measure it but the only thing that is different because of the conversion is that timode integers are packaged differently Yeah, the slowness is due to generic code modification, not a port using CONST_WIDE_INT or not. Richard. On Nov 26, 2013, at 6:17 PM, David Edelsohn dje@gmail.com wrote: On Tue, Nov 26, 2013 at 5:46 PM, Mike Stump mikest...@comcast.net wrote: Ok? The revised version of the patch looks okay from a technical perspective, but I still am concerned about the compile-time impact. I thought that I saw a comment on IRC that wide-int seems to have a 3-5% compile time impact, which is a concern. I am concerned about the rs6000 port being further slowed with respect to other targets. Thanks, David
Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.
On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote: Hmm. I'm still thinking that we should handle this during the regular transform step. I wonder if it can't be done instead just in vectorizable_load, if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is invariant, just emit the (broadcasted) load not inside of the loop, but on the loop preheader edge. *** gcc/tree-vect-data-refs.c (revision 205435) --- gcc/tree-vect-data-refs.c (working copy) *** again: *** 3668,3673 --- 3668,3682 } STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true; } + else if (loop_vinfo + integer_zerop (DR_STEP (dr))) + { + /* All loads from a non-varying address will be disambiguated + by data-ref analysis or via a runtime alias check and thus + they will become invariant. Force them to be vectorized + as external. */ + STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def; + } I think this is unsafe for simd loops. I'd say: int a[1024], b[1024]; int foo (void) { int i; #pragma omp simd safelen(8) for (i = 0; i 1024; i++) { a[i] = i; b[i] = a[0]; } } is valid testcase, the loop behaves the same if you execute it sequentially, or vectorize using SIMD (hardware or emulated) instructions with vectorization factor of 2, 4 or 8, as long as you do all memory operations (either using scalar insns or simd instructions) in the order they were written, which I believe the vectorizer right now handles correctly, but the hoisting this patch wants to perform is not fine, unless data ref analysis would prove that it can't alias. For non-simd loops we of course perform that data ref analysis and either version for alias, or prove that the drs can't alias, but for simd loops we take as given that the loop is safe to be vectorized. It is, but not for hoisting. So, the above say with emulated SIMD is safe to be executed as: for (i = 0; i 1024; i += 8) { int tmp; for (tmp = i; tmp i + 8; tmp++) a[tmp] = tmp; for (tmp = i; tmp i + 8; tmp++) b[tmp] = a[0]; } but not as: int tempa[8], tmp; /* Hoisted HW or emulated load + splat. */ for (tmp = 0; tmp 8; tmp++) tempa[tmp] = a[0]; for (i = 0; i 1024; i += 8) { for (tmp = i; tmp i + 8; tmp++) a[tmp] = tmp; for (tmp = i; tmp i + 8; tmp++) b[tmp] = tempa[tmp]; } Jakub
[testsuite] Properly set ld_library_path in cilk-plus tests
All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests were FAILing on Solaris 10 and 11/x86: ld.so.1: c11-atomic-exec-1.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/./gcc/libgcc_s.so.1: wrong ELF class: ELFCLASS32 ld.so.1: fib.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/i386-pc-solaris2.10/./libcilkrts/.libs/libcilkrts.so.5: wrong ELF class: ELFCLASS32 This happens because both cilk-plus.exp files override ld_library_path instead of appending to it. Fixed as follows by using the customary method for setting ld_library_path. Bootstrapped without regressions on i386-pc-solaris2.1[01] and x86_64-unknown-linux-gnu, installed on mainline. Rainer 2013-11-26 Rainer Orth r...@cebitec.uni-bielefeld.de * gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path. Call set_ld_library_path_env_vars. * g++.dg/cilk-plus/cilk-plus.exp: Likewise. # HG changeset patch # Parent 293e24349ca8c7aa87bc32718eede5e185bca8c9 Properly set ld_library_path in cilk-plus tests diff --git a/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp b/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp --- a/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp +++ b/gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp @@ -31,7 +31,8 @@ dg-finish set library_var [get_multilibs] # Pointing the ld_library_path to the Cilk Runtime library binaries. -set ld_library_path ${library_var}/libcilkrts/.libs +append ld_library_path :${library_var}/libcilkrts/.libs +set_ld_library_path_env_vars global TEST_EXTRA_LIBS set TEST_EXTRA_LIBS -L${library_var}/libcilkrts/.libs diff --git a/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp b/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp --- a/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp +++ b/gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp @@ -26,7 +26,8 @@ if { ![check_effective_target_cilkplus] set library_var [get_multilibs] # Pointing the ld_library_path to the Cilk Runtime library binaries. -set ld_library_path ${library_var}/libcilkrts/.libs +append ld_library_path :${library_var}/libcilkrts/.libs +set_ld_library_path_env_vars global TEST_EXTRA_LIBS set TEST_EXTRA_LIBS -L${library_var}/libcilkrts/.libs -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH] Remove keep_aligning from get_inner_reference
On Wed, Nov 27, 2013 at 10:43 AM, Eric Botcazou ebotca...@adacore.com wrote: I think you are right, this flag is no longer necessary, and removing this code path would simplify everything. Therefore I'd like to propose to remove the keep_aligning parameter of get_inner_reference as a split-out patch. Boot-strapped (with languages=all,ada,go) and regression-tested on x86_64-linux-gnu. I don't understand how you can commit a patch that changes something only on strict-alignment platforms and test it only on x86-64. This change *must* be tested with Ada on a strict-alignment platform, that's the only combination for which it is exercised. If you cannot do that, then please back it out. More generally speaking, it's not acceptable to make cleanup changes like that in the RTL expander without extreme care, which of course starts with proper testing. The patch should not have been approved either for that reason. I'm fine with reverting it for now (you were in CC of the patch submission but silent on it, I asked for the patch to start simplifying the way mems are expanded - ultimately to avoid the recursion and mem-attribute compute by the recursion). We can come back during stage1. get_object_alignment should be able to properly handle this case if you call it on the full reference in the normal_inner_ref: case. All the weird duplicate code on the VIEW_CONVERT_EXPR case should IMHO go. Richard. -- Eric Botcazou
Re: Patch ping (stage1-ish patches)
Jakub Jelinek ja...@redhat.com writes: On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote: In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. On my side, there's [c++, driver] Add -lrt on Solaris http://gcc.gnu.org/ml/gcc-patches/2013-05/msg01488.html resubmitted as http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00412.html It's unclear if the more intrusive solution outlined in the second message (introduce libstdc++.spec) were acceptable in stage3, and I'm uncertain if I can get it ready in time. Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH] Make the IRA shrink-wrapping preparation also work on ppc64
On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote: In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. I've pinged http://gcc.gnu.org/ml/gcc-patches/2013-11/msg03447.html today. Marek
Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.
On Wed, 27 Nov 2013, Jakub Jelinek wrote: On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote: Hmm. I'm still thinking that we should handle this during the regular transform step. I wonder if it can't be done instead just in vectorizable_load, if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is invariant, just emit the (broadcasted) load not inside of the loop, but on the loop preheader edge. It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS. It's just a missed optimization I even noted when originally implementing support for invariant loads ... *** gcc/tree-vect-data-refs.c (revision 205435) --- gcc/tree-vect-data-refs.c (working copy) *** again: *** 3668,3673 --- 3668,3682 } STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true; } + else if (loop_vinfo + integer_zerop (DR_STEP (dr))) + { + /* All loads from a non-varying address will be disambiguated +by data-ref analysis or via a runtime alias check and thus +they will become invariant. Force them to be vectorized +as external. */ + STMT_VINFO_DEF_TYPE (stmt_info) = vect_external_def; + } I think this is unsafe for simd loops. I'd say: int a[1024], b[1024]; int foo (void) { int i; #pragma omp simd safelen(8) for (i = 0; i 1024; i++) { a[i] = i; b[i] = a[0]; } } is valid testcase, the loop behaves the same if you execute it sequentially, or vectorize using SIMD (hardware or emulated) instructions with vectorization factor of 2, 4 or 8, as long as you do all memory operations (either using scalar insns or simd instructions) in the order they were written, which I believe the vectorizer right now handles correctly, but the hoisting this patch wants to perform is not fine, unless data ref analysis would prove that it can't alias. For non-simd loops we of course perform that data ref analysis and either version for alias, or prove that the drs can't alias, but for simd loops we take as given that the loop is safe to be vectorized. It is, but not for hoisting. Ick. I hate this behind-the-back stuff - so safelen doesn't mean that a[i] and a[0] do not alias. Note that this will break with SLP stuff at least as that will re-order reads/writes. Not sure how safelen applies to SLP though. That is a[i] = i; b[i] = a[0]; a[i+1] = i+1; b[i+1] = a[1]; will eventually end up re-ordering reads/writes in non-obvious ways. So, the above say with emulated SIMD is safe to be executed as: for (i = 0; i 1024; i += 8) { int tmp; for (tmp = i; tmp i + 8; tmp++) a[tmp] = tmp; for (tmp = i; tmp i + 8; tmp++) b[tmp] = a[0]; } but not as: int tempa[8], tmp; /* Hoisted HW or emulated load + splat. */ for (tmp = 0; tmp 8; tmp++) tempa[tmp] = a[0]; for (i = 0; i 1024; i += 8) { for (tmp = i; tmp i + 8; tmp++) a[tmp] = tmp; for (tmp = i; tmp i + 8; tmp++) b[tmp] = tempa[tmp]; } Jakub -- Richard Biener rguent...@suse.de SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imendorffer
Re: [patch] set MULTIARCH_DIRNAME for multilib architectures
On 27 November 2013 11:10, Aurelien Jarno aure...@debian.org wrote: On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote: Am 13.06.2013 11:42, schrieb Richard Sandiford: Bernhard Reutner-Fischer rep.dot@gmail.com writes: On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com wrote: Matthias Klose d...@ubuntu.com writes: Index: config/mips/t-linux64 === --- config/mips/t-linux64(revision 200012) +++ config/mips/t-linux64(working copy) @@ -24,3 +24,13 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq (,$(findstring abin32,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) +else +ifneq (,$(findstring abi64,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +endif +endif findstring seems a bit fragile for a full triple. I think it would be better to have something similar to the current MIPS_SOFT definition: MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, $(target_cpu_default)) $(filter soft, $(with_float))),soft) but for ABIs. It could then also take with_abi into account. Maybe something like: MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(target_cpu_default)), n32), \ o32) (completely untested). Bikeshedding: Doko would know, but ISTR that $(or) did not exist in make-3.80 which is currently the minimum prerequisite, fwiw. Gah, that's a pity. Thanks for the catch though. Maybe firstword would be OK instead. I see I also fell into the usual ABI trap. It wouldn't have affected the use in this patch, but the default ought to be 32 rather than o32. the define is in tm_defines, not target_cpu_default. MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(tm_defines)), n32), \ 32) However I can't see yet how this should be used. Maybe Aurelian could come up with a tested version of this patch? How about the patch below? It first determines the ABI without using $(or), by looking (in this order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32 if none of the previous matches. Then the multiarch path is constructed from the ABI. Index: gcc/config/mips/t-linux64 === --- gcc/config/mips/t-linux64 (révision 205437) +++ gcc/config/mips/t-linux64 (copie de travail) @@ -24,3 +24,22 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq ($(with_abi),) +MIPS_ABI = $(with_abi) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, $(tm_defines))),n32) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=32 +endif + +ifeq ($(MIPS_ABI),32) +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT)) hm, shouldn't this be gnueabi with an 'e' or is that implied for n64 anyway? just curious.. thanks, +endif -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net
RE: [PATCH] Remove keep_aligning from get_inner_reference
On Wed, 27 Nov 2013 12:47:19, Richard Biener wrote: On Wed, Nov 27, 2013 at 10:43 AM, Eric Botcazou ebotca...@adacore.com wrote: I think you are right, this flag is no longer necessary, and removing this code path would simplify everything. Therefore I'd like to propose to remove the keep_aligning parameter of get_inner_reference as a split-out patch. Boot-strapped (with languages=all,ada,go) and regression-tested on x86_64-linux-gnu. I don't understand how you can commit a patch that changes something only on strict-alignment platforms and test it only on x86-64. This change *must* be tested with Ada on a strict-alignment platform, that's the only combination for which it is exercised. If you cannot do that, then please back it out. More generally speaking, it's not acceptable to make cleanup changes like that in the RTL expander without extreme care, which of course starts with proper testing. The patch should not have been approved either for that reason. I'm fine with reverting it for now (you were in CC of the patch submission but silent on it, I asked for the patch to start simplifying the way mems are expanded - ultimately to avoid the recursion and mem-attribute compute by the recursion). Ok, then I'll revert this patch in the evening. Bernd. We can come back during stage1. get_object_alignment should be able to properly handle this case if you call it on the full reference in the normal_inner_ref: case. All the weird duplicate code on the VIEW_CONVERT_EXPR case should IMHO go. Richard. -- Eric Botcazou
Re: [patch] set MULTIARCH_DIRNAME for multilib architectures
On Wed, Nov 27, 2013 at 12:55:54PM +0100, Bernhard Reutner-Fischer wrote: On 27 November 2013 11:10, Aurelien Jarno aure...@debian.org wrote: On Thu, Jun 20, 2013 at 02:26:12PM +0200, Matthias Klose wrote: Am 13.06.2013 11:42, schrieb Richard Sandiford: Bernhard Reutner-Fischer rep.dot@gmail.com writes: On 12 June 2013 20:20:50 Richard Sandiford rdsandif...@googlemail.com wrote: Matthias Klose d...@ubuntu.com writes: Index: config/mips/t-linux64 === --- config/mips/t-linux64(revision 200012) +++ config/mips/t-linux64(working copy) @@ -24,3 +24,13 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq (,$(findstring abin32,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) +else +ifneq (,$(findstring abi64,$(target))) +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +endif +endif findstring seems a bit fragile for a full triple. I think it would be better to have something similar to the current MIPS_SOFT definition: MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, $(target_cpu_default)) $(filter soft, $(with_float))),soft) but for ABIs. It could then also take with_abi into account. Maybe something like: MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(target_cpu_default)), n32), \ o32) (completely untested). Bikeshedding: Doko would know, but ISTR that $(or) did not exist in make-3.80 which is currently the minimum prerequisite, fwiw. Gah, that's a pity. Thanks for the catch though. Maybe firstword would be OK instead. I see I also fell into the usual ABI trap. It wouldn't have affected the use in this patch, but the default ought to be 32 rather than o32. the define is in tm_defines, not target_cpu_default. MIPS_ABI = $(or $(with_abi), \ $(if $(filter MIPS_ABI_DEFAULT=ABI_N32, \ $(tm_defines)), n32), \ 32) However I can't see yet how this should be used. Maybe Aurelian could come up with a tested version of this patch? How about the patch below? It first determines the ABI without using $(or), by looking (in this order) for $(with_abi), $(tm_defines) and $(target), defaulting to 32 if none of the previous matches. Then the multiarch path is constructed from the ABI. Index: gcc/config/mips/t-linux64 === --- gcc/config/mips/t-linux64 (révision 205437) +++ gcc/config/mips/t-linux64 (copie de travail) @@ -24,3 +24,22 @@ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) + +ifneq ($(with_abi),) +MIPS_ABI = $(with_abi) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=$(if $(strip $(filter MIPS_ABI_DEFAULT=ABI_N32, $(tm_defines))),n32) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI = $(subst abi,,$(subst gnu,,$(lastword $(subst -, ,$(target) +endif +ifeq ($(MIPS_ABI),) +MIPS_ABI=32 +endif + +ifeq ($(MIPS_ABI),32) +MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) +else +MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi$(MIPS_ABI)$(MIPS_SOFT)) hm, shouldn't this be gnueabi with an 'e' or is that implied for n64 anyway? just curious.. thanks, This is correct, the abis are n32 and 64. MIPS EABI is another ABI which hasn't really existed in practice. -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Mon, Nov 25, 2013 at 7:02 PM, Jakub Jelinek ja...@redhat.com wrote: On Mon, Nov 25, 2013 at 06:53:59PM +0400, Alexey Samsonov wrote: In GCC, libbacktrace is built as a libtool convenience library only and then linked into whatever libraries want to use it. So indeed, the plan is to link libbacktrace.la into libasan.so.1.0.0 and libasan.a (and the earlier GCC patch I've posted included corresponding toplevel configure/Makefile bits and libsanitizer Makefile/configure changes to make it happen). E.g. libgo.so.* links libbacktrace.la into itself too. So, for GCC user experience, things will work automatically. Good. Note, however, that you'd need at least two versions of libbacktrace - 32-bit and 64-bit. In GCC you get that automatically (unless --disable-multilib, but then you don't get 32-bit libsanitizer either say on x86_64-linux). it means finding some way in the build system/configure to detect or request libbacktrace availability (for libraries not included in GCC tree GCC's configure typically uses --with-package=path or --with-package-lib=path/--with-package-include=path) and just link against it and you'd just unpack/configure/build libbacktrace on one of your test bots. We indeed can add support for detecting presence of libbacktrace binaries and headers when we configure a build tree. But to just link against it we have to either link against it in Clang driver (I don't think we should add support for it), or link against it in all the places where we build something with -fsanitize=foo, and there are quite a few of them. Or change the rules for creation of *sanitizer_common.*a, so that you link the libbacktrace.la convenience library into it (if configure found it). If you build it using libtool/automake, you get it automatically, otherwise you can e.g. using ar unpack libbacktrace.a and note the filenames of the object files from it, then add those to the ar command line for creation of *sanitizer_common.*a. This would be messy, especially assuming that we have two (both pretty ugly) build systems in compiler-rt repository. And the argument about two versions (32-bit and 64-bit) of libbacktrace is more serious in this setting. Also, see note below. As for redirecting libc calls from libbacktrace to our internal versions (or interceptors) - if it works fine for our toy tests (which is the case), I suggest to wait for the actual problems gcc users may discover. If there will be need in compiling a separate sanitizer-specific version of libbacktrace, or providing an additional layer between libbacktrace and sanitizer runtimes, or both, this could probably be done in gcc version of the runtime only (w/o modifying any existing source files, only adding standalone gcc-specific stuff). Sure, worst case you keep it untested in your LLVM copy of libsanitizer and we'll just need to fix it up during merges if something breaks. If it will be used for GCC (and we have a P1 bug so it is a release blocker if llvm-symbolizer is still used), then it will be tested there. Yes, I think that's what we should do in the short term. I've submitted (slightly edited) version of your patch to compiler-rt as r195837, I think it should work after the next merge to gcc. I've done a minimal testing of this change (built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided, libbacktrace.a linked), and it kind of worked - libbacktrace functions were properly picked up, error callbacks was not called, but the symbolization didn't work either: Turns out libbacktrace doesn't work with executables produced by LLVM: emitted compile unit DIEs don't have neither DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference. It means that to build a set of address ranges corresponding to each compile unit, one actually has to extract all the subprogram DIEs. I treat this as a bug in libbacktrace (unless it's goal is to work only for GCC-produced binaries). That's what I was afraid when Ian suggested us to fork the sources. -- Alexey Samsonov, MSK
Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.
On Wed, Nov 27, 2013 at 12:54:14PM +0100, Richard Biener wrote: On Wed, 27 Nov 2013, Jakub Jelinek wrote: On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote: Hmm. I'm still thinking that we should handle this during the regular transform step. I wonder if it can't be done instead just in vectorizable_load, if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is invariant, just emit the (broadcasted) load not inside of the loop, but on the loop preheader edge. It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS. It's just a missed optimization I even noted when originally implementing support for invariant loads ... True, but only for non-simd loops, or if we proved it by looking at all relevant LOOP_VINFO_DDRSs. But, if it is not a simd loop, and not !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, wouldn't previous optimizations hoist the load before the loop already? Ick. I hate this behind-the-back stuff - so safelen doesn't mean that a[i] and a[0] do not alias. My initial understanding of the SIMD loops was also that it allows the the up to safelen consecutive iterations to be randomly reordered or intermixed without affecting valid programs, but further mails from Tobias and others on this topic plus testcases changed my understanding of it. Note that we don't purge LOOP_VINFO_DDRSs in any way for loop-safelen, just don't add versioning for aliasor punt if there is some possible (or proven) aliasing. Perhaps we could add a bool flag to loop_vinfo which would tell us whether the loop has no data dependencies at all (i.e. either for non-safelen is !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, or with safelen non-zero would be !LOOP_REQUIRES_VERSIONING_FOR_ALIAS). Then we could hoist if that flag is set or LOOP_REQUIRES_VERSIONING_FOR_ALIAS (because then the runtime test checks the dependency). Note that this will break with SLP stuff at least as that will re-order reads/writes. Not sure how safelen applies to SLP though. That is a[i] = i; b[i] = a[0]; a[i+1] = i+1; b[i+1] = a[1]; will eventually end up re-ordering reads/writes in non-obvious ways. You mean SLP inside of loop vectorization, right? Because for normal SLP outside of loop vectorizer simdlen is ignored and normal data ref is performed without any bypassing. Jakub
Re: [PING][GCC ARM]Refine scaled address expression on ARM
Ping^2 Thanks, bin On Fri, Nov 22, 2013 at 3:32 PM, bin.cheng bin.ch...@arm.com wrote: PING. Hi, there is a patch at http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01353.html which slipped away. Thanks, bin -- Best Regards.
Re: [RFC][LIBGCC][1 of 2] 64 bit divide implementation for processor without hw divide instruction
On 27 November 2013 06:53, Jeff Law l...@redhat.com wrote: On 11/25/13 17:29, Kugan wrote: [...] Thanks for the review. Is this OK for trunk now? Yes. Please install. jeff Thanks, committed on Kugan's behalf as r205444.
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 04:12:33PM +0400, Alexey Samsonov wrote: Sure, worst case you keep it untested in your LLVM copy of libsanitizer and we'll just need to fix it up during merges if something breaks. If it will be used for GCC (and we have a P1 bug so it is a release blocker if llvm-symbolizer is still used), then it will be tested there. Yes, I think that's what we should do in the short term. I've submitted (slightly edited) version of your patch to compiler-rt as r195837, I think Thanks, once Kostya merges it in, I'll rework a patch for it (which hopefully will not need to change anything in the sanitizer_common/ stuff, just Makefiles/configure and other stuff owned by GCC). it should work after the next merge to gcc. I've done a minimal testing of this change (built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided, libbacktrace.a linked), and it kind of worked - libbacktrace functions were properly picked up, error callbacks was not called, but the symbolization didn't work either: Turns out libbacktrace doesn't work with executables produced by LLVM: emitted compile unit DIEs don't have neither DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference. I'd call that a LLVM bug, not emitting those attributes on a CU sounds like a flaw in it. Jakub
Re: [PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.
On Wed, 27 Nov 2013, Jakub Jelinek wrote: On Wed, Nov 27, 2013 at 12:54:14PM +0100, Richard Biener wrote: On Wed, 27 Nov 2013, Jakub Jelinek wrote: On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote: Hmm. I'm still thinking that we should handle this during the regular transform step. I wonder if it can't be done instead just in vectorizable_load, if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is invariant, just emit the (broadcasted) load not inside of the loop, but on the loop preheader edge. It is safe even for !LOOP_REQUIRES_VERSIONING_FOR_ALIAS. It's just a missed optimization I even noted when originally implementing support for invariant loads ... True, but only for non-simd loops, or if we proved it by looking at all relevant LOOP_VINFO_DDRSs. But, if it is not a simd loop, and not !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, wouldn't previous optimizations hoist the load before the loop already? Well - there is the case of int g; int *p; for (;;) { *p = g; } where we know in the _vectorized_ loop body that they cannot alias (because we have at least two vector elements). But it seems we lost that optimization at some point as int g; void foo (int *p, int n) { int i; for (i = 0; i n; ++i) *p = g; } uses alias versioning. Bah ;) Maybe I'm just dreaming that I implemented that as well. Ick. I hate this behind-the-back stuff - so safelen doesn't mean that a[i] and a[0] do not alias. My initial understanding of the SIMD loops was also that it allows the the up to safelen consecutive iterations to be randomly reordered or intermixed without affecting valid programs, but further mails from Tobias and others on this topic plus testcases changed my understanding of it. Note that we don't purge LOOP_VINFO_DDRSs in any way for loop-safelen, just don't add versioning for aliasor punt if there is some possible (or proven) aliasing. Perhaps we could add a bool flag to loop_vinfo which would tell us whether the loop has no data dependencies at all (i.e. either for non-safelen is !LOOP_REQUIRES_VERSIONING_FOR_ALIAS, or with safelen non-zero would be !LOOP_REQUIRES_VERSIONING_FOR_ALIAS). Then we could hoist if that flag is set or LOOP_REQUIRES_VERSIONING_FOR_ALIAS (because then the runtime test checks the dependency). Note that this will break with SLP stuff at least as that will re-order reads/writes. Not sure how safelen applies to SLP though. That is a[i] = i; b[i] = a[0]; a[i+1] = i+1; b[i+1] = a[1]; will eventually end up re-ordering reads/writes in non-obvious ways. You mean SLP inside of loop vectorization, right? Because for normal SLP outside of loop vectorizer simdlen is ignored and normal data ref is performed without any bypassing. Yes, SLP inside of a loop. Richard.
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 4:20 PM, Jakub Jelinek ja...@redhat.com wrote: On Wed, Nov 27, 2013 at 04:12:33PM +0400, Alexey Samsonov wrote: Sure, worst case you keep it untested in your LLVM copy of libsanitizer and we'll just need to fix it up during merges if something breaks. If it will be used for GCC (and we have a P1 bug so it is a release blocker if llvm-symbolizer is still used), then it will be tested there. Yes, I think that's what we should do in the short term. I've submitted (slightly edited) version of your patch to compiler-rt as r195837, I think Thanks, once Kostya merges it in, I'll rework a patch for it (which hopefully will not need to change anything in the sanitizer_common/ stuff, just Makefiles/configure and other stuff owned by GCC). it should work after the next merge to gcc. I've done a minimal testing of this change (built the runtime with -DSANITIZER_LIBBACKTRACE, headers provided, libbacktrace.a linked), and it kind of worked - libbacktrace functions were properly picked up, error callbacks was not called, but the symbolization didn't work either: Turns out libbacktrace doesn't work with executables produced by LLVM: emitted compile unit DIEs don't have neither DW_AT_low_pc/DW_AT_high_pc pair, nor DW_AT_ranges reference. I'd call that a LLVM bug, not emitting those attributes on a CU sounds like a flaw in it. LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The standard tells that compilation unit entries may have attributes specifying the address range, but doesn't tell they are obligatory. DWARF consumers probably shouldn't rely on their presence. -- Alexey Samsonov, MSK
[PING] [PATCH, PR 57748] Check for out of bounds access, Part 2
Hi, ping... this patch still open: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02291.html Note: it does, as it is, _not_ depend on the keep_aligning patch. And it would fix some really nasty wrong code generation issues. Thanks Bernd. Date: Tue, 19 Nov 2013 15:39:39 +0100 Hello, this is a minor update to my previous version of this patch, (using a boolean expand_reference, instead of adding a new expand_modifier enum value): I forgot to pass down the expand_reference value at the second expand_expr call inside the case VIEW_CONVERT_EXPR. Sorry for the inconvenience. @@ -10219,7 +10229,8 @@ expand_expr_real_1 (tree exp, rtx target, enum mac } if (!op0) - op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); + op0 = expand_expr_real (treeop0, NULL_RTX, VOIDmode, modifier, + NULL, expand_reference); /* If the input and output modes are both the same, we are done. */ if (mode == GET_MODE (op0)) Boot-strapped and regression-tested on X86_64-pc-linux-gnu. Ok for trunk? Thanks Bernd. Date: Thu, 7 Nov 2013 13:58:55 +0100 oops - this time with attachments... Hi, On Fri, 25 Oct 2013 12:51:13, Richard Biener wrote: On Fri, Oct 25, 2013 at 12:02 PM, Bernd Edlinger bernd.edlin...@hotmail.de wrote: Hi, Eh ... even register struct { int i; int a[0]; } asm (ebx); works. Also with int a[1] but not with a[2]. So just handling trailing arrays makes this case regress as well. Now I'd call this use questionable ... but we've likely supported that for decades and cannot change that now. Back to fixing everything in expand. Richard. Ok, finally you asked for it. Here is my previous version of that patch again. I have now added a new value EXPAND_REFERENCE to the expand_modifier enumeration. It is almost like EXPAND_MEMORY but it does not interfere with constant values. I have done the same modification to VIEW_CONVERT_EXPR too, because this is a possible inner reference, itself. It is however inherently hard to test around this code. To understand this patch it is good to know what type of object the return value tem of get_inner_reference can be. From the program logic at get_inner_reference it is clear that the return value may *not* be BIT_FIELD_REF, COMPONENT_REF, ARRAY_REF, ARRAY_RANGE_REF, REALPART_EXPR, IMAGPART_EXPR. The result may be VIEW_CONVERT_EXPR only on a STRICT_ALIGNMENT target. This is probably further restricted because exp is gimplified. Usually the result will be a MEM_REF or a SSA_NAME of the memory where the structure is to be found. When you look at where EXPAND_MEMORY is handled you see it is special-cased in TARGET_MEM_REF, MEM_REF, ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF. At TARGET_MEM_REF, MEM_REF, VIEW_CONVERT_EXPR, it should be the same if EXPAND_MEMORY, EXPAND_WRITE or EXPAND_REFERENCE is given: If it is an unaligned memory, we just return the unaligned reference. This was missing for VIEW_CONVERT_EXPR, and unfortunately I have no test case, because it is only a problem for STRICT_ALIGNMENT targets, and even there it will certainly be really hard to find test cases that exercise this code. In ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF we do not have to touch the handling of the outer modifier. However we pass EXPAND_REFERENCE to the inner object, which should not be a recursive use of any ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF. TARGET_MEM_REF, MEM_REF and VIEW_CONVERT_EXPR know how to handle EXPAND_REFERENCE, anything else handles it like EXPAND_NORMAL. Boot-strapped and regression-tested on x86_64-linux-gnu OK for trunk? You point to a weak spot in expansion - that it handles creating the base MEM to offset with handled components by recursing into the case that handles bare MEM_REFs. This makes the bare MEM_REF handling code somewhat awkward (it's the one to assign mem-attrs which are later adjusted for example). Maybe a better appropach than adding yet another expand modifier would be to split out the base MEM expansion part out of the bare MEM_REF handling code so we can call that instead of recursing. In this light - instead of a new expand modifier don't you want an actual flag that specifies we are coming from a call that wants to expand a base? That is, allow EXPAND_SUM but with the recursion flag set? I think you are right. After some thought, I start to like that idea. This way we have at least much more flexibility, how to handle the inner references correctly, and if I change only the interfaces of expand_expr_real/real_1 that will not be used at too many places, either. Finally I think the recursion into the VIEW_CONVERT_EXPR case is only there because of the keep_aligning flag of get_inner_reference which should be obsolete now that we properly handle its effects in get_object_alignment. So you wouldn't need to adjust this path if we finally can get rid of that. I
Re: [PATCH] Remove keep_aligning from get_inner_reference
I'm fine with reverting it for now (you were in CC of the patch submission but silent on it, I asked for the patch to start simplifying the way mems are expanded - ultimately to avoid the recursion and mem-attribute compute by the recursion). Because I'm totally lost in this thread and its many sub-threads. We can come back during stage1. Sure, let's do that instead and not enter stage #3 with hazardous changes. get_object_alignment should be able to properly handle this case if you call it on the full reference in the normal_inner_ref: case. How exactly? Once you flatten everything with get_inner_reference at the beginning, the TYPE_ALIGN_OK flag on the VIEW_CONVERT_EXPR is lost. All the weird duplicate code on the VIEW_CONVERT_EXPR case should IMHO go. What has changed since 2004 exactly? If you do a grep for TYPE_ALIGN_OK on 4.1 and 4.9 trees, you get exactly the same 4 occurrences in the middle-end. -- Eric Botcazou
Patch ping (stage1-ish patches)
In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. Improve debug info for small structures (2) http://gcc.gnu.org/ml/gcc-patches/2013-10/msg02007.html -- Eric Botcazou
Re: [PATCH] Remove keep_aligning from get_inner_reference
On Wed, Nov 27, 2013 at 1:29 PM, Eric Botcazou ebotca...@adacore.com wrote: I'm fine with reverting it for now (you were in CC of the patch submission but silent on it, I asked for the patch to start simplifying the way mems are expanded - ultimately to avoid the recursion and mem-attribute compute by the recursion). Because I'm totally lost in this thread and its many sub-threads. We can come back during stage1. Sure, let's do that instead and not enter stage #3 with hazardous changes. get_object_alignment should be able to properly handle this case if you call it on the full reference in the normal_inner_ref: case. How exactly? Once you flatten everything with get_inner_reference at the beginning, the TYPE_ALIGN_OK flag on the VIEW_CONVERT_EXPR is lost. Well, I want tem = get_inner_reference (from, ...); op = expand_expr (tem); ... set_mem_attrs (op, from); instead of setting mem_attrs from 'tem' by some obfuscated means of recursing through multiple levels of expanding the memory reference. That is, completely refactor this stuff as it has become so non-obvious what happens. All the weird duplicate code on the VIEW_CONVERT_EXPR case should IMHO go. What has changed since 2004 exactly? If you do a grep for TYPE_ALIGN_OK on 4.1 and 4.9 trees, you get exactly the same 4 occurrences in the middle-end. Ah, get_object_alignment used keep_aligning ... Richard. -- Eric Botcazou
Re: [PATCH] Remove keep_aligning from get_inner_reference
Ah, get_object_alignment used keep_aligning ... Yes, the patch contains the rather explicit hunks: Index: gcc/builtins.c === --- gcc/builtins.c (revision 204101) +++ gcc/builtins.c (working copy) @@ -315,7 +315,7 @@ get_object_alignment_2 (tree exp, unsigned int *al /* Get the innermost object and the constant (bitpos) and possibly variable (offset) offset of the access. */ exp = get_inner_reference (exp, bitsize, bitpos, offset, -mode, unsignedp, volatilep, true); +mode, unsignedp, volatilep); /* Extract alignment information from the innermost object and possibly adjust bitpos and offset. */ @@ -346,10 +346,6 @@ get_object_alignment_2 (tree exp, unsigned int *al align = DECL_ALIGN (exp); known_alignment = true; } - else if (TREE_CODE (exp) == VIEW_CONVERT_EXPR) -{ - align = TYPE_ALIGN (TREE_TYPE (exp)); -} else if (TREE_CODE (exp) == INDIRECT_REF || TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == TARGET_MEM_REF) -- Eric Botcazou
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 04:31:48PM +0400, Alexey Samsonov wrote: LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The standard tells that compilation unit entries may have attributes specifying the address range, but doesn't tell they are obligatory. DWARF consumers probably shouldn't rely on their presence. DWARF is generally full of may have and almost no must have. If the DWARF consumer crashes on absence of some attribute/die etc., that would be of course a consumer's fault. But not being able to symbolize something because the provided DWARF info omits important stuff is not wrong. BTW, libbacktrace also uses symbol table (and for backtrace_syminfo uses that exclusively), so even if it can't symbolize using DWARF info, it should at least symbolize using symbol table (of course inlining info isn't available in that case, nor tail call frames (but libbacktrace doesn't support those yet, only GDB does so far)). Jakub
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 4:51 PM, Jakub Jelinek ja...@redhat.com wrote: On Wed, Nov 27, 2013 at 04:31:48PM +0400, Alexey Samsonov wrote: LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The standard tells that compilation unit entries may have attributes specifying the address range, but doesn't tell they are obligatory. DWARF consumers probably shouldn't rely on their presence. DWARF is generally full of may have and almost no must have. If the DWARF consumer crashes on absence of some attribute/die etc., that would be of course a consumer's fault. But not being able to symbolize something because the provided DWARF info omits important stuff is not wrong. Ok, I agree to call it missing feature instead of bug =) BTW, libbacktrace also uses symbol table (and for backtrace_syminfo uses that exclusively), so even if it can't symbolize using DWARF info, it should at least symbolize using symbol table (of course inlining info isn't available in that case, nor tail call frames (but libbacktrace doesn't support those yet, only GDB does so far)). Yes, symbol table symbolization works for me (it has no file/line info, of course). -- Alexey Samsonov, MSK
Re: [PING^2] [PATCH] PR59063
I don't like the unconditional -lrt added for -static-lib*san (note, you need it for both -static-lib{a,t}san). Makes sense. Perhaps it is time for libsanitizer.spec filled in during configure of libsanitizer that the spec would source in? Draft patch is attached, let's see if I understood your recommendation correctly. Some obvious quirks: 1) I didn't add link_libubsan/link_liblsan because they seem to be happy with default libs from %(link_sanitizer). 2) I left LIBASAN_EARLY_SPEC/LIBASAN_SPEC logic in gnu-user.h and gcc.c because they rely on LD_STATIC_OPTION and HAVE_LD_STATIC_DYNAMIC which are defined in gcc/configure and thus not available in libsanitizer/configure. -Y diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h index 157e147..60eb30d 100644 --- a/gcc/config/gnu-user.h +++ b/gcc/config/gnu-user.h @@ -126,19 +126,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see LD_STATIC_OPTION --whole-archive -ltsan --no-whole-archive \ LD_DYNAMIC_OPTION }}%{!static-libtsan:-ltsan} #endif - -/* Additional libraries needed by -static-libasan. */ -#undef STATIC_LIBASAN_LIBS -#define STATIC_LIBASAN_LIBS -ldl -lpthread - -/* Additional libraries needed by -static-libtsan. */ -#undef STATIC_LIBTSAN_LIBS -#define STATIC_LIBTSAN_LIBS -ldl -lpthread - -/* Additional libraries needed by -static-liblsan. */ -#undef STATIC_LIBLSAN_LIBS -#define STATIC_LIBLSAN_LIBS -ldl -lpthread - -/* Additional libraries needed by -static-libubsan. */ -#undef STATIC_LIBUBSAN_LIBS -#define STATIC_LIBUBSAN_LIBS -ldl -lpthread diff --git a/gcc/gcc.c b/gcc/gcc.c index 4edf677..f18ac46 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -535,20 +535,16 @@ proper position among the other output files. */ #define STACK_SPLIT_SPEC %{fsplit-stack: --wrap=pthread_create} #ifndef LIBASAN_SPEC -#ifdef STATIC_LIBASAN_LIBS -#define ADD_STATIC_LIBASAN_LIBS \ - %{static-libasan: STATIC_LIBASAN_LIBS } -#else -#define ADD_STATIC_LIBASAN_LIBS -#endif +#define STATIC_LIBASAN_LIBS \ + %{static-libasan:%:include(libsanitizer.spec)%(link_libasan) %(link_sanitizer)} #ifdef LIBASAN_EARLY_SPEC -#define LIBASAN_SPEC ADD_STATIC_LIBASAN_LIBS +#define LIBASAN_SPEC STATIC_LIBASAN_LIBS #elif defined(HAVE_LD_STATIC_DYNAMIC) #define LIBASAN_SPEC %{static-libasan: LD_STATIC_OPTION \ } -lasan %{static-libasan: LD_DYNAMIC_OPTION } \ - ADD_STATIC_LIBASAN_LIBS + STATIC_LIBASAN_LIBS #else -#define LIBASAN_SPEC -lasan ADD_STATIC_LIBASAN_LIBS +#define LIBASAN_SPEC -lasan STATIC_LIBASAN_LIBS #endif #endif @@ -557,20 +553,16 @@ proper position among the other output files. */ #endif #ifndef LIBTSAN_SPEC -#ifdef STATIC_LIBTSAN_LIBS -#define ADD_STATIC_LIBTSAN_LIBS \ - %{static-libtsan: STATIC_LIBTSAN_LIBS } -#else -#define ADD_STATIC_LIBTSAN_LIBS -#endif +#define STATIC_LIBTSAN_LIBS \ + %{static-libtsan:%:include(libsanitizer.spec)%(link_libtsan) %(link_sanitizer)} #ifdef LIBTSAN_EARLY_SPEC -#define LIBTSAN_SPEC ADD_STATIC_LIBTSAN_LIBS +#define LIBTSAN_SPEC STATIC_LIBTSAN_LIBS #elif defined(HAVE_LD_STATIC_DYNAMIC) #define LIBTSAN_SPEC %{static-libtsan: LD_STATIC_OPTION \ } -ltsan %{static-libtsan: LD_DYNAMIC_OPTION } \ - ADD_STATIC_LIBTSAN_LIBS + STATIC_LIBTSAN_LIBS #else -#define LIBTSAN_SPEC -ltsan ADD_STATIC_LIBTSAN_LIBS +#define LIBTSAN_SPEC -ltsan STATIC_LIBTSAN_LIBS #endif #endif @@ -579,34 +571,26 @@ proper position among the other output files. */ #endif #ifndef LIBLSAN_SPEC -#ifdef STATIC_LIBLSAN_LIBS -#define ADD_STATIC_LIBLSAN_LIBS \ - %{static-liblsan: STATIC_LIBLSAN_LIBS } -#else -#define ADD_STATIC_LIBLSAN_LIBS -#endif +#define STATIC_LIBLSAN_LIBS \ + %{static-liblsan:%:include(libsanitizer.spec) %(link_sanitizer)} #ifdef HAVE_LD_STATIC_DYNAMIC #define LIBLSAN_SPEC %{!shared:%{static-liblsan: LD_STATIC_OPTION \ } -llsan %{static-liblsan: LD_DYNAMIC_OPTION } \ - ADD_STATIC_LIBLSAN_LIBS } + STATIC_LIBLSAN_LIBS } #else -#define LIBLSAN_SPEC %{!shared:-llsan ADD_STATIC_LIBLSAN_LIBS } +#define LIBLSAN_SPEC %{!shared:-llsan STATIC_LIBLSAN_LIBS } #endif #endif #ifndef LIBUBSAN_SPEC -#ifdef STATIC_LIBUBSAN_LIBS -#define ADD_STATIC_LIBUBSAN_LIBS \ - %{static-libubsan: STATIC_LIBUBSAN_LIBS } -#else -#define ADD_STATIC_LIBUBSAN_LIBS -#endif +#define STATIC_LIBUBSAN_LIBS \ + %{static-libubsan:%:include(libsanitizer.spec) %(link_sanitizer)} #ifdef HAVE_LD_STATIC_DYNAMIC #define LIBUBSAN_SPEC %{static-libubsan: LD_STATIC_OPTION \ } -lubsan %{static-libubsan: LD_DYNAMIC_OPTION } \ - ADD_STATIC_LIBUBSAN_LIBS + STATIC_LIBUBSAN_LIBS #else -#define LIBUBSAN_SPEC -lubsan ADD_STATIC_LIBUBSAN_LIBS +#define LIBUBSAN_SPEC -lubsan STATIC_LIBUBSAN_LIBS #endif #endif diff --git a/gcc/testsuite/c-c++-common/asan/pr59063-1.c b/gcc/testsuite/c-c++-common/asan/pr59063-1.c index e69de29..a4e01f7 100644 --- a/gcc/testsuite/c-c++-common/asan/pr59063-1.c +++ b/gcc/testsuite/c-c++-common/asan/pr59063-1.c @@
[v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9
ext/random/hypergeometric_distribution/operators/values.cc fails to compile on Solaris 9 which lacks full C99 support in libc/libm: FAIL: ext/random/hypergeometric_distribution/operators/values.cc (test for excess errors) Excess errors: /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis tribution/operators/values.cc:41:45: error: 'hypergeometric_pdf' was not declare d in this scope /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis tribution/operators/values.cc:46:48: error: 'hypergeometric_pdf' was not declare d in this scope /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/ext/random/hypergeometric_dis tribution/operators/values.cc:51:47: error: 'hypergeometric_pdf' was not declare d in this scope /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7: error: void value not ignored as it ought to be /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7: error: void value not ignored as it ought to be /vol/gcc/src/hg/trunk/local/libstdc++-v3/testsuite/util/testsuite_random.h:56:7: error: void value not ignored as it ought to be WARNING: ext/random/hypergeometric_distribution/operators/values.cc compilation failed to produce executable hypergeometric_pdf is defined in testsuite/util/testsuite_random.h inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally. Fixed which the following patch which allows the test to compile and pass on i386-pc-solaris2.9. On i386-pc-solaris2.11, it FAILs with an Arithmetic exception, as it did before the patch: Program received signal SIGFPE, Arithmetic exception. [Switching to Thread 1 (LWP 1)] 0x080542ba in std::generate_canonicalunsigned int, 32u, std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u, 2567483615u, 11u, 4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u, 1812433253u (__urng=...) at /var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480 3480 return __sum / __tmp; __tmp is 0 here. Ok for mainline? Rainer 2013-11-27 Rainer Orth r...@cebitec.uni-bielefeld.de * testsuite/ext/random/hypergeometric_distribution/operators/values.cc (test01): Wrap in _GLIBCXX_USE_C99_MATH_TR1. diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc --- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc +++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc @@ -31,6 +31,7 @@ void test01() { +#if _GLIBCXX_USE_C99_MATH_TR1 using namespace __gnu_test; std::mt19937 eng; @@ -49,6 +50,7 @@ test01() auto bhd3 = std::bind(hd3, eng); testDiscreteDist(bhd3, [](int k) { return hypergeometric_pdf(k, 100, 20, 5); }); +#endif } int -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9
On 27 November 2013 14:14, Rainer Orth wrote: Ok for mainline? OK, thanks.
Re: [PING] [PATCH, PR 57748] Check for out of bounds access, Part 2
On Wed, Nov 27, 2013 at 1:29 PM, Bernd Edlinger bernd.edlin...@hotmail.de wrote: Hi, ping... this patch still open: http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02291.html Note: it does, as it is, _not_ depend on the keep_aligning patch. And it would fix some really nasty wrong code generation issues. I'll come back to all these patches once the late features have all hit stage3 (hopefully next week). We have some time left to fix this and related bugs. Thanks, Richard. Thanks Bernd. Date: Tue, 19 Nov 2013 15:39:39 +0100 Hello, this is a minor update to my previous version of this patch, (using a boolean expand_reference, instead of adding a new expand_modifier enum value): I forgot to pass down the expand_reference value at the second expand_expr call inside the case VIEW_CONVERT_EXPR. Sorry for the inconvenience. @@ -10219,7 +10229,8 @@ expand_expr_real_1 (tree exp, rtx target, enum mac } if (!op0) - op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); + op0 = expand_expr_real (treeop0, NULL_RTX, VOIDmode, modifier, + NULL, expand_reference); /* If the input and output modes are both the same, we are done. */ if (mode == GET_MODE (op0)) Boot-strapped and regression-tested on X86_64-pc-linux-gnu. Ok for trunk? Thanks Bernd. Date: Thu, 7 Nov 2013 13:58:55 +0100 oops - this time with attachments... Hi, On Fri, 25 Oct 2013 12:51:13, Richard Biener wrote: On Fri, Oct 25, 2013 at 12:02 PM, Bernd Edlinger bernd.edlin...@hotmail.de wrote: Hi, Eh ... even register struct { int i; int a[0]; } asm (ebx); works. Also with int a[1] but not with a[2]. So just handling trailing arrays makes this case regress as well. Now I'd call this use questionable ... but we've likely supported that for decades and cannot change that now. Back to fixing everything in expand. Richard. Ok, finally you asked for it. Here is my previous version of that patch again. I have now added a new value EXPAND_REFERENCE to the expand_modifier enumeration. It is almost like EXPAND_MEMORY but it does not interfere with constant values. I have done the same modification to VIEW_CONVERT_EXPR too, because this is a possible inner reference, itself. It is however inherently hard to test around this code. To understand this patch it is good to know what type of object the return value tem of get_inner_reference can be. From the program logic at get_inner_reference it is clear that the return value may *not* be BIT_FIELD_REF, COMPONENT_REF, ARRAY_REF, ARRAY_RANGE_REF, REALPART_EXPR, IMAGPART_EXPR. The result may be VIEW_CONVERT_EXPR only on a STRICT_ALIGNMENT target. This is probably further restricted because exp is gimplified. Usually the result will be a MEM_REF or a SSA_NAME of the memory where the structure is to be found. When you look at where EXPAND_MEMORY is handled you see it is special-cased in TARGET_MEM_REF, MEM_REF, ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF. At TARGET_MEM_REF, MEM_REF, VIEW_CONVERT_EXPR, it should be the same if EXPAND_MEMORY, EXPAND_WRITE or EXPAND_REFERENCE is given: If it is an unaligned memory, we just return the unaligned reference. This was missing for VIEW_CONVERT_EXPR, and unfortunately I have no test case, because it is only a problem for STRICT_ALIGNMENT targets, and even there it will certainly be really hard to find test cases that exercise this code. In ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF we do not have to touch the handling of the outer modifier. However we pass EXPAND_REFERENCE to the inner object, which should not be a recursive use of any ARRAY_REF, COMPONENT_REF, BIT_FIELD_REF, ARRAY_RANGE_REF. TARGET_MEM_REF, MEM_REF and VIEW_CONVERT_EXPR know how to handle EXPAND_REFERENCE, anything else handles it like EXPAND_NORMAL. Boot-strapped and regression-tested on x86_64-linux-gnu OK for trunk? You point to a weak spot in expansion - that it handles creating the base MEM to offset with handled components by recursing into the case that handles bare MEM_REFs. This makes the bare MEM_REF handling code somewhat awkward (it's the one to assign mem-attrs which are later adjusted for example). Maybe a better appropach than adding yet another expand modifier would be to split out the base MEM expansion part out of the bare MEM_REF handling code so we can call that instead of recursing. In this light - instead of a new expand modifier don't you want an actual flag that specifies we are coming from a call that wants to expand a base? That is, allow EXPAND_SUM but with the recursion flag set? I think you are right. After some thought, I start to like that idea. This way we have at least much more flexibility, how to handle the inner references correctly, and if I change only the interfaces of expand_expr_real/real_1 that will not be used at too many places, either. Finally I think the recursion into the
[PATCH] Fix PR58723
This fixes the cgraph machinery wrt internal calls and makes LTO and the recent OMP work regarding vectorization work (to some extent). LTO bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-11-27 Richard Biener rguent...@suse.de PR middle-end/58723 * cgraphbuild.c (build_cgraph_edges): Do not build edges for internal calls. (rebuild_cgraph_edges): Likewise. * ipa-inline-analysis.c (estimate_function_body_sizes): Skip internal calls. * tree-inline.c (estimate_num_insns): Estimate size of internal calls as 0. (gimple_expand_calls_inline): Do not try inline-expanding internal calls. * lto-streamer-in.c (input_cfg): Stream loop safelen, force_vect and simduid. (input_struct_function_base): Stream has_force_vect_loops and has_simduid_loops. (input_function): Adjust. * lto-streamer-out.c (output_cfg): Stream loop safelen, force_vect and simduid. (output_struct_function_base): Stream has_force_vect_loops and has_simduid_loops. Index: gcc/cgraphbuild.c === *** gcc/cgraphbuild.c.orig 2013-11-27 11:01:21.0 +0100 --- gcc/cgraphbuild.c 2013-11-27 11:01:33.767324692 +0100 *** build_cgraph_edges (void) *** 335,340 --- 335,342 if (decl) cgraph_create_edge (node, cgraph_get_create_node (decl), stmt, bb-count, freq); + else if (gimple_call_internal_p (stmt)) + ; else cgraph_create_indirect_edge (node, stmt, gimple_call_flags (stmt), *** rebuild_cgraph_edges (void) *** 464,469 --- 466,473 if (decl) cgraph_create_edge (node, cgraph_get_create_node (decl), stmt, bb-count, freq); + else if (gimple_call_internal_p (stmt)) + ; else cgraph_create_indirect_edge (node, stmt, gimple_call_flags (stmt), Index: gcc/ipa-inline-analysis.c === *** gcc/ipa-inline-analysis.c.orig 2013-11-27 11:01:21.0 +0100 --- gcc/ipa-inline-analysis.c 2013-11-27 11:01:33.771324737 +0100 *** estimate_function_body_sizes (struct cgr *** 2502,2508 } ! if (is_gimple_call (stmt)) { struct cgraph_edge *edge = cgraph_edge (node, stmt); struct inline_edge_summary *es = inline_edge_summary (edge); --- 2502,2509 } ! if (is_gimple_call (stmt) ! !gimple_call_internal_p (stmt)) { struct cgraph_edge *edge = cgraph_edge (node, stmt); struct inline_edge_summary *es = inline_edge_summary (edge); Index: gcc/tree-inline.c === *** gcc/tree-inline.c.orig 2013-11-27 11:01:21.0 +0100 --- gcc/tree-inline.c 2013-11-27 11:01:53.957557118 +0100 *** estimate_num_insns (gimple stmt, eni_wei *** 3801,3812 case GIMPLE_CALL: { ! tree decl = gimple_call_fndecl (stmt); struct cgraph_node *node = NULL; /* Do not special case builtins where we see the body. This just confuse inliner. */ ! if (!decl || !(node = cgraph_get_node (decl)) || node-definition) ; /* For buitins that are likely expanded to nothing or inlined do not account operand costs. */ --- 3801,3816 case GIMPLE_CALL: { ! tree decl; struct cgraph_node *node = NULL; /* Do not special case builtins where we see the body. This just confuse inliner. */ ! if (gimple_call_internal_p (stmt)) ! return 0; ! else if (!(decl = gimple_call_fndecl (stmt)) !|| !(node = cgraph_get_node (decl)) !|| node-definition) ; /* For buitins that are likely expanded to nothing or inlined do not account operand costs. */ *** gimple_expand_calls_inline (basic_block *** 4427,4432 --- 4431,4437 gimple stmt = gsi_stmt (gsi); if (is_gimple_call (stmt) + !gimple_call_internal_p (stmt) expand_call_inline (bb, stmt, id)) return true; } Index: gcc/lto-streamer-in.c === *** gcc/lto-streamer-in.c.orig 2013-11-25 10:44:28.0 +0100 --- gcc/lto-streamer-in.c 2013-11-27 11:28:13.006784679 +0100 *** make_new_block (struct function *fn, uns *** 598,604 /* Read the CFG for function FN from input block IB. */
Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9
Hi, On 11/27/2013 03:14 PM, Rainer Orth wrote: hypergeometric_pdf is defined in testsuite/util/testsuite_random.h inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally. Fixed which the following patch which allows the test to compile and pass on i386-pc-solaris2.9. On i386-pc-solaris2.11, it FAILs with an Arithmetic exception, as it did before the patch: Program received signal SIGFPE, Arithmetic exception. [Switching to Thread 1 (LWP 1)] 0x080542ba in std::generate_canonicalunsigned int, 32u, std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u, 2567483615u, 11u, 4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u, 1812433253u (__urng=...) at /var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480 3480 return __sum / __tmp; __tmp is 0 here. Ok for mainline? Please use // { dg-require-cmath } instead. Thanks, Paolo.
Re: [C++ Patch] PR 58647
On 11/27/2013 05:22 AM, Paolo Carlini wrote: Thus something like the below? Passes testing. Yep. With a comment that we can only get there in checking mode via build_non_dependent_expr, because any expression that calls or takes the address of the function will have pull a FUNCTION_DECL out of the COMPONENT_REF. Jason
Re: [PATCH] Fix split_live_ranges_for_shrink_wrap (PR rtl-optimization/59166)
On Tue, Nov 26, 2013 at 09:25:40PM +0100, Jakub Jelinek wrote: Hi! The problem on this testcase is that we have (debug_insn 30 29 31 7 (var_location:HI D#1 (subreg:HI (reg/v:SI 93 [ p ]) 0)) pr59166.c:20 -1 (nil)) and split_live_ranges_for_shrink_wrap decides to replace SImode pseudo 93 with some other SImode pseudo. But it uses DF_REF_LOC, which is address of the HImode subreg around the pseudo, validate_change succeeds on it, because there is no validation inside of debug_insns and then we crash during var-tracking because of a mode mismatch. thanks a lot for fixing it, I had no idea that DF_REF_REAL_LOC even existed. This change also apparently makes it possible to throw away the SUBREG check in the same function. I'll enqueue a patch removing it for the next stage1. The following patch uses DF_REF_REAL_LOC instead, so that it looks through the subreg and adjusts what it should. Code inspection showed the same issue in find_moveable_pseudos, don't have a testcase for that though. That is where I copied the substitution from. Martin Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2013-11-26 Jakub Jelinek ja...@redhat.com PR rtl-optimization/59166 * ira.c (find_moveable_pseudos): Use DF_REF_REAL_LOC instead of DF_REF_LOC in validate_change call. (split_live_ranges_for_shrink_wrap): Likewise. * gcc.dg/torture/pr59166.c: New test.
Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C
Iyer, Balaji V balaji.v.i...@intel.com writes: c_finish_omp_declare_simd (c_parser *parser, tree fndecl, tree parms, vecc_token clauses) { + + if (flag_enable_cilkplus + clauses.exists () !vec_safe_is_empty (parser-elem_fn_tokens)) +{ + error (%#pragma omp declare simd% cannot be used in the same + function marked as a SIMD-enabled function); + vec_free (parser-elem_fn_tokens); + return; +} I see Cilk Plus elementals are handled as omp declare simd in the above function. This function sets an omp declare simd attribute here: if (c != NULL_TREE) c = tree_cons (NULL_TREE, c, NULL_TREE); c = build_tree_list (get_identifier (omp declare simd), c); TREE_CHAIN (c) = DECL_ATTRIBUTES (fndecl); DECL_ATTRIBUTES (fndecl) = c; but you also need a cilk plus elemental attribute so the rest of the compiler can differentiate Cilk Plus elementals from omp declare simds. See simd_clone_clauses_extract(): + /* To distinguish from an OpenMP simd clone, Cilk Plus functions to + be cloned have a distinctive artificial label in addition to omp + declare simd. */ + bool cilk_clone += (flag_enable_cilkplus +lookup_attribute (cilk plus elemental, + DECL_ATTRIBUTES (node-decl))); Also you probably want some kind of test for this, so test for whatever cilk_elemental is doing. In trunk, the only use of cilk_elemental is in ix86_simd_clone_compute_vecsize_and_simdlen(), so come up with some x86 specific test for cilk_elemental==true. Aldy
Re: [C++ Patch] PR 58647
On 11/27/2013 04:35 PM, Jason Merrill wrote: On 11/27/2013 05:22 AM, Paolo Carlini wrote: Thus something like the below? Passes testing. Yep. With a comment that we can only get there in checking mode via build_non_dependent_expr, because any expression that calls or takes the address of the function will have pull a FUNCTION_DECL out of the COMPONENT_REF. Agreed, I applied the below. Since the issue doesn't affect release-mode for the time being at least I'm not going to fiddle with 4.7/4.8. Thanks, Paolo. /// /cp 2013-11-27 Paolo Carlini paolo.carl...@oracle.com PR c++/58647 * semantics.c (cxx_eval_constant_expression, [COMPONENT_REF]): Handle function COMPONENT_REFs. /testsuite 2013-11-27 Paolo Carlini paolo.carl...@oracle.com PR c++/58647 * g++.dg/parse/crash66.C: New. Index: cp/semantics.c === --- cp/semantics.c (revision 205448) +++ cp/semantics.c (working copy) @@ -9603,6 +9603,16 @@ cxx_eval_constant_expression (const constexpr_call break; case COMPONENT_REF: + if (is_overloaded_fn (t)) + { + /* We can only get here in checking mode via +build_non_dependent_expr, because any expression that +calls or takes the address of the function will have +pulled a FUNCTION_DECL out of the COMPONENT_REF. */ + gcc_checking_assert (allow_non_constant); + *non_constant_p = true; + return t; + } r = cxx_eval_component_reference (call, t, allow_non_constant, addr, non_constant_p, overflow_p); break; Index: testsuite/g++.dg/parse/crash66.C === --- testsuite/g++.dg/parse/crash66.C(revision 0) +++ testsuite/g++.dg/parse/crash66.C(working copy) @@ -0,0 +1,11 @@ +// PR c++/58647 + +struct A +{ + static void foo(); +}; + +templatetypename void bar() +{ + A().foo; +}
Re: RFA: patch to fix PR58785 (an ARM LRA crash)
On 20/11/13 09:22, Yvan Roux wrote: Hi, as Richard said, only a subset of rclass is allowed to be returned by preferred_reload_class. I've tested the attached patched in Thumb mode, on ARMv5, A9 and A9hf and on cross A15 without regression. Yvan 2013-11-20 Yvan Roux yvan.r...@linaro.org PR target/58785 * config/arm/arm.c (arm_preferred_reload_class): Only return LO_REGS when rclass is GENERAL_REGS. preferred_reload.diff diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 5c53440..63f10bd 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -6882,10 +6882,7 @@ arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) return rclass; else { - if (rclass == GENERAL_REGS - || rclass == HI_REGS - || rclass == NO_REGS - || rclass == STACK_REG) + if (rclass == GENERAL_REGS) return LO_REGS; else return rclass; OK. R.
Re: [v3] Fix ext/random/hypergeometric_distribution/operators/values.cc on Solaris 9
Hi Paolo, On 11/27/2013 03:14 PM, Rainer Orth wrote: hypergeometric_pdf is defined in testsuite/util/testsuite_random.h inside _GLIBCXX_USE_C99_MATH_TR1, but used unconditionally. Fixed which the following patch which allows the test to compile and pass on i386-pc-solaris2.9. On i386-pc-solaris2.11, it FAILs with an Arithmetic exception, as it did before the patch: Program received signal SIGFPE, Arithmetic exception. [Switching to Thread 1 (LWP 1)] 0x080542ba in std::generate_canonicalunsigned int, 32u, std::mersenne_twister_engineunsigned int, 32u, 624u, 397u, 31u, 2567483615u, 11u, 4294967295u, 7u, 2636928640u, 15u, 4022730752u, 18u, 1812433253u (__urng=...) at /var/gcc/regression/trunk/11-gcc/build/i386-pc-solaris2.11/libstdc++-v3/include/bits/random.tcc:3480 3480 return __sum / __tmp; __tmp is 0 here. Ok for mainline? Please use // { dg-require-cmath } instead. make sense since this renders the test unsupported instead of pass. Since I'd already committed the previous version based on Jonathan's approval, I've installed the following on top of it after testing with the appropriate runtest command. Rainer 2013-11-27 Rainer Orth r...@cebitec.uni-bielefeld.de * testsuite/ext/random/hypergeometric_distribution/operators/values.cc: Use dg-require-cmath instead. diff --git a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc --- a/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc +++ b/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/operators/values.cc @@ -1,5 +1,6 @@ // { dg-options -std=gnu++11 } // { dg-require-cstdint } +// { dg-require-cmath } // // 2013-11-18 Edward M. Smith-Rowland 3dw...@verizon.net // @@ -31,7 +32,6 @@ void test01() { -#if _GLIBCXX_USE_C99_MATH_TR1 using namespace __gnu_test; std::mt19937 eng; @@ -50,7 +50,6 @@ test01() auto bhd3 = std::bind(hd3, eng); testDiscreteDist(bhd3, [](int k) { return hypergeometric_pdf(k, 100, 20, 5); }); -#endif } int -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives
On Wed, 27 Nov 2013, mtewoodb...@gmail.com wrote: Copyright 2013 assigned to the Free Software Foundation. FWIW I don't see this in copyright.list yet. If you sent the paperwork (whether paper mail or scans) to the FSF over a week ago and haven't had it acknowledged, please chase up ass...@gnu.org, and keep chasing them up weekly until it's acknowledged and a maintainer confirms it's now listed in copyright.list. diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c b/gcc/testsuite/gcc.dg/cpp/line4.c I think it's best to leave this test as-is and add a new test line9.c specifically for this case of #line __LINE__. Changing existing tests unnecessarily complicates establishing whether a newly seen failure is a regression or not. I think it would be worth including tests where the __LINE__ expansion (which as you note should be the line number of the line *after* the directive) is involved in concatenation or stringized, e.g. #define xstr(x) #x #define str(x) xstr(x) #line 1 str(__LINE__) which should set __FILE__ to be the stringized number of the line after the directive. (I hope these cases will just work given your patch, so this is just a matter of adding more to the testcase.) -- Joseph S. Myers jos...@codesourcery.com
Re: wide-int, rtl
Richi has asked the we break the wide-int patch so that the individual port and front end maintainers can review their parts without have to go through the entire patch.This patch covers the first half of the rtl code. --- a/gcc/cse.c +++ b/gcc/cse.c @@ -2336,15 +2336,23 @@ hash_rtx_cb (const_rtx x, enum machine_mode mode, + (unsigned int) INTVAL (x)); return hash; +case CONST_WIDE_INT: + { + int i; + for (i = 0; i CONST_WIDE_INT_NUNITS (x); i++) + hash += CONST_WIDE_INT_ELT (x, i); + } + return hash; You can write for (int i = 0; ... now and remove the parentheses. --- a/gcc/cselib.c +++ b/gcc/cselib.c @@ -1121,15 +1120,23 @@ cselib_hash_rtx (rtx x, int create, enum machine_mode memmode) hash += ((unsigned) CONST_INT 7) + INTVAL (x); return hash ? hash : (unsigned int) CONST_INT; +case CONST_WIDE_INT: + { + int i; + for (i = 0; i CONST_WIDE_INT_NUNITS (x); i++) + hash += CONST_WIDE_INT_ELT (x, i); + } + return hash; + Likewise. --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -3298,23 +3268,13 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, pow = n + lgup; pow2 = n + lgup - precision; - /* We could handle this with some effort, but this case is much - better handled directly with a scc insn, so rely on caller using - that. */ - gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT); Why removing it? --- a/gcc/expr.c +++ b/gcc/expr.c @@ -722,64 +722,33 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns if (mode == oldmode) return x; - /* There is one case that we must handle specially: If we are converting - a CONST_INT into a mode whose size is twice HOST_BITS_PER_WIDE_INT and - we are to interpret the constant as unsigned, gen_lowpart will do - the wrong if the constant appears negative. What we want to do is - make the high-order word of the constant zero, not all ones. */ - - if (unsignedp GET_MODE_CLASS (mode) == MODE_INT - GET_MODE_BITSIZE (mode) == HOST_BITS_PER_DOUBLE_INT - CONST_INT_P (x) INTVAL (x) 0) + if (CONST_SCALAR_INT_P (x) + GET_MODE_CLASS (mode) == MODE_INT) On a single line. { - double_int val = double_int::from_uhwi (INTVAL (x)); - - /* We need to zero extend VAL. */ - if (oldmode != VOIDmode) - val = val.zext (GET_MODE_BITSIZE (oldmode)); - - return immed_double_int_const (val, mode); + /* If the caller did not tell us the old mode, then there is +not much to do with respect to canonization. We have to assume +that all the bits are significant. */ + if (GET_MODE_CLASS (oldmode) != MODE_INT) + oldmode = MAX_MODE_INT; + wide_int w = wide_int::from (std::make_pair (x, oldmode), + GET_MODE_PRECISION (mode), + unsignedp ? UNSIGNED : SIGNED); + return immed_wide_int_const (w, mode); canonicalization? @@ -5301,10 +5271,10 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal) alt_rtl); } - /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not - the same as that of TARGET, adjust the constant. This is needed, for - example, in case it is a CONST_DOUBLE and we want only a word-sized - value. */ + /* If TEMP is a VOIDmode constant and the mode of the type of EXP is + not the same as that of TARGET, adjust the constant. This is + needed, for example, in case it is a CONST_DOUBLE or + CONST_WIDE_INT and we want only a word-sized value. */ if (CONSTANT_P (temp) GET_MODE (temp) == VOIDmode TREE_CODE (exp) != ERROR_MARK GET_MODE (target) != TYPE_MODE (TREE_TYPE (exp))) Why reformatting the whole comment? @@ -9481,11 +9459,19 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, return decl_rtl; case INTEGER_CST: - temp = immed_double_const (TREE_INT_CST_LOW (exp), -TREE_INT_CST_HIGH (exp), mode); - - return temp; - + { + tree type = TREE_TYPE (exp); Redundant, see the beginning of the function. + /* One could argue that GET_MODE_PRECISION (TYPE_MODE (type)) + should always be the same as TYPE_PRECISION (type). + However, it is not. Since we are converting from tree to + rtl, we have to expose this ugly truth here. */ + temp = immed_wide_int_const (wide_int::from + (exp, + GET_MODE_PRECISION (TYPE_MODE (type)), + TYPE_SIGN (type)), +TYPE_MODE (type)); + return temp; + } I don't really see how one could argue that, given that there are much fewer modes than possible type precisions, so please rewrite
a testcase for PR57410
Here is the test I've committed as rev. 205451 for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57410 2013-11-27 Vladimir Makarov vmaka...@redhat.com PR rtl-optimization/57410 * gcc.target/i386/pr57410.c: New. Index: testsuite/gcc.target/i386/pr57410.c === --- testsuite/gcc.target/i386/pr57410.c (revision 0) +++ testsuite/gcc.target/i386/pr57410.c (working copy) @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options -O -fpeel-loops } */ + +extern char outbuffer[]; +extern char buffer[]; + +void foo(int j) +{ + unsigned i, fp = fp; + for (i = 0; i 6; i++) +buffer[j++] = outbuffer[fp - i]; +}
Re: [PATCH] Improve handling of threads which cross over the current loops header
On 11/27/13 04:28, Richard Biener wrote: Ick ;) Loop fixup isn't able to recover the original loop here, it discovers a new one, the one just covering looping through the default: case of the switch. I wouldn't expect it to. I don't have the transformed CFG in front of me right now, but as you probably saw, it's scrambled pretty badly. Sadly, we found out yesterday that the submitter of that BZ reduced coremark too far and the transformation doesn't actually apply to coremark. We're not going to try and extend things at this point in the game -- that'll have to be 4.10/5.0 material. jeff
Re: [PING] [PATCH, ARM, testcase] Skip target arm-neon for lp1243022.c
On 11/27/13 02:05, Zhenqiang Chen wrote: Ping? Thanks for including the actual patch you're pinging, it helps :-) Hi, lp1243022.c will fail with options: -mfpu=neon -mfloat-abi=hard. Logs show it does not generate auto-incremental instruction in pass auto_inc_dec. In this case, the check of REG_INC note at subreg2 will be invalid. So skip the check for target arm-neon. All PASS with the following options: -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=vfpv3 -mthumb/-mcpu=cortex-a9/-mfloat-abi=soft/-mfpu=neon -mthumb/-mcpu=cortex-a9/-mfloat-abi=softfp/-mfpu=neon -mthumb/-mcpu=cortex-a9/-mfloat-abi=hard/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=vfpv4 -mthumb/-mcpu=cortex-a15/-mfloat-abi=soft/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=softfp/-mfpu=neon -mthumb/-mcpu=cortex-a15/-mfloat-abi=hard/-mfpu=neon Is it OK? Thanks! -Zhenqiang testsuite/ChangeLog: 2013-11-08 Zhenqiang Chen zhenqiang.c...@linaro.org * gcc.target/arm/lp1243022.c: Skip target arm-neon. It seems to me you should be xfailing arm-neon, not skipping the test. Unless there is some fundamental reason why we can not generate auto-inc instructions on the neon. jeff
Re: [PATCH, score] Remove unused REG_CLASS_FROM_LETTER define
On 11/27/13 02:52, Liqin Chen wrote: 2013-11-27 Chen Liqin liqin@gmail.com * config/score/score.h (REG_CLASS_FROM_LETTER): Delete. Installed on the trunk. Thanks. jeff
Re: Some wide-int review comments
committed as revision 205448 to trunk. committed as revision 205455 to wide-int branch. On 11/27/2013 05:50 AM, Richard Biener wrote: On Tue, Nov 26, 2013 at 5:33 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: Richi, patch ping Ok. Thanks, Richard. also two more pieces of information.With further testing, this seems to fix Tests that now work, but didn't before: === ext/random/hypergeometric_distribution/operators/values.cc (test for excess errors) New tests that PASS: ext/random/hypergeometric_distribution/operators/values.cc execution test also, the corresponding frag for fold-const.c on the wide-int branch will look like Index: gcc/fold-const.c === --- gcc/fold-const.c(revision 205224) +++ gcc/fold-const.c(working copy) @@ -1030,51 +1030,51 @@ int_const_binop_1 (enum tree_code code, case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: - res = wi::div_trunc (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_trunc (arg1, arg2, sign, overflow); break; case FLOOR_DIV_EXPR: - res = wi::div_floor (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_floor (arg1, arg2, sign, overflow); break; case CEIL_DIV_EXPR: - res = wi::div_ceil (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_ceil (arg1, arg2, sign, overflow); break; case ROUND_DIV_EXPR: - res = wi::div_round (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::div_round (arg1, arg2, sign, overflow); break; case TRUNC_MOD_EXPR: - res = wi::mod_trunc (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_trunc (arg1, arg2, sign, overflow); break; case FLOOR_MOD_EXPR: - res = wi::mod_floor (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_floor (arg1, arg2, sign, overflow); break; case CEIL_MOD_EXPR: - res = wi::mod_ceil (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_ceil (arg1, arg2, sign, overflow); break; case ROUND_MOD_EXPR: - res = wi::mod_round (arg1, arg2, sign, overflow); - if (overflow) + if (arg2 == 0) return NULL_TREE; + res = wi::mod_round (arg1, arg2, sign, overflow); break; case MIN_EXPR: On 11/20/2013 06:31 PM, Kenneth Zadeck wrote: On 11/13/2013 04:59 AM, Richard Biener wrote: On Tue, Nov 12, 2013 at 5:43 PM, Kenneth Zadeck zad...@naturalbridge.com wrote: On 11/12/2013 11:27 AM, Joseph S. Myers wrote: On Tue, 12 Nov 2013, Kenneth Zadeck wrote: Richi, i am having a little trouble putting this back the way that you want. The issue is rem. what is supposed to happen for INT_MIN % -1? I would assume because i am failing the last case of gcc.dg/c90-const-expr-8.c that INT_MIN %-1 should not overflow even if INT_MIN / -1 does. however, Given the conclusion in C11 that a%b should be considered undefined if a/b is not representable, I think it's reasonable to say INT_MIN % -1 *should* be considered to overflow (for all C standard versions) (and bug 30484 is only a bug for -fwrapv). however, my local question is what do we want the api to be int-const-binop-1?The existing behavior seems to be that at least for common modes this function silently returns 0 and it is up to the front ends to put their own spin on it. For wide-int you create 1:1 the behavior of current trunk (if a change of behavior in TImode is not tested in the testsuite then you can ignore that). Whatever change you do to semantics of functions you do separately from wide-int (preferably first on trunk, or at your choice after the wide-int merge). For this case in question I'd say a % -1 should return 0, but for INT_MIN % -1 that 0 should have TREE_OVERFLOW set (and thus you need to adjust that c90-const-expr-8.c testcase). Richard. kenny richi, I have done this exactly as you suggested. bootstrapped and regression tested on x86-64. 2013-11-20 Kenneth Zadeck zad...@naturalbridge.com * fold-const.c (int_const_binop_1): Make INT_MIN % -1 return 0 with the overflow bit set. 2013-11-20 Kenneth Zadeck zad...@naturalbridge.com * gcc.dg/c90-const-expr-8.c: Look for overflow on INT_MIN % -1. * gcc.dg/c99-const-expr-8.c: Look for overflow on INT_MIN % -1. ok to commit? kenny Index: gcc/fold-const.c === --- gcc/fold-const.c (revision 205447) +++ gcc/fold-const.c (working copy) @@
Re: [PING]RE: [PATCH] _Cilk_for for C and C++
On 11/26/2013 12:23 PM, Iyer, Balaji V wrote: Did you get a chance to look at my _Cilk_for patch for C? BTW, I think pinging less than 24 hours after you send the patch is a bit excessive. :) Jason
Re: [PATCH] _Cilk_for for C and C++
On 11/25/2013 11:03 PM, Iyer, Balaji V wrote: On a broad note, I think there's a lot of OpenMP code you could be reusing here rather than writing it all again. And that way Cilk code will benefit from improvements to OpenMP handling, and vice versa. It probably makes sense to turn Cilk_for into an OMP_FOR loop, and then gimplify into GIMPLE_OMP_FOR, rather than create a new tree code and handle everything at the tree level. But I don't know the OMP code well enough to suggest exactly how that would work. Finer-grained comments: + tree exp = build_new_op (EXPR_LOCATION (op1), code, flags, op0, op1, + NULL_TREE, NULL, tf_none); + if (exp == error_mark_node) +exp = build_x_modify_expr (EXPR_LOCATION (op1), op0, code, op1, tf_none); + if (exp exp != error_mark_node) +return exp; I thought you were changing this? +/* Handler for iterator to compute the loop variable. ADD_OP indicates + whether we need a '+' or '-' operation. LOW indicates the starting point + and LOOP_VAR is the induction variable. This functin returns an + INIT_EXPR. */ This comment still doesn't document VAR2. function + tree exp = build_new_op (loc, add_op, 0, low, loop_var, NULL_TREE, 0, + tf_none); + gcc_assert (exp != error_mark_node); + exp = cp_build_modify_expr (var2, INIT_EXPR, exp, tf_warning_or_error); Looking at online Cilk documentation I see: The increment expression must add to or subtract from the control variable using one of the following supported operations: += -= ++ (prefix or postfix) -- (prefix or postfix) From this, I think people would expect the increment to use a user-defined operator+=/-=/++/--, but your code above uses operator+/- instead. + -fcilkplus must be enabled t use %_Cilk_for%); to +cp_parser_cilk_for (cp_parser *parser, tree grain) Please reuse cp_parser_omp_for, like Aldy did for #pragma simd (cp_parser_cilk_simd) rather than write yet another for-statement parser. This should reduce the patch size quite a bit. +case PRAGMA_CILK_GRAINSIZE: + if (context == pragma_external) + { + error_at (pragma_tok-location, + %#pragma cilk grainsize% may only be be used inside a + function); + break; + } + + /* Ignore the pragma if Cilk Plus is not enabled. */ + if (flag_enable_cilkplus) + { + cp_parser_cilk_grainsize (parser, pragma_tok); + return true; + } default: Do you mean to fall through to the default case if Cilk+ is not enabled? + tmp = CILK_FOR_COND (t); + if (COMPARISON_CLASS_P (tmp)) + { + tree op0 = RECUR (TREE_OPERAND (tmp, 0)); + tree op1 = RECUR (TREE_OPERAND (tmp, 1)); + tmp = build2 (TREE_CODE (tmp), boolean_type_node, op0, op1); + } + CILK_FOR_COND (stmt) = tmp; Why not just recur into CILK_FOR_COND? + tmp = CILK_FOR_EXPR (t); + if (TREE_CODE (tmp) == MODIFY_EXPR) + { + tree lhs = TREE_OPERAND (tmp, 0); + tree rhs = TREE_OPERAND (tmp, 1); + lhs = RECUR (lhs); + rhs = build2 (TREE_CODE (rhs), TREE_TYPE (lhs), + RECUR (TREE_OPERAND (rhs, 0)), + RECUR (TREE_OPERAND (rhs, 1))); + tmp = build2 (MODIFY_EXPR, void_type_node, lhs, rhs); + } + else + tmp = build2 (TREE_CODE (tmp), void_type_node, + RECUR (TREE_OPERAND (tmp, 0)), + RECUR (TREE_OPERAND (tmp, 1))); + finish_for_expr (tmp, stmt); And CILK_FOR_EXPR? Jason
Re: [Patch, RTL] Eliminate redundant vec_select moves.
Richard Sandiford wrote: Tejas Belagod tbela...@arm.com writes: The problem is that one reg rtx can span several hard registers. E.g. (reg:V4SI 32) might represent one 64-bit register (no. 32), but it might instead represent two 32-bit registers (nos. 32 and 33). Obviously the latter's not very likely for vectors this small, but more likely for larger ones (including on NEON IIRC). So if we had 2 32-bit registers being treated as a V4HI, it would be: --3233-- msb lsb msb lsb --32-- for big endian and: --3332-- msb lsb msb lsb --32-- for little endian. Ah, ok, that makes things clearer. Thanks for that. I can't find any helper function that figures out if we're writing partial or full result regs. Would something like REGNO (src) == REGNO (dst) HARD_REGNO_NREGS (src) == HARD_REGNO_NREGS (dst) == 1 be a sane check for partial result regs? Yeah, that should work. I think a more general alternative would be: simplify_subreg_regno (REGNO (src), GET_MODE (src), offset, GET_MODE (dst)) == (int) REGNO (dst) where: offset = GET_MODE_UNIT_SIZE (GET_MODE (src)) * INTVAL (XVECEXP (sel, 0)) That offset is the byte offset of the first selected element from the start of a vector in memory, which is also the way that SUBREG_BYTEs are counted. For little-endian it gives the offset of the lsb of the slice, while for big-endian it gives the offset of the msb (which is also how SUBREG_BYTEs work). The simplify_subreg_regno should cope with both single-register vectors and multi-register vectors. Sorry for the delayed response to this. Thanks for the tip. Here's an improved patch that implements the simplify_sureg_regno () method of eliminating redundant moves. Regarding the test case, I failed to get the ppc back-end to generate RTL pattern that this patch checks for. I can easily write a test case for aarch64(big and little endian) on these lines typedef float float32x4_t __attribute__ ((__vector_size__ (16))); float foo_be (float32x4_t x) { return x[3]; } float foo_le (float32x4_t x) { return x[0]; } where I know that the vector indexing will generate a vec_select on the same src and dst regs that could be optimized away and hence test it. But I'm struggling to get a test case that the ppc altivec back-end will generate such a vec_select for. I see that altivec does not define vec_extract, so a simple indexing like this seems to happen via memory. Also, I don't know enough about the ppc PCS or architecture to write a test that will check for this optimization opportunity on same src and dst hard-registers. Any hints? This patch has been bootstrapped on x64_64 and regressed on aarch64-none-elf and aarch64_be-none-elf. Thanks for your patience, Tejas.diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 0cd0c7e..ca25ce5 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -1180,6 +1180,22 @@ set_noop_p (const_rtx set) dst = SUBREG_REG (dst); } + /* It is a NOOP if destination overlaps with selected src vector + elements. */ + if (GET_CODE (src) == VEC_SELECT + REG_P (XEXP (src, 0)) REG_P (dst) + HARD_REGISTER_P (XEXP (src, 0)) + HARD_REGISTER_P (dst)) +{ + rtx par = XEXP (src, 1); + rtx src0 = XEXP (src, 0); + HOST_WIDE_INT offset = + GET_MODE_UNIT_SIZE (GET_MODE (src0)) * INTVAL (XVECEXP (par, 0, 0)); + + return simplify_subreg_regno (REGNO (src0), GET_MODE (src0), + offset, GET_MODE (dst)) == (int)REGNO (dst); +} + return (REG_P (src) REG_P (dst) REGNO (src) == REGNO (dst)); }
Re: [PATCH] Fix VRP register_edge_assert_for_1 (PR tree-optimization/59014)
On Tue, Nov 26, 2013 at 02:15:58PM -0700, Jeff Law wrote: On 11/26/13 13:33, Jakub Jelinek wrote: Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? I'll try to create a testcase for 4.8 branch tomorrow. I've managed to create a testcase which reproduces this on 4.8 too, so I've committed it to trunk and committed both the patch from yesterday and this one to 4.8 branch too. 2013-11-27 Jakub Jelinek ja...@redhat.com PR tree-optimization/59014 * gcc.c-torture/execute/pr59014-2.c: New test. --- gcc/testsuite/gcc.c-torture/execute/pr59014-2.c 2013-08-25 18:20:55.717911035 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr59014-2.c 2013-11-27 15:31:09.833340947 +0100 @@ -0,0 +1,23 @@ +/* PR tree-optimization/59014 */ + +__attribute__((noinline, noclone)) long long int +foo (long long int x, long long int y) +{ + if (((int) x | (int) y) != 0) +return 6; + return x + y; +} + +int +main () +{ + if (sizeof (long long) == sizeof (int)) +return 0; + int shift_half = sizeof (int) * __CHAR_BIT__ / 2; + long long int x = (3LL shift_half) shift_half; + long long int y = (5LL shift_half) shift_half; + long long int z = foo (x, y); + if (z != ((8LL shift_half) shift_half)) +__builtin_abort (); + return 0; +} Jakub
Re: [PATCH] Get rid of useless -fno-rtti for libubsan
On 11/27/13 01:01, Yury Gribov wrote: Hi all, As discussed in http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59106 only a subset of libubsan should be built with RTTI support. Attached patch adds custom build rules for relevant files. Wasn't that already checked in? commit 5e0d610a433356af747fdeda5d8acfe34d3115a9 Author: ygribov ygribov@138bc75d-0d04-0410-961f-82ee72b054a4 Date: Mon Nov 18 08:03:16 2013 + libsanitizer: 2013-11-18 Yury Gribov y.gri...@samsung.com PR sanitizer/59106 * asan/Makefile.am (AM_CXXFLAGS): Add -fno-rtti. * interception/Makefile.am (AM_CXXFLAGS): Likewise. * lsan/Makefile.am (AM_CXXFLAGS): Likewise. * sanitizer_common/Makefile.am (AM_CXXFLAGS): Likewise. * tsan/Makefile.am (AM_CXXFLAGS): Likewise. * asan/Makefile.in: Regenerate. * interception/Makefile.in: Regenerate. * tsan/Makefile.in: Regenerate. * lsan/Makefile.in: Regenerate. * sanitizer_common/Makefile.in: Regenerate. gcc/testsuite: 2013-11-18 Yury Gribov y.gri...@samsung.com PR sanitizer/59106 * c-c++-common/asan/pr59106.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204934 138bc75d-0d04-0410-96 Jeff
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
On 11/27/13 03:18, Yvan Roux wrote: Ping On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote: Ping. On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote: Hi, this patch fixed an LRA cycling due to secondary reload (Thumb mode). Notice that this patch is a prerequisite to turn on LRA by default on ARM. Bootstrapped on a9 and a15 without any regression in the testsuite as LRA is off by default and with the regression reported in the thread bellow when LRA is on. http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html Thanks, Yvan 2013-11-07 Yvan Roux yvan.r...@linaro.org * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return NO_REGS for LRA ? How can that be correct? The secondary reload macros/hooks define cases where additional registers are needed to reload certain forms of rtl. I doubt the use of LRA completely eliminates the need for secondary reloads. Jeff
Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p
On 11/27/13 03:18, Yvan Roux wrote: Ping. On 19 November 2013 09:52, Yvan Roux yvan.r...@linaro.org wrote: yep, all good performance-wise :) Great, Thanks Kyrill. Ok for trunk ? Please include either the patch you are pinging or at the least a link to it in the archives. jeff
Re: [testsuite] Properly set ld_library_path in cilk-plus tests
On 11/27/13 04:39, Rainer Orth wrote: All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests were FAILing on Solaris 10 and 11/x86: ld.so.1: c11-atomic-exec-1.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/./gcc/libgcc_s.so.1: wrong ELF class: ELFCLASS32 ld.so.1: fib.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/i386-pc-solaris2.10/./libcilkrts/.libs/libcilkrts.so.5: wrong ELF class: ELFCLASS32 This happens because both cilk-plus.exp files override ld_library_path instead of appending to it. Fixed as follows by using the customary method for setting ld_library_path. Bootstrapped without regressions on i386-pc-solaris2.1[01] and x86_64-unknown-linux-gnu, installed on mainline. Rainer 2013-11-26 Rainer Orth r...@cebitec.uni-bielefeld.de * gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path. Call set_ld_library_path_env_vars. * g++.dg/cilk-plus/cilk-plus.exp: Likewise. Thanks for taking care of this. You're probably getting more cilk+ fallout than most because you do a lot of Solaris work. Sorry about that. jeff
RE: [testsuite] Properly set ld_library_path in cilk-plus tests
-Original Message- From: Rainer Orth [mailto:r...@cebitec.uni-bielefeld.de] Sent: Wednesday, November 27, 2013 6:39 AM To: gcc-patches@gcc.gnu.org Cc: Iyer, Balaji V Subject: [testsuite] Properly set ld_library_path in cilk-plus tests All 64-bit gcc.dg/atomic and c-c++-common/cilk-plus/CK execution tests were FAILing on Solaris 10 and 11/x86: ld.so.1: c11-atomic-exec-1.exe: fatal: /var/gcc/regression/trunk/10-gcc- gas/build/./gcc/libgcc_s.so.1: wrong ELF class: ELFCLASS32 ld.so.1: fib.exe: fatal: /var/gcc/regression/trunk/10-gcc-gas/build/i386-pc- solaris2.10/./libcilkrts/.libs/libcilkrts.so.5: wrong ELF class: ELFCLASS32 This happens because both cilk-plus.exp files override ld_library_path instead of appending to it. Fixed as follows by using the customary method for setting ld_library_path. Bootstrapped without regressions on i386-pc- solaris2.1[01] and x86_64-unknown-linux-gnu, installed on mainline. Rainer 2013-11-26 Rainer Orth r...@cebitec.uni-bielefeld.de * gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path. Call set_ld_library_path_env_vars. * g++.dg/cilk-plus/cilk-plus.exp: Likewise. Thanks for catching this! Sorry I didn't catch it sooner. I am just getting myself familiar with the DejaGNU framework. -Balaji V. Iyer.
Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p
Please include either the patch you are pinging or at the least a link to it in the archives. Ok, sorry for that, here is the patch and Changelog Yvan 2013-11-17 Yvan Roux yvan.r...@linaro.org * config/arm/arm.md (store_minmaxsi): Use only when optimize_function_for_size_p. diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index e8d5464..da387fb 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -3642,7 +3642,7 @@ [(match_operand:SI 1 s_register_operand r) (match_operand:SI 2 s_register_operand r)])) (clobber (reg:CC CC_REGNUM))] - TARGET_32BIT optimize_insn_for_size_p() + TARGET_32BIT optimize_function_for_size_p (cfun) * operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]);
Re: [PING]RE: [PATCH] _Cilk_for for C and C++
On 11/27/2013 12:06 PM, Jason Merrill wrote: On 11/26/2013 12:23 PM, Iyer, Balaji V wrote: Did you get a chance to look at my _Cilk_for patch for C? BTW, I think pinging less than 24 hours after you send the patch is a bit excessive. :) Ah, I see, you were pinging the non-C++ parts. Jason
Re: Patch ping (stage1-ish patches)
On 11/27/13 05:30, Eric Botcazou wrote: In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. Improve debug info for small structures (2) http://gcc.gnu.org/ml/gcc-patches/2013-10/msg02007.html This is fine. Sorry about the delay. jeff
Re: Patch ping (stage1-ish patches)
On 11/27/13 04:48, Rainer Orth wrote: Jakub Jelinek ja...@redhat.com writes: On Fri, Nov 22, 2013 at 09:36:18AM -0700, Jeff Law wrote: In fact, I would suggest that anyone with a pending patch from prior to stage1 close that hasn't gotten feedback by midnight Tuesday ping their patch. I'd like to have a sense of everything that is outstanding sooner rather than later and wrap up any loose ends as quickly as possible. On my side, there's [c++, driver] Add -lrt on Solaris http://gcc.gnu.org/ml/gcc-patches/2013-05/msg01488.html resubmitted as http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00412.html It's unclear if the more intrusive solution outlined in the second message (introduce libstdc++.spec) were acceptable in stage3, and I'm uncertain if I can get it ready in time. Well, the short-term hack to g++spec.c along with the corresponding change to sol2.h is, OK for the trunk. As for the more invasive change, I'd let the C++ runtime guys decide if its too invasive for stage3. If you go that route, worst case is it's considered too invasive and it goes in during stage1 and you can remove the hack-ish solution from this patch. jeff
Re: [PATCH] Postpone __LINE__ evaluation to the end of #line directives
From 6c95593f684c120a0ea7ef6178401283f63250b7 Mon Sep 17 00:00:00 2001 From: Max TenEyck Woodbury max+...@mtew.isa-geek.net Date: Sun, 24 Nov 2013 09:48:09 -0500 Subject: [PATCH] Postpone __LINE__ evaluation to the end of #line directives To: gcc-patches@gcc.gnu.org Copyright 2013 assigned to the Free Software Foundation. --- gcc/testsuite/gcc.dg/cpp/line4.c | 19 +++ libcpp/directives.c | 9 - libcpp/internal.h| 1 + libcpp/macro.c | 3 +++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/cpp/line4.c b/gcc/testsuite/gcc.dg/cpp/line4.c index 84dbf96..0120a2b 100644 --- a/gcc/testsuite/gcc.dg/cpp/line4.c +++ b/gcc/testsuite/gcc.dg/cpp/line4.c @@ -13,7 +13,18 @@ enum { i = __LINE__ }; enum { j = __LINE__ }; #line 16 /* N.B. the _next_ line is line 16. */ - -char array1[i== 44 ? 1 : -1]; -char array2[j== 90 ? 1 : -1]; -char array3[__LINE__ == 19 ? 1 : -1]; + /* __LINE__ should be 16 */ +char array1[i== 44 ? 1 : -1]; /* 17 */ +char array2[j== 90 ? 1 : -1]; /* 18 */ +char array3[__LINE__ == 19 ? 1 : -1]; /* 19 */ + /* 20 */ +# line __LINE__ /* N.B. the __LINE__ sequence should _not_ change here. */ + /* 22 */ +char array4[__LINE__ == 23 ? 1: -1]; /* 23 */ +char array5[__LINE__ == 23 ? -1: 1]; /* 24 */ + /* 25 */ +# line __LINE__ /* N.B. nor should a multi-line comment change the fact + that the __LINE__ sequence should _not_ change here. */ + /* 28 */ +char array6[__LINE__ == 29 ? 1: -1]; /* 29 */ +char array7[__LINE__ == 27 ? -1: 1]; /* 30 */ diff --git a/libcpp/directives.c b/libcpp/directives.c index 65b2034..adb04a5 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.c @@ -900,7 +900,9 @@ do_line (cpp_reader *pfile) bool wrapped; /* #line commands expand macros. */ + ++pfile-state.in_directive; /* Request special __LINE__ handling. */ token = cpp_get_token (pfile); + --pfile-state.in_directive; /* Cancel request */ if (token-type != CPP_NUMBER || strtolinenum (token-val.str.text, token-val.str.len, new_lineno, wrapped)) @@ -914,7 +916,9 @@ do_line (cpp_reader *pfile) return; } - if (CPP_PEDANTIC (pfile) (new_lineno == 0 || new_lineno cap || wrapped)) + if (CPP_PEDANTIC (pfile) (new_lineno == 0 + || (new_lineno cap new_lineno != CUR__LINE__) + || wrapped)) cpp_error (pfile, CPP_DL_PEDWARN, line number out of range); else if (wrapped) cpp_error (pfile, CPP_DL_WARNING, line number out of range); @@ -936,6 +940,9 @@ do_line (cpp_reader *pfile) } skip_rest_of_line (pfile); + if ( new_lineno == CUR__LINE__ ) /* Postponed evaluation ? */ +new_lineno = linemap_get_expansion_line (pfile-line_table, + pfile-line_table-highest_line); _cpp_do_file_change (pfile, LC_RENAME_VERBATIM, new_file, new_lineno, map_sysp); } diff --git a/libcpp/internal.h b/libcpp/internal.h index 5321458..268de86 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -604,6 +604,7 @@ cpp_in_primary_file (cpp_reader *pfile) { return pfile-line_table-depth == 1; } +#define CUR__LINE__ -1U /* In macro.c */ extern void _cpp_free_definition (cpp_hashnode *); diff --git a/libcpp/macro.c b/libcpp/macro.c index e359d15..47e41b6 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -309,6 +309,9 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node) /* If __LINE__ is embedded in a macro, it must expand to the line of the macro's invocation, not its definition. Otherwise things like assert() will not work properly. */ + if (pfile-state.in_directive 1) /* In #line directive? */ +number = CUR__LINE__; /* yes, postpone the lookup... */ + else number = linemap_get_expansion_line (pfile-line_table, CPP_OPTION (pfile, traditional) ? pfile-line_table-highest_line -- 1.8.0.rc0.18.gf84667d
Re: _Cilk_spawn and _Cilk_sync for C++
On 11/25/2013 10:50 AM, Iyer, Balaji V wrote: I have fixed this issue. My function to map the variable's context from the spawner to the spawn helper function was going into the lambda function. I made it stop by adding a language specific copy_tree_body (basically stop going into the lambda function's body for C++ and for the rest of the times just use copy_tree_body_r, no code duplicating is done between the two) that and it works fine now. I doubt it was walking from the enclosing function into the body of the lambda function. Looking at the patch, it seems that what you're avoiding is walking into the closure object itself, and adding an entire new langhook seems like overkill for that. I think a better approach would be to add a cp_build_cilk_spawn that uses stabilize_call to pre-evaluate the arguments of the call. Jason
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 4:31 AM, Alexey Samsonov samso...@google.com wrote: LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The standard tells that compilation unit entries may have attributes specifying the address range, but doesn't tell they are obligatory. DWARF consumers probably shouldn't rely on their presence. How are consumers of LLVM debug info expected to determine the address range of the function? Is the intent that the consumer should assume that the function continues from the DW_AT_low_pc address to the next DW_AT_low_pc address from the debug info? Or should the consumer look at the function size from the symbol table? Or should the consumer examine all the DIEs looking for address ranges? Does LLVM support anything like GCC's -freorder-blocks-and-partition option, and, if so, how does it represent the address ranges in the debug info? Ian
Re: [PATCH] _Cilk_for for C and C++
On 11/15/2013 02:23 PM, Iyer, Balaji V wrote: One small thing that I have not done that Jakub and several other have asked me before is that, there are no tests in c-c++-common for _Cilk_for. The reason being that the syntax between C and C++ implementations are different. In C++, the induction variable must be defined in the initializer (e.g. it should start wth _Cilk_for (int ii = 0)). In C, this is not allowed (e.g. it should start as _Cilk_for (ii = 0; ii 10; ii++)). That can be handled with #ifdef __cplusplus. Jason
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
How can that be correct? The secondary reload macros/hooks define cases where additional registers are needed to reload certain forms of rtl. I doubt the use of LRA completely eliminates the need for secondary reloads. Vladimir explained me that in that case on arm, secondary reload hook confuses LRA, and that returning NO_REGS will let LRA deal with constraints, but I may have badly understand what he said. Yvan
Re: [PATCH] _Cilk_for for C and C++
On Wed, Nov 27, 2013 at 12:48:11PM -0500, Jason Merrill wrote: On 11/15/2013 02:23 PM, Iyer, Balaji V wrote: One small thing that I have not done that Jakub and several other have asked me before is that, there are no tests in c-c++-common for _Cilk_for. The reason being that the syntax between C and C++ implementations are different. In C++, the induction variable must be defined in the initializer (e.g. it should start wth _Cilk_for (int ii = 0)). In C, this is not allowed (e.g. it should start as _Cilk_for (ii = 0; ii 10; ii++)). That can be handled with #ifdef __cplusplus. It isn't allowed even in C99? For OpenMP, int a[30]; void foo () { #pragma omp for for (int i = 0; i 30; i++) a[i] = i; } is valid for C99. So, perhaps you just want /* { dg-additional-options -std=c99 { target c } } */ in the c-c++-common tests? Jakub
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
On 27/11/13 17:49, Yvan Roux wrote: How can that be correct? The secondary reload macros/hooks define cases where additional registers are needed to reload certain forms of rtl. I doubt the use of LRA completely eliminates the need for secondary reloads. Vladimir explained me that in that case on arm, secondary reload hook confuses LRA, and that returning NO_REGS will let LRA deal with constraints, but I may have badly understand what he said. Yvan But wasn't that in the context of PREFERRED_RELOAD_CLASS? That's a different beast. R.
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
On 11/27/2013, 12:16 PM, Jeff Law wrote: On 11/27/13 03:18, Yvan Roux wrote: Ping On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote: Ping. On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote: Hi, this patch fixed an LRA cycling due to secondary reload (Thumb mode). Notice that this patch is a prerequisite to turn on LRA by default on ARM. Bootstrapped on a9 and a15 without any regression in the testsuite as LRA is off by default and with the regression reported in the thread bellow when LRA is on. http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html Thanks, Yvan 2013-11-07 Yvan Roux yvan.r...@linaro.org * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return NO_REGS for LRA ? How can that be correct? The secondary reload macros/hooks define cases where additional registers are needed to reload certain forms of rtl. I doubt the use of LRA completely eliminates the need for secondary reloads. When I designed LRA I wanted to remove as many hooks as possible because I thought insn constraints as major info source are enough. Unfortunately I did not succeed fully with secondary reload hooks and macros. It is still needed for some complicated cases but general rule to port LRA to a target is to try to switch these hooks off. For example, LRA can generate a move of pseudos of classes for which hardware has no real insn. After that looking on the move constraints, LRA can generate more move insns and additional pseudos to generate moves (loads or stores if additional pseudos got NO_REGS) which represent real hardware insns. In complicated cases when these macros are still needed, LRA runs into infinite loop of such move generation if the macros are not used. I might be return to a project to remove necessity of such hooks and macros for LRA but I am not sure when. I guess I need to write a small guidance too how to port LRA. Also I found that in many cases as here, the macros although working for reload can confuse LRA (because of different reload pass and LRA implementation approaches). So I guess the patch is ok although I can not approve it as I am not an arm port maintainer.
Re: [PATCH] _Cilk_for for C and C++
On 11/27/13 10:54, Jakub Jelinek wrote: On Wed, Nov 27, 2013 at 12:48:11PM -0500, Jason Merrill wrote: On 11/15/2013 02:23 PM, Iyer, Balaji V wrote: One small thing that I have not done that Jakub and several other have asked me before is that, there are no tests in c-c++-common for _Cilk_for. The reason being that the syntax between C and C++ implementations are different. In C++, the induction variable must be defined in the initializer (e.g. it should start wth _Cilk_for (int ii = 0)). In C, this is not allowed (e.g. it should start as _Cilk_for (ii = 0; ii 10; ii++)). That can be handled with #ifdef __cplusplus. It isn't allowed even in C99? For OpenMP, int a[30]; void foo () { #pragma omp for for (int i = 0; i 30; i++) a[i] = i; } is valid for C99. So, perhaps you just want /* { dg-additional-options -std=c99 { target c } } */ in the c-c++-common tests? Jakub Yup, that's what I did for the Cilk Plus pragma simd tests.
Re: Patch ping (stage1-ish patches)
On 11/27/13 01:28, Alexander Ivchenko wrote: Here is the patch series that had been posted in Sep that is aimed to isolate the Android support from targets that actually don't have that support (We discussed the need of it with Jakub here http://gcc.gnu.org/ml/gcc-patches/2013-09/msg00185.html): http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01420.html This patch series is fine. Just a minor typo in patch #4: +/* IFUNCs are supportted by glibc, but not by uClibc or Bionic. */ s/supportted/supported/ jeff
Re: wide-int, rtl
Eric, Let me make one high level comment here and the low level comments will be responded to when i fix the patch. CONST_DOUBLE has two hwis in it. So in practice, you get 128 bits and that is it.a CONST_WIDE_INT has an array of HWIs that has as many elements as it needs to represent the value. If TARGET_SUPPORTS_WIDE_INT == 0 (the default and currently the value for every public port except the RS6000), then rtl integer constants that do not fit in a CONST_INT are put in a CONST_DOUBLE and there are never any CONST_WIDE_INTS created.A platform like this cannot correctly represent variables larger than 128 bits. i.e. this is like the current trunk except that there are fewer bugs for the 128 bit variables. if TARGET_SUPPORTS_WIDE_INT is not 0, then rtl integer constants are put in CONST_WIDE_INT and CONST_DOUBLES only hold floating point values. For the last comment that you made, if the target does not support wide int, then there is simply no way to represent that constant properly and if the target accepts types larger than TImode, then the maintainer needs to do the work to use CONST_WIDE_INTs. This is not a lot a work, but it requires target specific knowledge to convert the patterns, predicates and constraints to look for CONST_WIDE_INTs rather than CONST_DOUBLE for larger integers. As far as your comment about const_double_operand, i could add some conditional code to make this not accept integers if the TARGET... is not 0. Richard Sandiford wanted my to use that test as infrequently as possible. thanks for your comments. Kenny On 11/27/2013 11:24 AM, Eric Botcazou wrote: Richi has asked the we break the wide-int patch so that the individual port and front end maintainers can review their parts without have to go through the entire patch.This patch covers the first half of the rtl code. --- a/gcc/cse.c +++ b/gcc/cse.c @@ -2336,15 +2336,23 @@ hash_rtx_cb (const_rtx x, enum machine_mode mode, + (unsigned int) INTVAL (x)); return hash; +case CONST_WIDE_INT: + { + int i; + for (i = 0; i CONST_WIDE_INT_NUNITS (x); i++) + hash += CONST_WIDE_INT_ELT (x, i); + } + return hash; You can write for (int i = 0; ... now and remove the parentheses. --- a/gcc/cselib.c +++ b/gcc/cselib.c @@ -1121,15 +1120,23 @@ cselib_hash_rtx (rtx x, int create, enum machine_mode memmode) hash += ((unsigned) CONST_INT 7) + INTVAL (x); return hash ? hash : (unsigned int) CONST_INT; +case CONST_WIDE_INT: + { + int i; + for (i = 0; i CONST_WIDE_INT_NUNITS (x); i++) + hash += CONST_WIDE_INT_ELT (x, i); + } + return hash; + Likewise. --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -3298,23 +3268,13 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, pow = n + lgup; pow2 = n + lgup - precision; - /* We could handle this with some effort, but this case is much - better handled directly with a scc insn, so rely on caller using - that. */ - gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT); Why removing it? --- a/gcc/expr.c +++ b/gcc/expr.c @@ -722,64 +722,33 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns if (mode == oldmode) return x; - /* There is one case that we must handle specially: If we are converting - a CONST_INT into a mode whose size is twice HOST_BITS_PER_WIDE_INT and - we are to interpret the constant as unsigned, gen_lowpart will do - the wrong if the constant appears negative. What we want to do is - make the high-order word of the constant zero, not all ones. */ - - if (unsignedp GET_MODE_CLASS (mode) == MODE_INT - GET_MODE_BITSIZE (mode) == HOST_BITS_PER_DOUBLE_INT - CONST_INT_P (x) INTVAL (x) 0) + if (CONST_SCALAR_INT_P (x) + GET_MODE_CLASS (mode) == MODE_INT) On a single line. { - double_int val = double_int::from_uhwi (INTVAL (x)); - - /* We need to zero extend VAL. */ - if (oldmode != VOIDmode) - val = val.zext (GET_MODE_BITSIZE (oldmode)); - - return immed_double_int_const (val, mode); + /* If the caller did not tell us the old mode, then there is +not much to do with respect to canonization. We have to assume +that all the bits are significant. */ + if (GET_MODE_CLASS (oldmode) != MODE_INT) + oldmode = MAX_MODE_INT; + wide_int w = wide_int::from (std::make_pair (x, oldmode), + GET_MODE_PRECISION (mode), + unsignedp ? UNSIGNED : SIGNED); + return immed_wide_int_const (w, mode); canonicalization? @@ -5301,10 +5271,10 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal) alt_rtl); } - /* If TEMP is a VOIDmode constant and the mode of the type of EXP is not - the same as that of TARGET, adjust the
Re: [PATCH] Use libbacktrace as libsanitizer's symbolizer
On Wed, Nov 27, 2013 at 9:44 PM, Ian Lance Taylor i...@google.com wrote: On Wed, Nov 27, 2013 at 4:31 AM, Alexey Samsonov samso...@google.com wrote: LLVM emits just a DW_AT_low_pc (base address of a compilation unit). The standard tells that compilation unit entries may have attributes specifying the address range, but doesn't tell they are obligatory. DWARF consumers probably shouldn't rely on their presence. How are consumers of LLVM debug info expected to determine the address range of the function? Sorry, I wasn't clear enough. GCC emits DW_AT_low_pc and DW_AT_high_pc for *compile unit* DIE, so the consumer doesn't have to iterate over all the functions DIEs. LLVM doesn't emit DW_AT_low_pc/DW_AT_high_pc for compile unit, but emits them for function DIEs. I've added some debugging output to libbacktrace and noticed it was able to build [address range]-[compile unit] mapping only for sources compiled with GCC, so I suggested that it doesn't examine all the descendants of compile unit DIEs to build the ranges. Is the intent that the consumer should assume that the function continues from the DW_AT_low_pc address to the next DW_AT_low_pc address from the debug info? Or should the consumer look at the function size from the symbol table? Or should the consumer examine all the DIEs looking for address ranges? Does LLVM support anything like GCC's -freorder-blocks-and-partition option, and, if so, how does it represent the address ranges in the debug info? Ian -- Alexey Samsonov, MSK
Re: [PATCH, ARM, LRA] Fixed bootstrap failure in Thumb mode
On 27 November 2013 18:58, Vladimir Makarov vmaka...@redhat.com wrote: On 11/27/2013, 12:16 PM, Jeff Law wrote: On 11/27/13 03:18, Yvan Roux wrote: Ping On 18 November 2013 09:40, Yvan Roux yvan.r...@linaro.org wrote: Ping. On 7 November 2013 15:56, Yvan Roux yvan.r...@linaro.org wrote: Hi, this patch fixed an LRA cycling due to secondary reload (Thumb mode). Notice that this patch is a prerequisite to turn on LRA by default on ARM. Bootstrapped on a9 and a15 without any regression in the testsuite as LRA is off by default and with the regression reported in the thread bellow when LRA is on. http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00725.html Thanks, Yvan 2013-11-07 Yvan Roux yvan.r...@linaro.org * config/arm/arm.h (THUMB_SECONDARY_INPUT_RELOAD_CLASS): Return NO_REGS for LRA ? How can that be correct? The secondary reload macros/hooks define cases where additional registers are needed to reload certain forms of rtl. I doubt the use of LRA completely eliminates the need for secondary reloads. When I designed LRA I wanted to remove as many hooks as possible because I thought insn constraints as major info source are enough. Unfortunately I did not succeed fully with secondary reload hooks and macros. It is still needed for some complicated cases but general rule to port LRA to a target is to try to switch these hooks off. For example, LRA can generate a move of pseudos of classes for which hardware has no real insn. After that looking on the move constraints, LRA can generate more move insns and additional pseudos to generate moves (loads or stores if additional pseudos got NO_REGS) which represent real hardware insns. In complicated cases when these macros are still needed, LRA runs into infinite loop of such move generation if the macros are not used. I might be return to a project to remove necessity of such hooks and macros for LRA but I am not sure when. I guess I need to write a small guidance too how to port LRA. Also I found that in many cases as here, the macros although working for reload can confuse LRA (because of different reload pass and LRA implementation approaches). So I guess the patch is ok although I can not approve it as I am not an arm port maintainer. Thanks for the clarification Vladimir, and BTW I just find the same cycling issue on iWMMXT. Yvan
Re: [PATCH, i386]: Fix PR56788, _mm_frcz_sd and _mm_frcz_ss ignore their second argument
On Wed, Nov 27, 2013 at 7:45 AM, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: Hopefully someone from AMD will provide tests that are mysteriously missing from XOP testsuite. As pointed out by Marc, I added myself to the bug later. I was bit confused about the internal insn representation with user-visible function. So, couldn't add test then and there. I could have solved that earlier. Sorry for that. Attached is the test that checks the (controversial) frcz functions. Uros could you please add this to your patch while committing. Thanks, I have changed the patch slightly to skip runtime test on non-xop machines. 2013-11-27 Uros Bizjak ubiz...@gmail.com Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com PR target/56788 * gcc.target/i386/xop-frczX.c: New test. Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN. Uros. Index: gcc.target/i386/xop-frczX.c === --- gcc.target/i386/xop-frczX.c (revision 0) +++ gcc.target/i386/xop-frczX.c (working copy) @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-require-effective-target xop } */ +/* { dg-options -O2 -mxop } */ + +#include xop-check.h + +#include x86intrin.h + +void +check_mm_vmfrcz_sd (__m128d __A, __m128d __B) +{ + union128d a, b, c; + double d[2]; + + a.x = __A; + b.x = __B; + c.x = _mm_frcz_sd (__A, __B); + d[0] = b.a[0] - (int)b.a[0] ; + d[1] = a.a[1]; + if (check_union128d (c, d)) +abort (); +} + +void +check_mm_vmfrcz_ss (__m128 __A, __m128 __B) +{ + union128 a, b, c; + float f[4]; + + a.x = __A; + b.x = __B; + c.x = _mm_frcz_ss (__A, __B); + f[0] = b.a[0] - (int)b.a[0] ; + f[1] = a.a[1]; + f[2] = a.a[2]; + f[3] = a.a[3]; + if (check_union128 (c, f)) +abort (); +} + +static void +xop_test (void) +{ + union128 a, b; + union128d c,d; + int i; + + for (i = 0; i 4; i++) +{ + a.a[i] = i + 3.5; + b.a[i] = i + 7.9; +} + for (i = 0; i 2; i++) +{ + c.a[i] = i + 3.5; + d.a[i] = i + 7.987654321; +} + check_mm_vmfrcz_ss (a.x, b.x); + check_mm_vmfrcz_sd (c.x, d.x); +}
Re: [patch][RFC] make lra.c:check_rtl set maybe_hot_insn_p
On 11/27/13 10:30, Yvan Roux wrote: Please include either the patch you are pinging or at the least a link to it in the archives. Ok, sorry for that, here is the patch and Changelog Yvan 2013-11-17 Yvan Roux yvan.r...@linaro.org * config/arm/arm.md (store_minmaxsi): Use only when optimize_function_for_size_p. Thanks. This is fine for the trunk. And yes, having an insn's validity change based on what block it's in is most definitely bad. I was a bit concerned have the x86 backend, as I happened to know it uses optimize_insn_for_* rather extensively. But thankfully it's only used in splitters, expanders peephole2 patterns, which should all be safe. If you wouldn't mind, could you look at md.texi and see if there's a reasonable place to put verbage about this issue in the internals manual? It might save someone time in the future :-) Thanks, Jeff
RE: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C
HI Aldy and Jakub, Attached, please find a fixed patch. I have fixed all the changes you have mentioned below. Is this OK to install? Here are the ChangeLog entries: gcc/ChangeLog 2013-11-27 Balaji V. Iyer balaji.v.i...@intel.com * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen): Removed a carriage return from the warning string. * omp-low.c (simd_clone_clauses_extract): Added a check for cilk plus SIMD-enabled function attributes. gcc/c/ChangeLog 2013-11-27 Balaji V. Iyer balaji.v.i...@intel.com * c-parser.c (struct c_parser::elem_fn_tokens): Added new field. (c_parser_declaration_or_fndef): Added a check if elem_fn_tokens field in parser is not empty. If not-empty, call the function c_parser_finish_omp_declare_simd. (c_parser_elem_fn_vectorlength): New function. (c_parser_elem_fn_expr_list): Likewise. (c_finish_elem_fn_tokens): Likewise. (c_parser_attributes): Added a elem_fn_tokens parameter. Added a check for vector attribute and if so call c_parser_elem_fn_expr_list. Also, called c_finish_elem_fn_tokens when Cilk Plus is enabled. (c_finish_omp_declare_simd): Added a check if elem_fn_tokens in parser field is non-empty. If so, parse them as you would parse the omp declare simd pragma. gcc/testsuite/ChangeLog 2013-11-27 Balaji V. Iyer balaji.v.i...@intel.com * c-c++-common/cilk-plus/EF/ef_test.c: New test. * c-c++-common/cilk-plus/EF/ef_test2.c: Likewise. * c-c++-common/cilk-plus/EF/vlength_errors.c: Likewise. * c-c++-common/cilk-plus/EF/ef_error.c: Likewise. * c-c++-common/cilk-plus/EF/ef_error2.c: Likewise. * gcc.dg/cilk-plus/cilk-plus.exp: Added calls for the above tests. Thanks, Balaji V. Iyer. -Original Message- From: Aldy Hernandez [mailto:al...@redhat.com] Sent: Wednesday, November 27, 2013 10:52 AM To: Iyer, Balaji V Cc: Jakub Jelinek; gcc-patches@gcc.gnu.org Subject: Re: [PING]: [GOMP4] [PATCH] SIMD-Enabled Functions (formerly Elemental functions) for C Iyer, Balaji V balaji.v.i...@intel.com writes: c_finish_omp_declare_simd (c_parser *parser, tree fndecl, tree parms, vecc_token clauses) { + + if (flag_enable_cilkplus + clauses.exists () !vec_safe_is_empty (parser- elem_fn_tokens)) +{ + error (%#pragma omp declare simd% cannot be used in the same +function marked as a SIMD-enabled function); + vec_free (parser-elem_fn_tokens); + return; +} I see Cilk Plus elementals are handled as omp declare simd in the above function. This function sets an omp declare simd attribute here: if (c != NULL_TREE) c = tree_cons (NULL_TREE, c, NULL_TREE); c = build_tree_list (get_identifier (omp declare simd), c); TREE_CHAIN (c) = DECL_ATTRIBUTES (fndecl); DECL_ATTRIBUTES (fndecl) = c; but you also need a cilk plus elemental attribute so the rest of the compiler can differentiate Cilk Plus elementals from omp declare simds. Fixed. See simd_clone_clauses_extract(): + /* To distinguish from an OpenMP simd clone, Cilk Plus functions to + be cloned have a distinctive artificial label in addition to omp + declare simd. */ + bool cilk_clone += (flag_enable_cilkplus +lookup_attribute (cilk plus elemental, + DECL_ATTRIBUTES (node-decl))); Also you probably want some kind of test for this, so test for whatever cilk_elemental is doing. In trunk, the only use of cilk_elemental is in ix86_simd_clone_compute_vecsize_and_simdlen(), so come up with some x86 specific test for cilk_elemental==true. Fixed. Aldy Index: gcc/c/c-parser.c === --- gcc/c/c-parser.c(revision 205457) +++ gcc/c/c-parser.c(working copy) @@ -208,6 +208,12 @@ /* True if we are in a context where the Objective-C Property attribute keywords are valid. */ BOOL_BITFIELD objc_property_attr_context : 1; + + /* Cilk Plus specific parser/lexer information. */ + + /* Buffer to hold all the tokens from parsing the vector attribute for the + SIMD Enabled functions (formerly known as elemental functions). */ + vec c_token, va_gc *elem_fn_tokens; } c_parser; @@ -1647,7 +1653,8 @@ C_DTR_NORMAL, dummy); if (declarator == NULL) { - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses.exists () + || !vec_safe_is_empty (parser-elem_fn_tokens)) c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE, omp_declare_simd_clauses); c_parser_skip_to_end_of_block_or_statement (parser); @@ -1734,7 +1741,8 @@ chainon (postfix_attrs,