[PATCH] PR gcc/84923 - gcc.dg/attr-weakref-1.c failed on aarch64
From: Vladimir Mezentsev When weakref_targets is not empty a target cannot be removed from weak_decls. A small example is below when 'wv12' is removed from the weak list on aarch64: static vtype Wv12 __attribute__((weakref ("wv12"))); extern vtype wv12 __attribute__((weak)); Bootstrapped on aarch64-unknown-linux-gnu including (c,c++ and go). Tested on aarch64-linux-gnu. No regression. The attr-weakref-1.c test passed. ChangeLog: 2018-04-12 Vladimir Mezentsev PR gcc/84923 * varasm.c (weak_finish): clean up weak_decls --- gcc/varasm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/varasm.c b/gcc/varasm.c index d24bac4..2a70234 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -5683,8 +5683,7 @@ weak_finish (void) nor multiple .weak directives for the latter. */ for (p = &weak_decls; (t2 = *p) ; ) { - if (TREE_VALUE (t2) == alias_decl - || target == DECL_ASSEMBLER_NAME (TREE_VALUE (t2))) + if (TREE_VALUE (t2) == alias_decl) *p = TREE_CHAIN (t2); else p = &TREE_CHAIN (t2); -- 1.8.3.1
[PATCH, rs6000] (PR83402) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative
The powerpc versions of _mm_slli_epi32 and _mm_slli_epi64 in emmintrin.h do not properly handle shift values between 16 and 31, inclusive. These were setting up the shift with vec_splat_s32, which only accepts *5 bit signed* shift values, or a range of -16 to 15. Values above 15 produced an error: error: argument 1 must be a 5-bit signed literal Fix is to effectively reduce the range for which vec_splat_s32 is used to < 16 and use vec_splats otherwise. Also, _mm_slli_epi{16,32,64}, when given a negative shift value, should always return a vector of {0}. 2018-04-12 Paul A. Clarke gcc/config PR target/83402 * rs6000/emmintrin.h (_mm_slli_epi{16,32,64}): Ensure that vec_splat_s32 is only called with 0 < shift < 16. Ensure negative shifts result in {0}. gcc/testsuite/gcc.target/powerpc PR target/83402 * gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for several positive and negative values. * gcc.target/powerpc/sse2-pslld-1.c: Same. * gcc.target/powerpc/sse2-psllq-1.c: Same. Index: gcc/config/rs6000/emmintrin.h === --- gcc/config/rs6000/emmintrin.h (revision 259016) +++ gcc/config/rs6000/emmintrin.h (working copy) @@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B) __v8hu lshift; __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; - if (__B < 16) + if (__B > 0 && __B < 16) { if (__builtin_constant_p(__B)) lshift = (__v8hu) vec_splat_s16(__B); @@ -1507,12 +1507,12 @@ _mm_slli_epi32 (__m128i __A, int __B) __v4su lshift; __v4si result = { 0, 0, 0, 0 }; - if (__B < 32) + if (__B > 0 && __B < 32) { - if (__builtin_constant_p(__B)) - lshift = (__v4su) vec_splat_s32(__B); + if (__builtin_constant_p(__B) && __B < 16) +lshift = (__v4su) vec_splat_s32(__B); else - lshift = vec_splats ((unsigned int) __B); +lshift = vec_splats ((unsigned int) __B); result = vec_vslw ((__v4si) __A, lshift); } @@ -1527,17 +1527,12 @@ _mm_slli_epi64 (__m128i __A, int __B) __v2du lshift; __v2di result = { 0, 0 }; - if (__B < 64) + if (__B > 0 && __B < 64) { - if 
(__builtin_constant_p(__B)) - { - if (__B < 32) - lshift = (__v2du) vec_splat_s32(__B); - else - lshift = (__v2du) vec_splats((unsigned long long)__B); - } + if (__builtin_constant_p(__B) && __B < 16) + lshift = (__v2du) vec_splat_s32(__B); else - lshift = (__v2du) vec_splats ((unsigned int) __B); + lshift = (__v2du) vec_splats ((unsigned int) __B); result = vec_vsld ((__v2di) __A, lshift); } Index: gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c === --- gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c (revision 259016) +++ gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c (working copy) @@ -13,32 +13,50 @@ #define TEST sse2_test_pslld_1 #endif -#define N 0xf - #include -static __m128i -__attribute__((noinline, unused)) -test (__m128i s1) -{ - return _mm_slli_epi32 (s1, N); -} +#define TEST_FUNC(id, N) \ + static __m128i \ + __attribute__((noinline, unused)) \ + test##id (__m128i s1) \ + { \ +return _mm_slli_epi32 (s1, N); \ + } +TEST_FUNC(0, 0) +TEST_FUNC(15, 15) +TEST_FUNC(16, 16) +TEST_FUNC(31, 31) +TEST_FUNC(neg1, -1) +TEST_FUNC(neg16, -16) +TEST_FUNC(neg32, -32) +TEST_FUNC(neg64, -64) +TEST_FUNC(neg128, -128) + +#define TEST_CODE(id, N) \ + { \ +int e[4] = {0}; \ +union128i_d u, s; \ +int i; \ +s.x = _mm_set_epi32 (1, -2, 3, 4); \ +u.x = test##id (s.x); \ +if (N > 0 && N < 32) \ + for (i = 0; i < 4; i++) \ +e[i] = s.a[i] << (N * (N > 0)); \ +if (check_union128i_d (u, e)) \ + abort (); \ + } + static void TEST (void) { - union128i_d u, s; - int e[4] = {0}; - int i; - - s.x = _mm_set_epi32 (1, -2, 3, 4); - - u.x = test (s.x); - - if (N < 32) -for (i = 0; i < 4; i++) - e[i] = s.a[i] << N; - - if (check_union128i_d (u, e)) -abort (); + TEST_CODE(0, 0); + TEST_CODE(15, 15); + TEST_CODE(16, 16); + TEST_CODE(31, 31); + TEST_CODE(neg1, -1); + TEST_CODE(neg16, -16); + TEST_CODE(neg32, -32); + TEST_CODE(neg64, -64); + TEST_CODE(neg128, -128); } Index: gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c === --- gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c (revision 259016) 
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c (working copy) @@ -13,36 +13,56 @@ #define TEST sse2_test_psllq_1 #endif -#define N 60 - #include #ifdef _ARCH_PWR8 -static __m128i -__attribute__((noinline, unused)) -test (__m128i s1) -{ - return _mm_slli_epi64 (s1, N); -} +#define TEST_FUNC(id, N) \ + static __m128i \ + __attribute__((noinline, unused)) \ + test##id (
Re: [PATCH] avoid duplicate warning for strcmp with a nonstring (PR 85359)
Attached is a minor update that avoids additional duplicate warnings exposed by more extensive testing (for PR 85369). On 04/12/2018 02:52 PM, Martin Sebor wrote: The attached patch makes a small tweak to avoid issuing a duplicate warning for calls to strcmp with a nonstring argument. The most onerous part of this was figuring out how to test for the absence of duplicate warnings. The "hack" I used (dg-regexp) is in place until a more straightforward solution becomes available. (David Malcolm has something planned for GCC 9.) Martin PR middle-end/85359 - duplicate -Wstringop-overflow for a strcmp call with a nonstring pointer gcc/ChangeLog: PR middle-end/85359 * builtins.c (expand_builtin_strcpy): Call maybe_warn_nonstring_arg only when expansion succeeds. (expand_builtin_strcmp): Same. (expand_builtin_strncmp): Same. gcc/testsuite/ChangeLog: PR middle-end/85359 * gcc.dg/attr-nonstring.c: New test. Index: gcc/builtins.c === --- gcc/builtins.c (revision 259298) +++ gcc/builtins.c (working copy) @@ -3777,7 +3777,17 @@ expand_builtin_strcpy (tree exp, rtx target) src, destsize); } - return expand_builtin_strcpy_args (dest, src, target); + if (rtx ret = expand_builtin_strcpy_args (dest, src, target)) +{ + /* Check to see if the argument was declared attribute nonstring + and if so, issue a warning since at this point it's not known + to be nul-terminated. */ + tree fndecl = get_callee_fndecl (exp); + maybe_warn_nonstring_arg (fndecl, exp); + return ret; +} + + return NULL_RTX; } /* Helper function to do the actual work for expand_builtin_strcpy. The @@ -4570,14 +4580,14 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED } } - /* Check to see if the argument was declared attribute nonstring - and if so, issue a warning since at this point it's not known - to be nul-terminated. 
*/ tree fndecl = get_callee_fndecl (exp); - maybe_warn_nonstring_arg (fndecl, exp); - if (result) { + /* Check to see if the argument was declared attribute nonstring + and if so, issue a warning since at this point it's not known + to be nul-terminated. */ + maybe_warn_nonstring_arg (fndecl, exp); + /* Return the value in the proper mode for this function. */ machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); if (GET_MODE (result) == mode) @@ -4674,14 +4684,14 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED arg2_rtx, TREE_TYPE (len), arg3_rtx, MIN (arg1_align, arg2_align)); - /* Check to see if the argument was declared attribute nonstring - and if so, issue a warning since at this point it's not known - to be nul-terminated. */ tree fndecl = get_callee_fndecl (exp); - maybe_warn_nonstring_arg (fndecl, exp); - if (result) { + /* Check to see if the argument was declared attribute nonstring + and if so, issue a warning since at this point it's not known + to be nul-terminated. */ + maybe_warn_nonstring_arg (fndecl, exp); + /* Return the value in the proper mode for this function. 
*/ mode = TYPE_MODE (TREE_TYPE (exp)); if (GET_MODE (result) == mode) === --- gcc/testsuite/gcc.dg/attr-nonstring.c (nonexistent) +++ gcc/testsuite/gcc.dg/attr-nonstring.c (working copy) @@ -0,0 +1,123 @@ +/* PR middle-end/85359 - duplicate -Wstringop-overflow for a strcmp call + with a nonstring pointer + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __builtin_va_list va_list; + +int printf (const char*, ...); +int puts (const char*); +int puts_unlocked (const char*); +int sprintf (char*, const char*, ...); +int snprintf (char*, size_t, const char*, ...); +int vsprintf (char*, const char*, va_list); +int vsnprintf (char*, size_t, const char*, va_list); + +int strcmp (const char*, const char*); +int strncmp (const char*, const char*, size_t); + +char* stpcpy (char*, const char*); +char* stpncpy (char*, const char*, size_t); + +char* strcat (char*, const char*); +char* strncat (char*, const char*, size_t); + +char* strcpy (char*, const char*); +char* strncpy (char*, const char*, size_t); + +char* strchr (const char*, int); +char* strrchr (const char*, int); +char* strstr (const char*, const char*); +char* strdup (const char*); +size_t strlen (const char*); +size_t strnlen (const char*, size_t); +char* strndup (const char*, size_t); + +#define NONSTRING __attribute__ ((nonstring)) + +extern char ns5[5] NONSTRING; + +int strcmp_nonstring_1 (NONSTRING const char *a, const char *b) +{ + /* dg-warning matches one or more instances of the warning so it's + no good on its own. Use dg-regexp instead to verify that just + one instance of the warning is issued. See gcc.dg/pr64223-1 + for a different approach. */ + return strcmp (a, b); /* { dg-regexp "\[^\n\
[PATCH] issue nonstring warning for strcpy even on s390 (PR 85369)
PR 85369 notes that the c-c++-common/attr-nonstring-3.c fails on IBM Z (and other similar targets) whose back-end provides the movstr expander. The failure is caused by an expected warning failing to trigger because the strcpy call is expanded early and the checker never runs. The attached patch adjusts the code to make sure the warning is not bypassed on these targets. I've verified the patch with an s390-linux cross-compiler and with a full x86_64-linux native build and regression run. Martin PR middle-end/85369 - no -Wstringop-overflow for a strcpy / stpcpy call with a nonstring pointer when providing movstr pattern gcc/ChangeLog: PR middle-end/85369 * builtins.c (expand_builtin_stpcpy_1): New function. (expand_builtin_stpcpy): Call it, and call maybe_warn_nonstring_arg only if the former succeeds. diff --git a/gcc/builtins.c b/gcc/builtins.c index b751a4b..f681488 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3808,7 +3808,7 @@ expand_builtin_strcpy_args (tree dest, tree src, rtx target) mode MODE if that's convenient). */ static rtx -expand_builtin_stpcpy (tree exp, rtx target, machine_mode mode) +expand_builtin_stpcpy_1 (tree exp, rtx target, machine_mode mode) { tree dst, src; location_t loc = EXPR_LOCATION (exp); @@ -3885,6 +3885,25 @@ expand_builtin_stpcpy (tree exp, rtx target, machine_mode mode) } } +/* Expand a call EXP to the stpcpy builtin and diagnose uses of nonstring + arguments while being careful to avoid duplicate warnings (which could + be issued if the expander were to expand the call, resulting in it + being emitted in expand_call(). */ + +static rtx +expand_builtin_stpcpy (tree exp, rtx target, machine_mode mode) +{ + if (rtx ret = expand_builtin_stpcpy_1 (exp, target, mode)) +{ + /* The call has been successfully expanded. Check for nonstring + arguments and issue warnings as appropriate. 
*/ + maybe_warn_nonstring_arg (get_callee_fndecl (exp), exp); + return ret; +} + + return NULL_RTX; +} + /* Check a call EXP to the stpncpy built-in for validity. Return NULL_RTX on both success and failure. */
[committed] Prevent erroneous "macro had not yet been defined" messages (PR c++/85385)
PR c++/85385 reports an issue where we emit bogus "macro had not yet been defined" notes when a macro is mis-used: $ cat test.c #define MACRO(X,Y) void test () { MACRO(42); } $ ./xg++ -B. -c test.c test.c:5:11: error: macro "MACRO" requires 2 arguments, but only 1 given MACRO(42); ^ test.c: In function ‘void test()’: test.c:5:3: error: ‘MACRO’ was not declared in this scope MACRO(42); ^ test.c:5:3: note: test.c:1: note: it was later defined here #define MACRO(X,Y) The macro *had* been defined, it was merely misused. This patch fixes the issue by only issuing the note if the use location is before the definition location (using linemap_location_before_p). Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; adds 39 PASS results to g++.sum. Committed to trunk as r259360. gcc/cp/ChangeLog: PR c++/85385 * name-lookup.c (macro_use_before_def::maybe_make): New function, checking that the use is indeed before the definition. (macro_use_before_def::macro_use_before_def): Make private. (macro_use_before_def::~macro_use_before_def): Make private. Move check for UNKNOWN_LOCATION to macro_use_before_def::maybe_make. (lookup_name_fuzzy): Call macro_use_before_def::maybe_make rather than using new directly. gcc/testsuite/ChangeLog: PR c++/85385 * g++.dg/diagnostic/macro-arg-count.C: New test. --- gcc/cp/name-lookup.c | 39 - gcc/testsuite/g++.dg/diagnostic/macro-arg-count.C | 51 +++ 2 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/g++.dg/diagnostic/macro-arg-count.C diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index b923107..d2e5acb 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -5888,6 +5888,27 @@ consider_binding_level (tree name, best_match &bm, class macro_use_before_def : public deferred_diagnostic { public: + /* Factory function. Return a new macro_use_before_def instance if + appropriate, or return NULL. 
*/ + static macro_use_before_def * + maybe_make (location_t use_loc, cpp_hashnode *macro) + { +source_location def_loc = cpp_macro_definition_location (macro); +if (def_loc == UNKNOWN_LOCATION) + return NULL; + +/* We only want to issue a note if the macro was used *before* it was + defined. + We don't want to issue a note for cases where a macro was incorrectly + used, leaving it unexpanded (e.g. by using the wrong argument + count). */ +if (!linemap_location_before_p (line_table, use_loc, def_loc)) + return NULL; + +return new macro_use_before_def (use_loc, macro); + } + + private: /* Ctor. LOC is the location of the usage. MACRO is the macro that was used. */ macro_use_before_def (location_t loc, cpp_hashnode *macro) @@ -5901,13 +5922,10 @@ class macro_use_before_def : public deferred_diagnostic if (is_suppressed_p ()) return; -source_location def_loc = cpp_macro_definition_location (m_macro); -if (def_loc != UNKNOWN_LOCATION) - { - inform (get_location (), "the macro %qs had not yet been defined", - (const char *)m_macro->ident.str); - inform (def_loc, "it was later defined here"); - } +inform (get_location (), "the macro %qs had not yet been defined", + (const char *)m_macro->ident.str); +inform (cpp_macro_definition_location (m_macro), + "it was later defined here"); } private: @@ -5990,12 +6008,13 @@ lookup_name_fuzzy (tree name, enum lookup_name_fuzzy_kind kind, location_t loc) bm.consider ((const char *)best_macro->ident.str); else if (bmm.get_best_distance () == 0) { - /* If we have an exact match for a macro name, then the -macro has been used before it was defined. */ + /* If we have an exact match for a macro name, then either the +macro was used with the wrong argument count, or the macro +has been used before it was defined. 
*/ cpp_hashnode *macro = bmm.blithely_get_best_candidate (); if (macro && (macro->flags & NODE_BUILTIN) == 0) return name_hint (NULL, - new macro_use_before_def (loc, macro)); + macro_use_before_def::maybe_make (loc, macro)); } /* Try the "starts_decl_specifier_p" keywords to detect diff --git a/gcc/testsuite/g++.dg/diagnostic/macro-arg-count.C b/gcc/testsuite/g++.dg/diagnostic/macro-arg-count.C new file mode 100644 index 000..12b2dbd --- /dev/null +++ b/gcc/testsuite/g++.dg/diagnostic/macro-arg-count.C @@ -0,0 +1,51 @@ +// { dg-options "-fdiagnostics-show-caret" } + +#define MACRO_1(X,Y) +void test_1 () +{ + MACRO_1(42); // { dg-line "use_of_MACRO_1" } + // { dg-error "macro \"MACRO_1\" requires 2 arguments, but only 1 given" "" { target *-*-* } use_of_MACRO_1 } + /* { dg-begin
Re: [PATCH] Don't mark IFUNC resolver as only called directly
On Thu, Apr 12, 2018 at 6:39 AM, H.J. Lu wrote: > On Thu, Apr 12, 2018 at 5:17 AM, Jan Hubicka wrote: >>> On Thu, Apr 12, 2018 at 1:29 PM, H.J. Lu wrote: >>> > Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as >>> > only called directly. >>> > >>> > OK for trunk? >>> > >>> > >>> > H.J. >>> > --- >>> > gcc/ >>> > >>> > PR target/85345 >>> > * cgraph.h: Include stringpool.h" and "attribs.h". >>> > (cgraph_node::only_called_directly_or_aliased_p): Return false >>> > for IFUNC resolver. >>> > >>> > gcc/testsuite/ >>> > >>> > PR target/85345 >>> > * gcc.target/i386/pr85345.c: New test. >>> > --- >>> > gcc/cgraph.h| 5 +++- >>> > gcc/testsuite/gcc.target/i386/pr85345.c | 44 >>> > + >>> > 2 files changed, 48 insertions(+), 1 deletion(-) >>> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c >>> > >>> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h >>> > index d1ef8408497..9e195824fcc 100644 >>> > --- a/gcc/cgraph.h >>> > +++ b/gcc/cgraph.h >>> > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see >>> > #include "profile-count.h" >>> > #include "ipa-ref.h" >>> > #include "plugin-api.h" >>> > +#include "stringpool.h" >>> > +#include "attribs.h" >>> > >>> > class ipa_opt_pass_d; >>> > typedef ipa_opt_pass_d *ipa_opt_pass; >>> > @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p >>> > (void) >>> > && !DECL_STATIC_CONSTRUCTOR (decl) >>> > && !DECL_STATIC_DESTRUCTOR (decl) >>> > && !used_from_object_file_p () >>> > - && !externally_visible); >>> > + && !externally_visible >>> > + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); >>> >>> How's it handled for our own generated resolver functions? That is, >>> isn't there sth cheaper than doing a lookup_attribute here? I see >>> that make_dispatcher_decl nor ix86_get_function_versions_dispatcher >>> adds the 'ifunc' attribute (though they are TREE_PUBLIC there). >> >> Is there any drawback of setting force_output flag? 
>> Honza > > Setting force_output may prevent some optimizations. Can we add a bit > for IFUNC resolver? > Here is the patch to add ifunc_resolver to cgraph_node. Tested on x86-64 and i686. Any comments? Thanks. -- H.J. From 283a3282d018a40ab550a137a5a2770ce63f4a40 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 11 Apr 2018 12:31:21 -0700 Subject: [PATCH] Don't mark IFUNC resolver as only called directly Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as only called directly. This patch adds ifunc_resolver to cgraph_node, sets ifunc_resolver for ifunc attribute and checks ifunc_resolver instead of looking up ifunc attribute. gcc/ PR target/85345 * cgraph.h (cgraph_node::create): Set ifunc_resolver for ifunc attribute. (cgraph_node::create_alias): Likewise. (cgraph_node::get_availability): Check ifunc_resolver instead of looking up ifunc attribute. * cgraphunit.c (maybe_diag_incompatible_alias): Likewise. * symtab.c (symtab_node::binds_to_current_def_p): Likewise. * varasm.c (do_assemble_alias): Likewise. (assemble_alias): Likewise. (default_binds_local_p_3): Likewise. * cgraph.h (cgraph_node): Add ifunc_resolver. (cgraph_node::only_called_directly_or_aliased_p): Return false for IFUNC resolver. * lto-cgraph.c (input_node): Set ifunc_resolver for ifunc attribute. gcc/testsuite/ PR target/85345 * gcc.target/i386/pr85345.c: New test. 
--- gcc/cgraph.c| 7 +- gcc/cgraph.h| 4 +++ gcc/cgraphunit.c| 2 +- gcc/lto-cgraph.c| 2 ++ gcc/symtab.c| 4 +-- gcc/testsuite/gcc.target/i386/pr85345.c | 44 + gcc/varasm.c| 8 +++--- 7 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 9a7d54d7cee..9f3a2929f6b 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -517,6 +517,9 @@ cgraph_node::create (tree decl) g->have_offload = true; } + if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))) +node->ifunc_resolver = true; + node->register_symbol (); if (DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL) @@ -575,6 +578,8 @@ cgraph_node::create_alias (tree alias, tree target) alias_node->alias = true; if (lookup_attribute ("weakref", DECL_ATTRIBUTES (alias)) != NULL) alias_node->transparent_alias = alias_node->weakref = true; + if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (alias))) +alias_node->ifunc_resolver = true; return alias_node; } @@ -2299,7 +2304,7 @@ cgraph_node::get_availability (symtab_node *ref) avail = AVAIL_AVAILABLE; else if (transparent_alias) ultimate_alias_target (&avail, ref); - else if
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657, take 2)
On Thu, Apr 12, 2018 at 07:37:22PM +0200, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 05:29:35PM +, Wilco Dijkstra wrote: > > > Depending on what you mean old, I see e.g. in 2010 power7 mempcpy got > > > added, > > > in 2013 other power versions, in 2016 s390*, etc. Doing a decent mempcpy > > > isn't hard if you have asm version of memcpy and one spare register. > > > > More mempcpy implementations have been added in recent years indeed, but > > almost all > > add an extra copy of the memcpy code rather than using a single combined > > implementation. > > That means it is still better to call memcpy (which is frequently used and > > thus likely in L1/L2) > > rather than mempcpy (which is more likely to be cold and thus not cached). > > That really depends, usually when some app uses mempcpy, it uses it very > heavily. And all the proposed patches do is honor what the user asked, if > you use memcpy () + n, we aren't transforming that into mempcpy behind the > user's back. > > Anyway, here is what I think Richard was asking for, that I'm currently > bootstrapping/regtesting. It can be easily combined with Martin's target > hook if needed, or do it only for > endp == 1 && target != const0_rtx && CALL_EXPR_TAILCALL (exp) > etc. > > 2018-04-12 Martin Liska > Jakub Jelinek > > PR middle-end/81657 > * expr.h (enum block_op_methods): Add BLOCK_OP_NO_LIBCALL_RET. > * expr.c (emit_block_move_hints): Handle BLOCK_OP_NO_LIBCALL_RET. > * builtins.c (expand_builtin_memory_copy_args): Use > BLOCK_OP_NO_LIBCALL_RET method for mempcpy with non-ignored target, > handle dest_addr == pc_rtx. > > * gcc.dg/string-opt-1.c: Remove bogus comment. Expect a mempcpy > call. Successfully bootstrapped/regtested on x86_64-linux and i686-linux. Jakub
Re: [PATCH] configure.ac: honor --with-gcc-major-version in gcc-driver-name.h (PR jit/85384)
On Thu, Apr 12, 2018 at 04:51:21PM -0400, David Malcolm wrote: > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. > > OK for trunk? > > config/ChangeLog: > PR jit/85384 > * acx.m4 (GCC_BASE_VER): Remove \$\$ from sed expression. > > gcc/ChangeLog: > PR jit/85384 > * configure.ac (gcc-driver-name.h): Honor --with-gcc-major-version > by using gcc_base_ver to generate a gcc_driver_version, and use > it when generating GCC_DRIVER_NAME. > * configure.ac: Regenerate. I'd prefer not touching acx.m4 and instead just: gcc_driver_version=$gcc_BASEVER if test x$with_gcc_major_version_only = xyes ; then gcc_driver_version=`echo $gcc_BASEVER | sed -e 's/^\([0-9]*\).*$/\1/'` fi in configure.ac; after all, it is something what is done elsewhere in configure.ac: #define GCCPLUGIN_VERSION_MAJOR `echo $gcc_BASEVER | sed -e 's/^\([0-9]*\).*$/\1/'` #define GCCPLUGIN_VERSION_MINOR `echo $gcc_BASEVER | sed -e 's/^[0-9]*\.\([0-9]*\).*$/\1/'` #define GCCPLUGIN_VERSION_PATCHLEVEL `echo $gcc_BASEVER | sed -e 's/^[0-9]*\.[0-9]*\.\([0-9]*\)$/\1/'` and the $ is used in all similar sed patterns. > --- a/gcc/configure.ac > +++ b/gcc/configure.ac > @@ -6499,8 +6499,10 @@ AC_DEFINE_UNQUOTED(DIAGNOSTICS_COLOR_DEFAULT, > $DIAGNOSTICS_COLOR_DEFAULT, > > # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit > # of jit/jit-playback.c. > +gcc_driver_version=`eval "${get_gcc_base_ver} $srcdir/BASE-VER"` > +echo "gcc_driver_version: ${gcc_driver_version}" > cat > gcc-driver-name.h < -#define GCC_DRIVER_NAME "${target_noncanonical}-gcc-${gcc_BASEVER}${exeext}" > +#define GCC_DRIVER_NAME > "${target_noncanonical}-gcc-${gcc_driver_version}${exeext}" > EOF > > # Check whether --enable-default-pie was given. Jakub
[PATCH] Fix CSE CLZ/CTZ handling (PR rtl-optimization/85376)
Hi! The following testcase is miscompiled, because due to various disabled optimization passes we end up with a dead bsf instruction (CTZ) of a register known to be zero. fold_rtx uses simplify_unary_operation, which has in this case: case CTZ: if (wi::ne_p (op0, 0)) int_value = wi::ctz (op0); else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) int_value = GET_MODE_PRECISION (imode); result = wi::shwi (int_value, result_mode); break; x86_64 is a target where CTZ_DEFINED_VALUE_AT_ZERO is false, the instruction keeps the previous value of the destination register, so something pretty random. As it is undefined, simplifying it to something random is fine, except when used the way CSE uses it, by remembering that the value (const_int 32) is stored in the destination register and optimizing later code that has (set some_reg (const_int 32)) to that destination register. Because that destination register contains an indeterminate value, we can't expect it will be exactly 32. The following patch lets us punt in these cases. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Another option would be to tweak simplify-rtx.c and instead of doing else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) int_value = GET_MODE_PRECISION (imode); do else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value)) return NULL_RTX; and similarly for CLZ, haven't tested what would break if anything; we've been doing something like that since r62453 when the C?Z_DEFINED_VALUE_AT_ZERO macros have been introduced, and before that actually the same, just unconditionally assumed the value is undefined at 0. 2018-04-12 Jakub Jelinek PR rtl-optimization/85376 * cse.c (fold_rtx): For CLZ and CTZ don't try to simplify if the source is known to be zero and CLZ/CTZ is not defined at zero for the target. * gcc.dg/pr85376.c: New test. 
--- gcc/cse.c.jj2018-02-12 23:24:47.350482694 +0100 +++ gcc/cse.c 2018-04-12 17:49:32.157664289 +0200 @@ -3322,6 +3322,19 @@ fold_rtx (rtx x, rtx_insn *insn) && mode_arg0 == VOIDmode) break; + /* Avoid recording a constant value for CLZ or CTZ if the argument is + known to be zero when the operation is undefined for zero on the + target. See PR85376. */ + if ((code == CLZ || code == CTZ) + && ((const_arg0 ? const_arg0 : folded_arg0) == CONST0_RTX (mode))) + { + int dummy; + scalar_mode imode = GET_MODE_INNER (mode); + if ((code == CLZ && !CLZ_DEFINED_VALUE_AT_ZERO (imode, dummy)) + || (code == CTZ && !CTZ_DEFINED_VALUE_AT_ZERO (imode, dummy))) + break; + } + new_rtx = simplify_unary_operation (code, mode, const_arg0 ? const_arg0 : folded_arg0, mode_arg0); --- gcc/testsuite/gcc.dg/pr85376.c.jj 2018-04-12 17:44:41.506370642 +0200 +++ gcc/testsuite/gcc.dg/pr85376.c 2018-04-12 17:45:11.669401115 +0200 @@ -0,0 +1,32 @@ +/* PR rtl-optimization/85376 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-Og -fno-dce -fgcse -fno-tree-ccp -fno-tree-copy-prop -Wno-psabi" } */ + +typedef unsigned int U __attribute__ ((vector_size (64))); +typedef unsigned __int128 V __attribute__ ((vector_size (64))); +unsigned int e, i, l; +unsigned char f; +U g, h, k, j; + +static inline V +foo (unsigned char n, unsigned short o, unsigned int p, U q, U r, U s) +{ + unsigned int t; + o <<= 5; + q[7] >>= __builtin_add_overflow (0xfff0, __builtin_ffs (n), &s[5]); + t = __builtin_ffs (g[7]); + e *= __builtin_sub_overflow (o, t, &f); + return f + (V) g + (V) h + (V) q + i + (V) j + (V) s + (V) k + l; +} + +int +main () +{ + if (__SIZEOF_INT128__ != 16 || __SIZEOF_INT__ != 4 || __CHAR_BIT__ != 8) +return 0; + V x = foo (0, 1, 5, (U) { }, (U) { }, (U) { }); + for (unsigned i = 0; i < 4; i++) +if ((unsigned int) x[i] != 0x20) + __builtin_abort (); + return 0; +} Jakub
[PATCH] Fix -fsanitize=address VLA instrumentation (PR sanitizer/85230)
Hi! As mentioned in the PR, we need to unpoison the red zones when leaving a scope with VLA variable(s); this is done through __asan_allocas_unpoison call, unfortunately it is called after the __builtin_stack_restore which restores the stack pointer; now, if an interrupt comes in between the stack restore and the __asan_allocas_unpoison call, the interrupt handler might have some stack bytes marked as red zones in the shadow memory and might diagnose sanitizing error even when there is none in the original program. The following patch ought to fix this by swapping the two calls, so we first unpoison and only after it is unpoisoned in shadow memory release the stack. The second argument to the __asan_allocas_unpoison call is meant to be virtual_dynamic_stack_rtx after the __builtin_stack_restore, i.e. the new stack_pointer_rtx value + STACK_DYNAMIC_OFFSET (current_function_decl). As the STACK_DYNAMIC_OFFSET value isn't known until the vregs pass, the code used a hack where it ignored the second argument and replaced it by virtual_dynamic_stack_rtx. With the asan.c change below this doesn't work anymore, because virtual_dynamic_stack_rtx aka stack_pointer_rtx + STACK_DYNAMIC_OFFSET (current_function_decl) before the __builtin_stack_restore is a different value. The patch instead uses the argument passed to the __asan_allocas_unpoison at GIMPLE time, which is the same as passed to __builtin_stack_restore; this is the new stack_pointer_rtx value after __builtin_stack_restore. And, because we don't want that value, but that + STACK_DYNAMIC_OFFSET (current_function_decl), we compute arg1 + (virtual_dynamic_stack_rtx - stack_pointer_rtx) and let CSE/combiner optimize it into arg1 (on targets like x86_64 where STACK_DYNAMIC_OFFSET can be even 0 when not accumulating outgoing args or when that size is 0) or arg1 + some_constant. 
Bootstrapped on {x86_64,i686,powerpc64,powerpc64le,aarch64,s390x,armv7hl}-linux, regtested on {x86_64,i686,powerpc64,powerpc64le}-linux so far, but on the power* ones on virtual address space size that isn't really supported (likely https://github.com/google/sanitizers/issues/933#issuecomment-380058705 issue, so while nothing regresses there, pretty much all asan tests fail there before and after the patch); also tested successfully with asan.exp=alloca* on gcc110 and gcc112 on compile farm where it doesn't suffer from the VA issue. Ok if testing passes also on aarch64, s390x and armv7hl? 2018-04-12 Jakub Jelinek PR sanitizer/85230 * asan.c (handle_builtin_stack_restore): Adjust comment. Emit __asan_allocas_unpoison call and last_alloca_addr = new_sp before __builtin_stack_restore rather than after it. * builtins.c (expand_asan_emit_allocas_unpoison): Pass arg1 + (virtual_dynamic_stack_rtx - stack_pointer_rtx) as second argument instead of virtual_dynamic_stack_rtx. --- gcc/asan.c.jj 2018-01-09 21:53:38.821577722 +0100 +++ gcc/asan.c 2018-04-12 13:22:59.166095523 +0200 @@ -554,14 +554,14 @@ get_last_alloca_addr () return last_alloca_addr; } -/* Insert __asan_allocas_unpoison (top, bottom) call after +/* Insert __asan_allocas_unpoison (top, bottom) call before __builtin_stack_restore (new_sp) call. The pseudocode of this routine should look like this: - __builtin_stack_restore (new_sp); top = last_alloca_addr; bot = new_sp; __asan_allocas_unpoison (top, bot); last_alloca_addr = new_sp; + __builtin_stack_restore (new_sp); In general, we can't use new_sp as bot parameter because on some architectures SP has non zero offset from dynamic stack area. Moreover, on some architectures this offset (STACK_DYNAMIC_OFFSET) becomes known for each @@ -570,9 +570,8 @@ get_last_alloca_addr () http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#DYNAM-STACK. 
To overcome the issue we use following trick: pass new_sp as a second parameter to __asan_allocas_unpoison and rewrite it during expansion with - virtual_dynamic_stack_rtx later in expand_asan_emit_allocas_unpoison - function. -*/ + new_sp + (virtual_dynamic_stack_rtx - sp) later in + expand_asan_emit_allocas_unpoison function. */ static void handle_builtin_stack_restore (gcall *call, gimple_stmt_iterator *iter) @@ -584,9 +583,9 @@ handle_builtin_stack_restore (gcall *cal tree restored_stack = gimple_call_arg (call, 0); tree fn = builtin_decl_implicit (BUILT_IN_ASAN_ALLOCAS_UNPOISON); gimple *g = gimple_build_call (fn, 2, last_alloca, restored_stack); - gsi_insert_after (iter, g, GSI_NEW_STMT); + gsi_insert_before (iter, g, GSI_SAME_STMT); g = gimple_build_assign (last_alloca, restored_stack); - gsi_insert_after (iter, g, GSI_NEW_STMT); + gsi_insert_before (iter, g, GSI_SAME_STMT); } /* Deploy and poison redzones around __builtin_alloca call. To do this, we --- gcc/builtins.c.jj 2018-04-04 21:33:20.530639395 +0200 +++ gcc/builtins.c 2018-04-12 13:35:34.32839
Re: [patch, fortran] Remove parallel annotation from DO CONCURRENT
On Thu, Apr 12, 2018 at 11:14:45PM +0200, Thomas Koenig wrote: > 2018-04-12 Thomas Koenig > > PR fortran/83064 > PR testsuite/85346 > * trans-stmt.c (gfc_trans_forall_loop): Use annot_expr_ivdep_kind > for annotation and remove dependence on -ftree-parallelize-loops. > > 2018-04-12 Thomas Koenig > > PR fortran/83064 > PR testsuite/85346 > * gfortran.dg/do_concurrent_5.f90: Dynamically allocate main work > array and move test to libgomp/testsuite/libgomp.fortran. > * gfortran.dg/do_concurrent_6.f90: New test. > > 2018-04-12 Thomas Koenig > > PR fortran/83064 > PR testsuite/85346 > * testsuite/libgomp.fortran: Move modified test from gfortran.dg > to here. Please use full filename here, like: * testsuite/libgomp.fortran/do_concurrent_5.f90: New test, moved from gfortran.dg. Make edof array allocatable. Ok with that change. > Index: trans-stmt.c > === > --- trans-stmt.c (Revision 259326) > +++ trans-stmt.c (Arbeitskopie) > @@ -3643,12 +3643,12 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr >cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node, > count, build_int_cst (TREE_TYPE (count), 0)); > > - /* PR 83064 means that we cannot use the annotation if the > - autoparallelizer is active. */ > - if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops) > + /* PR 83064 means that we cannot use annot_expr_parallel_kind until > + the autoparallelizer can hande this. */ > + if (forall_tmp->do_concurrent) > cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, > build_int_cst (integer_type_node, > - annot_expr_parallel_kind), > + annot_expr_ivdep_kind), > integer_zero_node); > >tmp = build1_v (GOTO_EXPR, exit_label); > ! { dg-do run } > ! PR 83064 - this used to give wrong results. > ! { dg-additional-options "-O1 -ftree-parallelize-loops=2" } > ! 
Original test case by Christian Felter > > program main > use, intrinsic :: iso_fortran_env > implicit none > > integer, parameter :: nsplit = 4 > integer(int64), parameter :: ne = 2**20 > integer(int64) :: stride, low(nsplit), high(nsplit), i > integer(int64), dimension(:), allocatable :: edof > real(real64), dimension(nsplit) :: pi > > allocate (edof(ne)) > edof(1::4) = 1 > edof(2::4) = 2 > edof(3::4) = 3 > edof(4::4) = 4 > > stride = ceiling(real(ne)/nsplit) > do i = 1, nsplit > high(i) = stride*i > end do > do i = 2, nsplit > low(i) = high(i-1) + 1 > end do > low(1) = 1 > high(nsplit) = ne > > pi = 0 > do concurrent (i = 1:nsplit) > pi(i) = sum(compute( low(i), high(i) )) > end do > if (abs (sum(pi) - atan(1.0d0)) > 1e-5) STOP 1 > > contains > > pure function compute( low, high ) result( ttt ) > integer(int64), intent(in) :: low, high > real(real64), dimension(nsplit) :: ttt > integer(int64) :: j, k > > ttt = 0 > > ! Unrolled loop > ! do j = low, high, 4 > ! k = 1 > ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) > > ! k = 2 > ! ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 ) > > ! k = 3 > ! ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 ) > > ! k = 4 > ! ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 ) > > ! end do > > ! Loop with modulo operation > ! do j = low, high > ! k = mod( j, nsplit ) + 1 > ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) > > ! end do > > ! Loop with subscripting via host association > do j = low, high > k = edof(j) > ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 ) > > end do > end function > > end program main > ! { dg-do compile } > ! { dg-additional-options "-fdump-tree-original" } > > program main > real, dimension(100) :: a,b > call random_number(a) > do concurrent (i=1:100) > b(i) = a(i)*a(i) > end do > print *,sum(a) > end program main > > ! { dg-final { scan-tree-dump-times "ivdep" 1 "original" } } Jakub
Re: [patch, fortran] Remove parallel annotation from DO CONCURRENT
Well, here's a variation which actually passes regression-test. Seems I implicitly believed that the implicit save on main program variables actually works... well, it turns out that it doesn't, which is now PR85364. OK for trunk? Thomas 2018-04-12 Thomas Koenig PR fortran/83064 PR testsuite/85346 * trans-stmt.c (gfc_trans_forall_loop): Use annot_expr_ivdep_kind for annotation and remove dependence on -ftree-parallelize-loops. 2018-04-12 Thomas Koenig PR fortran/83064 PR testsuite/85346 * gfortran.dg/do_concurrent_5.f90: Dynamically allocate main work array and move test to libgomp/testsuite/libgomp.fortran. * gfortran.dg/do_concurrent_6.f90: New test. 2018-04-12 Thomas Koenig PR fortran/83064 PR testsuite/85346 * testsuite/libgomp.fortran: Move modified test from gfortran.dg to here. Index: trans-stmt.c === --- trans-stmt.c (Revision 259326) +++ trans-stmt.c (Arbeitskopie) @@ -3643,12 +3643,12 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tr cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node, count, build_int_cst (TREE_TYPE (count), 0)); - /* PR 83064 means that we cannot use the annotation if the - autoparallelizer is active. */ - if (forall_tmp->do_concurrent && ! flag_tree_parallelize_loops) + /* PR 83064 means that we cannot use annot_expr_parallel_kind until + the autoparallelizer can hande this. */ + if (forall_tmp->do_concurrent) cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_parallel_kind), + annot_expr_ivdep_kind), integer_zero_node); tmp = build1_v (GOTO_EXPR, exit_label); ! { dg-do run } ! PR 83064 - this used to give wrong results. ! { dg-additional-options "-O1 -ftree-parallelize-loops=2" } ! 
Original test case by Christian Felter program main use, intrinsic :: iso_fortran_env implicit none integer, parameter :: nsplit = 4 integer(int64), parameter :: ne = 2**20 integer(int64) :: stride, low(nsplit), high(nsplit), i integer(int64), dimension(:), allocatable :: edof real(real64), dimension(nsplit) :: pi allocate (edof(ne)) edof(1::4) = 1 edof(2::4) = 2 edof(3::4) = 3 edof(4::4) = 4 stride = ceiling(real(ne)/nsplit) do i = 1, nsplit high(i) = stride*i end do do i = 2, nsplit low(i) = high(i-1) + 1 end do low(1) = 1 high(nsplit) = ne pi = 0 do concurrent (i = 1:nsplit) pi(i) = sum(compute( low(i), high(i) )) end do if (abs (sum(pi) - atan(1.0d0)) > 1e-5) STOP 1 contains pure function compute( low, high ) result( ttt ) integer(int64), intent(in) :: low, high real(real64), dimension(nsplit) :: ttt integer(int64) :: j, k ttt = 0 ! Unrolled loop ! do j = low, high, 4 ! k = 1 ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) ! k = 2 ! ttt(k) = ttt(k) + (-1)**(j+2) / real( 2*j+1 ) ! k = 3 ! ttt(k) = ttt(k) + (-1)**(j+3) / real( 2*j+3 ) ! k = 4 ! ttt(k) = ttt(k) + (-1)**(j+4) / real( 2*j+5 ) ! end do ! Loop with modulo operation ! do j = low, high ! k = mod( j, nsplit ) + 1 ! ttt(k) = ttt(k) + (-1)**(j+1) / real( 2*j-1 ) ! end do ! Loop with subscripting via host association do j = low, high k = edof(j) ttt(k) = ttt(k) + (-1.0_real64)**(j+1) / real( 2*j-1 ) end do end function end program main ! { dg-do compile } ! { dg-additional-options "-fdump-tree-original" } program main real, dimension(100) :: a,b call random_number(a) do concurrent (i=1:100) b(i) = a(i)*a(i) end do print *,sum(a) end program main ! { dg-final { scan-tree-dump-times "ivdep" 1 "original" } }
Re: [PATCH] PR libstdc++/85222 allow catching iostream errors as gcc4-compatible ios::failure
This fixes some comments with misspelled files and classes. Committed to trunk and gcc-7-branch. It occurred to me that the name of the new __ios_failure type is visible in the verbose terminate handler messages: terminate called after throwing an instance of 'std::__ios_failure' what(): basic_filebuf::underflow error reading the file: Is a directory Aborted (core dumped) And that there's no need for this type to use a reserved name. Users can't refer to it, or define macros that affect it (because it's never exposed in headers). So we could call it something else, like std::ios_failure rather than std::__ios_failure. Anybody got a preference they want to argue for? commit a28bcba2a812d4eac6da8ce86907b670361a09a6 Author: Jonathan Wakely Date: Thu Apr 12 21:28:38 2018 +0100 Fix comments that misspell names of files and classes * src/c++11/Makefile.am: Fix comment. * src/c++11/Makefile.in: Regenerate. * src/c++11/cxx11-ios_failure.cc: Fix comment. * src/c++98/ios_failure.cc: Likewise. diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am index 6f49f0d55d3..8d524b67232 100644 --- a/libstdc++-v3/src/c++11/Makefile.am +++ b/libstdc++-v3/src/c++11/Makefile.am @@ -127,7 +127,7 @@ hashtable_c++0x.o: hashtable_c++0x.cc $(CXXCOMPILE) -fimplicit-templates -c $< if ENABLE_DUAL_ABI -# Rewrite the type info for __dual_abi_ios_failure. +# Rewrite the type info for __ios_failure. rewrite_ios_failure_typeinfo = sed -e '/^_ZTISt13__ios_failure:$$/{' \ -e 'n' \ -e 's/_ZTVN10__cxxabiv120__si_class_type_infoE/_ZTVSt19__iosfail_type_info/' \ diff --git a/libstdc++-v3/src/c++11/cxx11-ios_failure.cc b/libstdc++-v3/src/c++11/cxx11-ios_failure.cc index 847b5946234..b1e4bfb2b44 100644 --- a/libstdc++-v3/src/c++11/cxx11-ios_failure.cc +++ b/libstdc++-v3/src/c++11/cxx11-ios_failure.cc @@ -140,7 +140,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // __ios_failure can be upcast to the type in a catch handler. 
bool __iosfail_type_info::__do_upcast(const __class_type_info *dst_type, - void **obj_ptr) const + void **obj_ptr) const { // If the handler is for the gcc4-compatible ios::failure type then // catch the object stored in __ios_failure::buf instead of @@ -150,7 +150,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION *obj_ptr = static_cast<__ios_failure*>(*obj_ptr)->buf; return true; } -// Otherwise proceeed as normal to see if the handler matches. +// Otherwise proceed as normal to see if the handler matches. return __class_type_info::__do_upcast(dst_type, obj_ptr); } #else // ! __cpp_rtti diff --git a/libstdc++-v3/src/c++98/ios_failure.cc b/libstdc++-v3/src/c++98/ios_failure.cc index a2fc5593e15..49d24f49620 100644 --- a/libstdc++-v3/src/c++98/ios_failure.cc +++ b/libstdc++-v3/src/c++98/ios_failure.cc @@ -57,7 +57,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #if _GLIBCXX_USE_DUAL_ABI // When the dual ABI is enabled __throw_ios_failure() is defined in - // src/c++11/ios_failure.cc + // src/c++11/cxx11-ios_failure.cc #if __cpp_rtti // If RTTI is enabled the exception type thrown will use these functions to // construct/destroy a gcc4-compatible ios::failure object in a buffer,
[PATCH] avoid duplicate warning for strcmp with a nonstring (PR 85359)
The attached patch makes a small tweak to avoid issuing a duplicate warning for calls to strcmp with a nonstring argument. The most onerous part of this was figuring out how to test for the absence of duplicate warnings. The "hack" I used (dg-regexp) is in place until a more straightforward solution becomes available. (David Malcolm has something planned for GCC 9.) Martin PR middle-end/85359 - duplicate -Wstringop-overflow for a strcmp call with a nonstring pointer gcc/ChangeLog: PR middle-end/85359 * builtins.c (expand_builtin_strcmp): Take care to avoid issuing a duplicate warning. gcc/testsuite/ChangeLog: PR middle-end/85359 * gcc.dg/attr-nonstring.c: New test. Index: gcc/builtins.c === --- gcc/builtins.c (revision 259298) +++ gcc/builtins.c (working copy) @@ -4570,14 +4570,15 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED } } - /* Check to see if the argument was declared attribute nonstring - and if so, issue a warning since at this point it's not known - to be nul-terminated. */ tree fndecl = get_callee_fndecl (exp); - maybe_warn_nonstring_arg (fndecl, exp); - if (result) { + /* Check to see if the argument was declared attribute nonstring + and if so, issue a warning since at this point it's not known + to be nul-terminated. Avoid doing this when RESULT is false + and let expand_call() do it. */ + maybe_warn_nonstring_arg (fndecl, exp); + /* Return the value in the proper mode for this function. 
*/ machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); if (GET_MODE (result) == mode) Index: gcc/testsuite/gcc.dg/attr-nonstring.c === --- gcc/testsuite/gcc.dg/attr-nonstring.c (nonexistent) +++ gcc/testsuite/gcc.dg/attr-nonstring.c (working copy) @@ -0,0 +1,58 @@ +/* PR middle-end/85359 - duplicate -Wstringop-overflow for a strcmp call + with a nonstring pointer + { dg-do compile } + { dg-options "-O2 -Wall" } */ + +extern char* strchr (const char*, int); +extern char* strrchr (const char*, int); +extern char* stpcpy (char*, const char*); +extern char* strcpy (char*, const char*); +extern int strcmp (const char*, const char*); +extern char* strstr (const char*, const char*); + +#define NONSTRING __attribute__ ((nonstring)) + +int strcmp_nonstring_1 (NONSTRING const char *a, const char *b) +{ + /* dg-warning matches one or more instances of the warning so it's + no good on its own. Use dg-regexp instead to verify that just + one instance of the warning is issued. See gcc.dg/pr64223-1 + for a different approach. */ + return strcmp (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .strcmp. argument 1 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + +int strcmp_nonstring_2 (const char *a, NONSTRING const char *b) +{ + return strcmp (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .strcmp. argument 2 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + + +char* stpcpy_nonstring (char *a, NONSTRING const char *b) +{ + return stpcpy (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .stpcpy. argument 2 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + +char* strchr_nonstring (NONSTRING const char *s, int c) +{ + return strchr (s, c); /* { dg-regexp "\[^\n\r\]+: warning: .strchr. argument 1 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + +char* strrchr_nonstring (NONSTRING const char *s, int c) +{ + return strrchr (s, c); /* { dg-regexp "\[^\n\r\]+: warning: .strrchr. argument 1 declared attribute .nonstring. 
\\\[-Wstringop-overflow=]" } */ +} + +char* strcpy_nonstring (char *a, NONSTRING const char *b) +{ + return strcpy (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .strcpy. argument 2 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + +char* strstr_nonstring_1 (NONSTRING const char *a, const char *b) +{ + return strstr (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .strstr. argument 1 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +} + +char* strstr_nonstring_2 (const char *a, NONSTRING const char *b) +{ + return strstr (a, b); /* { dg-regexp "\[^\n\r\]+: warning: .strstr. argument 2 declared attribute .nonstring. \\\[-Wstringop-overflow=]" } */ +}
[PATCH] configure.ac: honor --with-gcc-major-version in gcc-driver-name.h (PR jit/85384)
This patch updates gcc/configure.ac to use gcc_base_ver. I had to drop the \$\$ from the sed expression to get it to work within the configure script; I'm not entirely sure what their purpose is. Without them, it's still matching on the first group of numeric characters in BASE-VER. Tested with and without --with-gcc-major-version; in each case, gcc-driver-name.h is correctly determined. Fixes the linker issue reported downstream in https://bugzilla.redhat.com/show_bug.cgi?id=1566178 and fixes the driver not found issue with: gcc_jit_context_set_bool_use_external_driver (ctxt, 1); Successfully bootstrapped & regtested on x86_64-pc-linux-gnu. OK for trunk? config/ChangeLog: PR jit/85384 * acx.m4 (GCC_BASE_VER): Remove \$\$ from sed expression. gcc/ChangeLog: PR jit/85384 * configure.ac (gcc-driver-name.h): Honor --with-gcc-major-version by using gcc_base_ver to generate a gcc_driver_version, and use it when generating GCC_DRIVER_NAME. * configure.ac: Regenerate. --- config/acx.m4| 2 +- gcc/configure.ac | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/config/acx.m4 b/config/acx.m4 index aa1d34b..87c1b5e 100644 --- a/config/acx.m4 +++ b/config/acx.m4 @@ -246,7 +246,7 @@ AC_DEFUN([GCC_BASE_VER], [AS_HELP_STRING([--with-gcc-major-version-only], [use only GCC major number in filesystem paths])], [if test x$with_gcc_major_version_only = xyes ; then changequote(,)dnl -get_gcc_base_ver="sed -e 's/^\([0-9]*\).*\$\$/\1/'" +get_gcc_base_ver="sed -e 's/^\([0-9]*\).*/\1/'" changequote([,])dnl fi ]) diff --git a/gcc/configure.ac b/gcc/configure.ac index 67e1682..b066cc6 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -6499,8 +6499,10 @@ AC_DEFINE_UNQUOTED(DIAGNOSTICS_COLOR_DEFAULT, $DIAGNOSTICS_COLOR_DEFAULT, # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit # of jit/jit-playback.c. +gcc_driver_version=`eval "${get_gcc_base_ver} $srcdir/BASE-VER"` +echo "gcc_driver_version: ${gcc_driver_version}" cat > gcc-driver-name.h <
Re: [PATCH] Handle empty infinite loops in OpenACC for PR84955
On Thu, Apr 12, 2018 at 11:39:43AM -0700, Cesar Philippidis wrote: > Strange. I didn't observe any regressions when I tested it. But, then > again, I was testing against revision > > r259092 | jason | 2018-04-04 09:42:55 -0700 (Wed, 04 Apr 2018) | 4 lines > > which is over a week old. I'll revert that patch for now, and revisit > this issue in stage1. You should have kept the omp-expand.c chunk, that is correct and shouldn't cause issues. Jakub
C++ PATCH for c++/85356, C++17 ICE with pointer to member function in template
We weren't instantiating exception-specifications when a template referred to them, but that won't fly in the C++17 world where they're part of the type, so we need to resolve them to do overload resolution for non-dependent expressions. The change to check_redeclaration_exception_specification is necessary because type_dependent_expression_p (fn) will fail for a dependent new_decl, because it doesn't have DECL_TEMPLATE_INFO yet. Tested x86_64-pc-linux-gnu, applying to trunk. commit e68e003bf7c837312bab52de2195ef4707150a3a Author: Jason Merrill Date: Thu Apr 12 07:45:03 2018 -0400 PR c++/85356 - ICE with pointer to member function. * pt.c (maybe_instantiate_noexcept): Do instantiate in templates if flag_noexcept_type. Build the new spec within the function context. * except.c (build_noexcept_spec): Do get constant value in templates if flag_noexcept_type. * decl.c (check_redeclaration_exception_specification): Don't instantiate noexcept on a dependent declaration. diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 44a152bd195..9f1a171ead7 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1232,8 +1232,11 @@ check_redeclaration_exception_specification (tree new_decl, && UNEVALUATED_NOEXCEPT_SPEC_P (old_exceptions)) return; - maybe_instantiate_noexcept (new_decl); - maybe_instantiate_noexcept (old_decl); + if (!type_dependent_expression_p (old_decl)) +{ + maybe_instantiate_noexcept (new_decl); + maybe_instantiate_noexcept (old_decl); +} new_exceptions = TYPE_RAISES_EXCEPTIONS (TREE_TYPE (new_decl)); old_exceptions = TYPE_RAISES_EXCEPTIONS (TREE_TYPE (old_decl)); diff --git a/gcc/cp/except.c b/gcc/cp/except.c index 0b46698b974..6dab6d6bd96 100644 --- a/gcc/cp/except.c +++ b/gcc/cp/except.c @@ -1194,11 +1194,14 @@ build_noexcept_spec (tree expr, int complain) { /* This isn't part of the signature, so don't bother trying to evaluate it until instantiation. 
*/ - if (!processing_template_decl && TREE_CODE (expr) != DEFERRED_NOEXCEPT) + if (TREE_CODE (expr) != DEFERRED_NOEXCEPT + && (!processing_template_decl + || (flag_noexcept_type && !value_dependent_expression_p (expr { expr = perform_implicit_conversion_flags (boolean_type_node, expr, complain, LOOKUP_NORMAL); + expr = instantiate_non_dependent_expr (expr); expr = cxx_constant_value (expr); } if (TREE_CODE (expr) == INTEGER_CST) diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 76e546cdeaa..da8a5264d33 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -23234,7 +23234,8 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) tree fntype, spec, noex, clone; /* Don't instantiate a noexcept-specification from template context. */ - if (processing_template_decl) + if (processing_template_decl + && (!flag_noexcept_type || type_dependent_expression_p (fn))) return true; if (DECL_CLONED_FUNCTION_P (fn)) @@ -23273,10 +23274,10 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) tf_warning_or_error, fn, /*function_p=*/false, /*integral_constant_expression_p=*/true); + spec = build_noexcept_spec (noex, tf_warning_or_error); pop_deferring_access_checks (); pop_access_scope (fn); pop_tinst_level (); - spec = build_noexcept_spec (noex, tf_warning_or_error); if (spec == error_mark_node) spec = noexcept_false_spec; } diff --git a/gcc/testsuite/g++.dg/template/mem_func_ptr2.C b/gcc/testsuite/g++.dg/template/mem_func_ptr2.C new file mode 100644 index 000..9ceabd3642b --- /dev/null +++ b/gcc/testsuite/g++.dg/template/mem_func_ptr2.C @@ -0,0 +1,13 @@ +// PR c++/85356 + +struct A +{ + A& operator=(int); +}; + +void foo(A&(A::*)(int)); + +template void bar() +{ + foo(&A::operator=); +}
Re: C++ PATCH for c++/85258, ICE with invalid range-based for-loop
OK. On Thu, Apr 12, 2018 at 1:47 PM, Marek Polacek wrote: > This is a crash on invalid which started when we changed > decl_maybe_constant_var_p > to say true for references. Then in tsubst_copy we take this branch: > if (decl_maybe_constant_var_p (r)) > { > /* We can't call cp_finish_decl, so handle the > initializer by hand. */ > tree init = tsubst_init (DECL_INITIAL (t), r, args, >complain, in_decl); > but tsubst_init can return NULL_TREE, which potential_constant_expression > knows how to handle, but reduced_constant_expression_p didn't. So the > following patch will fix the ICE. > > Bootstrapped/regtested on x86_64-linux, ok for trunk? > > 2018-04-12 Marek Polacek > > PR c++/85258 > * constexpr.c (reduced_constant_expression_p): Return false for null > trees. > > * g++.dg/parse/error61.C: New test. > > diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c > index 75f56df4465..82f14baaefd 100644 > --- gcc/cp/constexpr.c > +++ gcc/cp/constexpr.c > @@ -1773,6 +1773,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, > tree t, > bool > reduced_constant_expression_p (tree t) > { > + if (t == NULL_TREE) > +return false; > + >switch (TREE_CODE (t)) > { > case PTRMEM_CST: > @@ -1794,9 +1797,8 @@ reduced_constant_expression_p (tree t) > field = NULL_TREE; >FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t), i, idx, val) > { > - if (!val) > - /* We're in the middle of initializing this element. */ > - return false; > + /* If VAL is null, we're in the middle of initializing this > +element. 
*/ > if (!reduced_constant_expression_p (val)) > return false; > if (field) > diff --git gcc/testsuite/g++.dg/parse/error61.C > gcc/testsuite/g++.dg/parse/error61.C > index e69de29bb2d..199e1aa721c 100644 > --- gcc/testsuite/g++.dg/parse/error61.C > +++ gcc/testsuite/g++.dg/parse/error61.C > @@ -0,0 +1,14 @@ > +// PR c++/85258 > +// { dg-do compile { target c++11 } } > + > +template void foo() > +{ > + int x[8]; > + for (int& i, j : x) // { dg-error "multiple" } > +i = 0; // { dg-error "local variable" } > +} > + > +void bar() > +{ > + foo<0>(); > +} > > Marek
[PATCH] rs6000: Fix an ICE with -mno-direct-move (PR85291)
This fixes an ICE with -mno-direct-move. Tested etc.; committing. Segher 2018-04-12 Segher Boessenkool * config/rs6000/rs6000.md (fix_truncsi2): Use legacy code if asked to not generate direct moves. (fix_truncsi2_stfiwx): Similar. (fix_truncsi2_internal): Similar. --- gcc/config/rs6000/rs6000.md | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 25ac0b8..de652fa 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5591,7 +5591,7 @@ (define_expand "fix_truncsi2" (fix:SI (match_operand:SFDF 1 "gpc_reg_operand")))] "TARGET_HARD_FLOAT && " { - if (!TARGET_P8_VECTOR) + if (!(TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)) { rtx src = force_reg (mode, operands[1]); @@ -5618,7 +5618,7 @@ (define_insn_and_split "fix_truncsi2_stfiwx" "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && (mode != SFmode || TARGET_SINGLE_FLOAT) && TARGET_STFIWX && can_create_pseudo_p () - && !TARGET_P8_VECTOR" + && !(TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)" "#" "" [(pc)] @@ -5659,7 +5659,8 @@ (define_insn_and_split "fix_truncsi2_internal" (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,"))) (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d")) (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))] - "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_P8_VECTOR" + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT + && !(TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)" "#" "" [(pc)] -- 1.8.3.1
Re: [PATCH] libgcc/CET: Skip signal frames when unwinding shadow stack
On Wed, Apr 11, 2018 at 3:37 AM, H.J. Lu wrote: > When -fcf-protection -mcet is used, I got > > FAIL: g++.dg/eh/sighandle.C > > (gdb) bt > #0 _Unwind_RaiseException (exc=exc@entry=0x416ed0) > at /export/gnu/import/git/sources/gcc/libgcc/unwind.inc:140 > #1 0x77d9936b in __cxxabiv1::__cxa_throw (obj=, > tinfo=0x403dd0 , dest=0x0) > at > /export/gnu/import/git/sources/gcc/libstdc++-v3/libsupc++/eh_throw.cc:90 > #2 0x00401255 in sighandler (signo=11, si=0x7fffd6f8, > uc=0x7fffd5c0) > at > /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:9 > #3 Signal frame which isn't on shadow stack > #4 dosegv () > at > /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:14 > #5 0x004012e3 in main () > at > /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:30 > (gdb) p frames > $6 = 5 > (gdb) > > frame count should be 4, not 5. This patch skips signal frames when > unwinding shadow stack. > > Tested on i686 and x86-64. OK for trunk? > > H.J. > > PR libgcc/85334 > * unwind-generic.h (_Unwind_Frames_Increment): New. > * config/i386/shadow-stack-unwind.h (_Unwind_Frames_Increment): > Likewise. > * unwind.inc (_Unwind_RaiseException_Phase2): Increment frame > count with _Unwind_Frames_Increment. > (_Unwind_ForcedUnwind_Phase2): Likewise. > --- > libgcc/config/i386/shadow-stack-unwind.h | 5 + > libgcc/unwind-generic.h | 3 +++ > libgcc/unwind.inc| 6 -- > 3 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/libgcc/config/i386/shadow-stack-unwind.h > b/libgcc/config/i386/shadow-stack-unwind.h > index 40f48df2aec..a32f3e74b52 100644 > --- a/libgcc/config/i386/shadow-stack-unwind.h > +++ b/libgcc/config/i386/shadow-stack-unwind.h > @@ -49,3 +49,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. > If not, see > } \ > } \ > while (0) > + > +/* Increment frame count. Skip signal frames. 
*/ > +#undef _Unwind_Frames_Increment > +#define _Unwind_Frames_Increment(context, frames) \ > + if (!_Unwind_IsSignalFrame (context)) frames++ > diff --git a/libgcc/unwind-generic.h b/libgcc/unwind-generic.h > index b5e3568e1bc..639c96f438e 100644 > --- a/libgcc/unwind-generic.h > +++ b/libgcc/unwind-generic.h > @@ -291,4 +291,7 @@ EXCEPTION_DISPOSITION _GCC_specific_handler > (PEXCEPTION_RECORD, void *, > /* Additional actions to unwind number of stack frames. */ > #define _Unwind_Frames_Extra(frames) > > +/* Increment frame count. */ > +#define _Unwind_Frames_Increment(context, frames) frames++ > + > #endif /* unwind.h */ > diff --git a/libgcc/unwind.inc b/libgcc/unwind.inc > index 68c08964d30..b49f8797009 100644 > --- a/libgcc/unwind.inc > +++ b/libgcc/unwind.inc > @@ -72,8 +72,9 @@ _Unwind_RaiseException_Phase2(struct _Unwind_Exception *exc, >/* Don't let us unwind past the handler context. */ >gcc_assert (!match_handler); > > + _Unwind_Frames_Increment (context, frames); > + >uw_update_context (context, &fs); > - frames++; > } > >*frames_p = frames; > @@ -187,10 +188,11 @@ _Unwind_ForcedUnwind_Phase2 (struct _Unwind_Exception > *exc, > return _URC_FATAL_PHASE2_ERROR; > } > > + _Unwind_Frames_Increment (context, frames); > + >/* Update cur_context to describe the same frame as fs, and discard > the previous context if necessary. */ >uw_advance_context (context, &fs); > - frames++; > } > >*frames_p = frames; > -- > 2.14.3 > I need to increment frame count after uw_advance_context which will set the signal frame bit. OK for trunk? -- H.J. From 6ced07f8318d2c1faf616395b630c32c32e332f3 Mon Sep 17 00:00:00 2001 From: "H.J. 
Lu" Date: Tue, 10 Apr 2018 20:46:04 -0700 Subject: [PATCH] libgcc/CET: Skip signal frames when unwinding shadow stack When -fcf-protection -mcet is used, I got FAIL: g++.dg/eh/sighandle.C (gdb) bt #0 _Unwind_RaiseException (exc=exc@entry=0x416ed0) at /export/gnu/import/git/sources/gcc/libgcc/unwind.inc:140 #1 0x77d9936b in __cxxabiv1::__cxa_throw (obj=, tinfo=0x403dd0 , dest=0x0) at /export/gnu/import/git/sources/gcc/libstdc++-v3/libsupc++/eh_throw.cc:90 #2 0x00401255 in sighandler (signo=11, si=0x7fffd6f8, uc=0x7fffd5c0) at /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:9 #3 Signal frame which isn't on shadow stack #4 dosegv () at /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:14 #5 0x004012e3 in main () at /export/gnu/import/git/sources/gcc/gcc/testsuite/g++.dg/eh/sighandle.C:30 (gdb) p frames $6 = 5 (gdb) frame count should be 4, not 5. This patch skips signal frames when unwinding shadow stack. gcc/tes
Re: [wwwdocs] [COMMITTED] ARC gcc8 changes entry
On 11 April 2018 13:05:52 CEST, Claudiu Zissulescu wrote: >Hi, > >Please find the ARC's gcc8 changes entry section as committed to >wwwdocs. s/qualifer/qualifier/ thanks,
Re: [PATCH] Handle empty infinite loops in OpenACC for PR84955
On 04/12/2018 11:27 AM, H.J. Lu wrote: > On Wed, Apr 11, 2018 at 12:30 PM, Cesar Philippidis > wrote: >> On 04/09/2018 04:31 AM, Richard Biener wrote: >>> On Fri, 6 Apr 2018, Jakub Jelinek wrote: >>> On Fri, Apr 06, 2018 at 06:48:52AM -0700, Cesar Philippidis wrote: > 2018-04-06 Cesar Philippidis > > PR middle-end/84955 > > gcc/ > * cfgloop.c (flow_loops_find): Add assert. > * omp-expand.c (expand_oacc_for): Add dummy false branch for > tiled basic blocks without omp continue statements. > * tree-cfg.c (execute_fixup_cfg): Handle calls to internal > functions like regular functions. > > libgomp/ > * testsuite/libgomp.oacc-c-c++-common/pr84955.c: New test. > * testsuite/libgomp.oacc-fortran/pr84955.f90: New test. I'd like to defer the cfgloop.c and tree-cfg.c changes to Richard, just want to mention that: > --- a/gcc/tree-cfg.c > +++ b/gcc/tree-cfg.c > @@ -9586,10 +9586,7 @@ execute_fixup_cfg (void) >for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) > { > gimple *stmt = gsi_stmt (gsi); > - tree decl = is_gimple_call (stmt) > - ? gimple_call_fndecl (stmt) > - : NULL; > - if (decl) > + if (is_gimple_call (stmt)) This change doesn't affect just internal functions, but also all indirect calls through function pointers with const, pure or noreturn attributes. >>> >>> I think the change is desirable nevertheless. The question is if we >>> want to do it at this point in time. >>> >>> The description of the problem sounds more like LTO writing writing out >>> loops without previously fixing up state. So sth like the following >>> which I'd prefer at this stage (the above hunk is ok for stage1 then). >> >> OK, I'll save that hunk for stage 1. >> >>> Index: gcc/lto-streamer-out.c >>> === >>> --- gcc/lto-streamer-out.c (revision 259227) >>> +++ gcc/lto-streamer-out.c (working copy) >>> @@ -2084,6 +2151,9 @@ output_function (struct cgraph_node *nod >>>/* Set current_function_decl and cfun. 
*/ >>>push_cfun (fn); >>> >>> + /* Fixup loops if required to match discovery done in the reader. */ >>> + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); >>> + >>>/* Make string 0 be a NULL string. */ >>>streamer_write_char_stream (ob->string_stream, 0); >>> >>> @@ -2176,12 +2246,13 @@ output_function (struct cgraph_node *nod >>>streamer_write_record_start (ob, LTO_null); >>> >>>output_cfg (ob, fn); >>> - >>> - pop_cfun (); >>> } >>>else >>> streamer_write_uhwi (ob, 0); >>> >>> + loop_optimizer_finalize (); >>> + pop_cfun (); >>> + >>>/* Create a section to hold the pickled output of this function. */ >>>produce_asm (ob, function); >> >> That worked. Is this patch OK for trunk, GCC 6 and GCC 7? > > This caused: > > https://gcc.gnu.org/ml/gcc-regression/2018-04/msg00099.html > > FAIL: g++.dg/ipa/pr46984.C -std=gnu++11 (internal compiler error) > FAIL: g++.dg/ipa/pr46984.C -std=gnu++11 (test for excess errors) > FAIL: g++.dg/ipa/pr46984.C -std=gnu++14 (internal compiler error) > FAIL: g++.dg/ipa/pr46984.C -std=gnu++14 (test for excess errors) > FAIL: g++.dg/ipa/pr46984.C -std=gnu++98 (internal compiler error) > FAIL: g++.dg/ipa/pr46984.C -std=gnu++98 (test for excess errors) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto > -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto > -flto-partition=none -fuse-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto > -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto > -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto > -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects > (internal compiler error) > FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto > 
-fuse-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto > -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto > -flto-partition=none -fuse-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto > -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) > FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O2 -flto > -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) > FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o asse
Re: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote: > If you make C++ inline and get the idea to use target cloning attribute on > this, > this will likely lead to link error if you compile multiple files because you > turn comdat to non-comdat. > > For comdats this would effectively need to become C++ abi extension and we > would > need to define comdat sections for these. Perhaps easiest way is to simply > reject the attribute on comdats and probably also extern functions? I'm not really sure we can do that, various packages in the wild are already using this. What is the problem with comdats and multi-versioning? The question is what comdat groups we should use for the comdat resolver and the versioned functions, shall the ifunc symbol be the original mangling of the method (or other comdat) and the other entrypoints just be .local non-weak symbols inside of the same section? Jakub
Re: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
On Thu, Apr 12, 2018 at 07:53:35PM +0200, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 03:46:26PM +0200, Jan Hubicka wrote: > > If you make C++ inline and get the idea to use target cloning attribute on > > this, > > this will likely lead to link error if you compile multiple files because > > you > > turn comdat to non-comdat. > > > > For comdats this would effectively need to become C++ abi extension and we > > would > > need to define comdat sections for these. Perhaps easiest way is to simply > > reject the attribute on comdats and probably also extern functions? > > I'm not really sure we can do that, various packages in the wild are already > using this. > What is the problem with comdats and multi-versioning? > The question is what comdat groups we should use for the comdat resolver and > the versioned functions, shall the ifunc symbol be the original mangling of > the method (or other comdat) and the other entrypoints just be .local > non-weak symbols inside of the same section? Ah, but we emit the resolver only if we see a use of it. That sounds quite broken, resolver in each TU that uses it? Better to have one at each definition... Jakub
Re: [PATCH] Handle empty infinite loops in OpenACC for PR84955
On Wed, Apr 11, 2018 at 12:30 PM, Cesar Philippidis wrote: > On 04/09/2018 04:31 AM, Richard Biener wrote: >> On Fri, 6 Apr 2018, Jakub Jelinek wrote: >> >>> On Fri, Apr 06, 2018 at 06:48:52AM -0700, Cesar Philippidis wrote: 2018-04-06 Cesar Philippidis PR middle-end/84955 gcc/ * cfgloop.c (flow_loops_find): Add assert. * omp-expand.c (expand_oacc_for): Add dummy false branch for tiled basic blocks without omp continue statements. * tree-cfg.c (execute_fixup_cfg): Handle calls to internal functions like regular functions. libgomp/ * testsuite/libgomp.oacc-c-c++-common/pr84955.c: New test. * testsuite/libgomp.oacc-fortran/pr84955.f90: New test. >>> >>> I'd like to defer the cfgloop.c and tree-cfg.c changes to Richard, just >>> want to >>> mention that: >>> --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -9586,10 +9586,7 @@ execute_fixup_cfg (void) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) { gimple *stmt = gsi_stmt (gsi); - tree decl = is_gimple_call (stmt) - ? gimple_call_fndecl (stmt) - : NULL; - if (decl) + if (is_gimple_call (stmt)) >>> >>> This change doesn't affect just internal functions, but also all indirect >>> calls through function pointers with const, pure or noreturn attributes. >> >> I think the change is desirable nevertheless. The question is if we >> want to do it at this point in time. >> >> The description of the problem sounds more like LTO writing writing out >> loops without previously fixing up state. So sth like the following >> which I'd prefer at this stage (the above hunk is ok for stage1 then). > > OK, I'll save that hunk for stage 1. > >> Index: gcc/lto-streamer-out.c >> === >> --- gcc/lto-streamer-out.c (revision 259227) >> +++ gcc/lto-streamer-out.c (working copy) >> @@ -2084,6 +2151,9 @@ output_function (struct cgraph_node *nod >>/* Set current_function_decl and cfun. */ >>push_cfun (fn); >> >> + /* Fixup loops if required to match discovery done in the reader. 
*/ >> + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); >> + >>/* Make string 0 be a NULL string. */ >>streamer_write_char_stream (ob->string_stream, 0); >> >> @@ -2176,12 +2246,13 @@ output_function (struct cgraph_node *nod >>streamer_write_record_start (ob, LTO_null); >> >>output_cfg (ob, fn); >> - >> - pop_cfun (); >> } >>else >> streamer_write_uhwi (ob, 0); >> >> + loop_optimizer_finalize (); >> + pop_cfun (); >> + >>/* Create a section to hold the pickled output of this function. */ >>produce_asm (ob, function); > > That worked. Is this patch OK for trunk, GCC 6 and GCC 7? This caused: https://gcc.gnu.org/ml/gcc-regression/2018-04/msg00099.html FAIL: g++.dg/ipa/pr46984.C -std=gnu++11 (internal compiler error) FAIL: g++.dg/ipa/pr46984.C -std=gnu++11 (test for excess errors) FAIL: g++.dg/ipa/pr46984.C -std=gnu++14 (internal compiler error) FAIL: g++.dg/ipa/pr46984.C -std=gnu++14 (test for excess errors) FAIL: g++.dg/ipa/pr46984.C -std=gnu++98 (internal compiler error) FAIL: g++.dg/ipa/pr46984.C -std=gnu++98 (test for excess errors) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto -flto-partition=none -fuse-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O0 -flto -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) FAIL: g++.dg/lto/20081217-1 cp_lto_20081217-1_0.o assemble, -O2 -flto -fuse-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto -flto-partition=1to1 -fno-use-linker-plugin 
(internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto -flto-partition=none -fuse-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O0 -flto -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O2 -flto -flto-partition=1to1 -fno-use-linker-plugin (internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O2 -flto -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error) FAIL: g++.dg/lto/20081217-2 cp_lto_20081217-2_0.o assemble, -O2 -flto -fuse-linker-
Re: Patch ping^3
On 04/12/2018 02:41 AM, Richard Biener wrote: > On Thu, 12 Apr 2018, Jakub Jelinek wrote: > >> Hi! >> >> I'd like to ping the >> >> http://gcc.gnu.org/ml/gcc-patches/2018-03/msg01244.html >> - PR83157 - improve debug info for x86 setcc peepholes >> >> patch. Thanks. > > OK for stage1 and backporting after it soaked there for a while. > I'm too unfamiliar with the code to approve it at this point > (esp. concerned about the cselib part affecting others than > var-tracking in unexpected ways). I think it's reasonable to include now. Jeff
C++ PATCH for c++/85258, ICE with invalid range-based for-loop
This is a crash on invalid which started when we changed decl_maybe_constant_var_p to say true for references. Then in tsubst_copy we take this branch: if (decl_maybe_constant_var_p (r)) { /* We can't call cp_finish_decl, so handle the initializer by hand. */ tree init = tsubst_init (DECL_INITIAL (t), r, args, complain, in_decl); but tsubst_init can return NULL_TREE, which potential_constant_expression knows how to handle, but reduced_constant_expression_p didn't. So the following patch will fix the ICE. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2018-04-12 Marek Polacek PR c++/85258 * constexpr.c (reduced_constant_expression_p): Return false for null trees. * g++.dg/parse/error61.C: New test. diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c index 75f56df4465..82f14baaefd 100644 --- gcc/cp/constexpr.c +++ gcc/cp/constexpr.c @@ -1773,6 +1773,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, bool reduced_constant_expression_p (tree t) { + if (t == NULL_TREE) +return false; + switch (TREE_CODE (t)) { case PTRMEM_CST: @@ -1794,9 +1797,8 @@ reduced_constant_expression_p (tree t) field = NULL_TREE; FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t), i, idx, val) { - if (!val) - /* We're in the middle of initializing this element. */ - return false; + /* If VAL is null, we're in the middle of initializing this +element. */ if (!reduced_constant_expression_p (val)) return false; if (field) diff --git gcc/testsuite/g++.dg/parse/error61.C gcc/testsuite/g++.dg/parse/error61.C index e69de29bb2d..199e1aa721c 100644 --- gcc/testsuite/g++.dg/parse/error61.C +++ gcc/testsuite/g++.dg/parse/error61.C @@ -0,0 +1,14 @@ +// PR c++/85258 +// { dg-do compile { target c++11 } } + +template void foo() +{ + int x[8]; + for (int& i, j : x) // { dg-error "multiple" } +i = 0; // { dg-error "local variable" } +} + +void bar() +{ + foo<0>(); +} Marek
[PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657, take 2)
On Thu, Apr 12, 2018 at 05:29:35PM +, Wilco Dijkstra wrote: > > Depending on what you mean old, I see e.g. in 2010 power7 mempcpy got added, > > in 2013 other power versions, in 2016 s390*, etc. Doing a decent mempcpy > > isn't hard if you have asm version of memcpy and one spare register. > > More mempcpy implementations have been added in recent years indeed, but > almost all > add an extra copy of the memcpy code rather than using a single combined > implementation. > That means it is still better to call memcpy (which is frequently used and > thus likely in L1/L2) > rather than mempcpy (which is more likely to be cold and thus not cached). That really depends, usually when some app uses mempcpy, it uses it very heavily. And all the proposed patches do is honor what the user asked, if you use memcpy () + n, we aren't transforming that into mempcpy behind the user's back. Anyway, here is what I think Richard was asking for, that I'm currently bootstrapping/regtesting. It can be easily combined with Martin's target hook if needed, or do it only for endp == 1 && target != const0_rtx && CALL_EXPR_TAILCALL (exp) etc. 2018-04-12 Martin Liska Jakub Jelinek PR middle-end/81657 * expr.h (enum block_op_methods): Add BLOCK_OP_NO_LIBCALL_RET. * expr.c (emit_block_move_hints): Handle BLOCK_OP_NO_LIBCALL_RET. * builtins.c (expand_builtin_memory_copy_args): Use BLOCK_OP_NO_LIBCALL_RET method for mempcpy with non-ignored target, handle dest_addr == pc_rtx. * gcc.dg/string-opt-1.c: Remove bogus comment. Expect a mempcpy call. --- gcc/expr.h.jj 2018-01-12 11:35:51.424222835 +0100 +++ gcc/expr.h 2018-04-12 18:38:07.377464114 +0200 @@ -100,7 +100,11 @@ enum block_op_methods BLOCK_OP_NO_LIBCALL, BLOCK_OP_CALL_PARM, /* Like BLOCK_OP_NORMAL, but the libcall can be tail call optimized. */ - BLOCK_OP_TAILCALL + BLOCK_OP_TAILCALL, + /* Like BLOCK_OP_NO_LIBCALL, but instead of emitting a libcall return + pc_rtx to indicate nothing has been emitted and let the caller handle + it. 
*/ + BLOCK_OP_NO_LIBCALL_RET }; typedef rtx (*by_pieces_constfn) (void *, HOST_WIDE_INT, scalar_int_mode); --- gcc/expr.c.jj 2018-04-06 19:19:14.954130838 +0200 +++ gcc/expr.c 2018-04-12 18:39:58.866536619 +0200 @@ -1565,7 +1565,7 @@ emit_block_move_hints (rtx x, rtx y, rtx unsigned HOST_WIDE_INT max_size, unsigned HOST_WIDE_INT probable_max_size) { - bool may_use_call; + int may_use_call; rtx retval = 0; unsigned int align; @@ -1577,7 +1577,7 @@ emit_block_move_hints (rtx x, rtx y, rtx { case BLOCK_OP_NORMAL: case BLOCK_OP_TAILCALL: - may_use_call = true; + may_use_call = 1; break; case BLOCK_OP_CALL_PARM: @@ -1589,7 +1589,11 @@ emit_block_move_hints (rtx x, rtx y, rtx break; case BLOCK_OP_NO_LIBCALL: - may_use_call = false; + may_use_call = 0; + break; + +case BLOCK_OP_NO_LIBCALL_RET: + may_use_call = -1; break; default: @@ -1625,6 +1629,9 @@ emit_block_move_hints (rtx x, rtx y, rtx && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y))) { + if (may_use_call < 0) + return pc_rtx; + /* Since x and y are passed to a libcall, mark the corresponding tree EXPR as addressable. */ tree y_expr = MEM_EXPR (y); --- gcc/builtins.c.jj 2018-04-12 13:35:34.328395156 +0200 +++ gcc/builtins.c 2018-04-12 18:42:01.846616598 +0200 @@ -3650,12 +3650,16 @@ expand_builtin_memory_copy_args (tree de set_mem_align (src_mem, src_align); /* Copy word part most expediently. */ - dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, -CALL_EXPR_TAILCALL (exp) -&& (endp == 0 || target == const0_rtx) -? 
BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, + enum block_op_methods method = BLOCK_OP_NORMAL; + if (CALL_EXPR_TAILCALL (exp) && (endp == 0 || target == const0_rtx)) +method = BLOCK_OP_TAILCALL; + if (endp == 1 && target != const0_rtx) +method = BLOCK_OP_NO_LIBCALL_RET; + dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, method, expected_align, expected_size, min_size, max_size, probable_max_size); + if (dest_addr == pc_rtx) +return NULL_RTX; if (dest_addr == 0) { --- gcc/testsuite/gcc.dg/string-opt-1.c.jj 2017-08-01 19:23:09.923512205 +0200 +++ gcc/testsuite/gcc.dg/string-opt-1.c 2018-04-12 18:57:10.940217129 +0200 @@ -1,4 +1,3 @@ -/* Ensure mempcpy is "optimized" into memcpy followed by addition. */ /* { dg-do compile } */ /* { dg-options "-O2" } */ @@ -48,5 +47,5 @@ main (void) return 0; } -/* { dg-final { scan-assembler-not "\" } } */ +/* { dg-final {
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 04:30:07PM +, Wilco Dijkstra wrote: > Jakub Jelinek wrote: > > On Thu, Apr 12, 2018 at 03:53:13PM +, Wilco Dijkstra wrote: > > >> The tailcall issue is just a distraction. Historically the handling of > >> mempcpy > >> has been horribly inefficient in both GCC and GLIBC for practically all > >> targets. > >> This is why it was decided to defer to memcpy. > > > > I guess we need to agree to disagree. But we have a P1 PR that we need to > > resolve and it is one of the last 6 blockers we have. I'm not suggesting to > > revert PR70140, just let use mempcpy libcall if it is what the user wrote > > and > > we aren't expanding it inline. > > Frankly I don't see why it is a P1 regression. Do you have a benchmark that That is how regression priorities are defined. > >> So generally it's a good idea to change mempcpy into memcpy by default. > >> It's > >> not slower than calling mempcpy even if you have a fast implementation, > >> it's faster > >> if you use an up to date GLIBC which calls memcpy, and it's significantly > >> better > >> when using an old GLIBC. > > > > mempcpy is quite good on many targets even in old GLIBCs. > > Only true if with "many" you mean x86, x86_64 and IIRC sparc. Depending on what you mean old, I see e.g. in 2010 power7 mempcpy got added, in 2013 other power versions, in 2016 s390*, etc. Doing a decent mempcpy isn't hard if you have asm version of memcpy and one spare register. Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 05:17:29PM +0200, Richard Biener wrote: > >For -Os that is easily measurable regression, for -O2 it depends on the > >relative speed of memcpy vs. mempcpy and whether one or both of them > >are in > >I-cache or not. > > Well, then simply unconditionally not generate a libcall from the move > expander? We need to generate libcall for many callers and in fact, we don't have a mode nor a way to tell the caller that we haven't emitted anything. What we could do is add another enumerator to enum block_op_methods that would be like BLOCK_OP_NO_LIBCALL, but would not use emit_block_move_via_loop if neither move_by_pieces nor emit_block_move_via_movmem can be used, and say instead return const0_rtx or pc_rtx or some way to tell the caller that it hasn't emitted anything and in expand_builtin_memory_copy_args pass for endp == 1 && target != const0_rtx that new BLOCK_OP_NO_LIBCALL_LOOP to emit_block_move_hints and return 0 if dest_addr is const0_rtx (or pc_rtx or whatever is chosen). Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
Jakub Jelinek wrote: >On Thu, Apr 12, 2018 at 04:30:07PM +, Wilco Dijkstra wrote: >> Jakub Jelinek wrote: >> Frankly I don't see why it is a P1 regression. Do you have a benchmark that > >That is how regression priorities are defined. How can one justify considering this a release blocker without hard numbers? If this is a 1% regression on a large body of code it would be very serious, if 0.01% - not so much. >> >> So generally it's a good idea to change mempcpy into memcpy by default. >> >> It's >> >> not slower than calling mempcpy even if you have a fast implementation, >> >> it's faster >> >> if you use an up to date GLIBC which calls memcpy, and it's significantly >> >> better >> >> when using an old GLIBC. >> > >> > mempcpy is quite good on many targets even in old GLIBCs. >> >> Only true if with "many" you mean x86, x86_64 and IIRC sparc. > > Depending on what you mean old, I see e.g. in 2010 power7 mempcpy got added, > in 2013 other power versions, in 2016 s390*, etc. Doing a decent mempcpy > isn't hard if you have asm version of memcpy and one spare register. More mempcpy implementations have been added in recent years indeed, but almost all add an extra copy of the memcpy code rather than using a single combined implementation. That means it is still better to call memcpy (which is frequently used and thus likely in L1/L2) rather than mempcpy (which is more likely to be cold and thus not cached). Wilco
Re: Fix PR target/85238
On April 12, 2018 4:18:47 PM GMT+02:00, Eric Botcazou wrote: >This makes -g work again in LTO mode for Windows targets by kludging >around >the missing support for copying PE-COFF debug sections in the simple >object >module of libiberty, thus effectively disabling early debug in LTO >mode. >The patch also contains a fixlet for a related oversight in the LTO >wrapper. > >Bootstrapped and tested on x86-64/Windows, approved by Richard B. in >the audit >trail and applied on the mainline. Thanks Eric for fixing this. Richard. > >2018-04-12 Eric Botcazou > > PR target/85238 > * lto-wrapper.c (debug_objcopy): Open the files in binary mode. > * dwarf2out.c (dwarf2out_early_finish): Do not generate assembly in >LTO > mode for PE-COFF targets. > * config/i386/i386-protos.h (i386_pe_asm_lto_start): Declare. > (i386_pe_asm_lto_end): Likewise. > * config/i386/cygming.h (TARGET_ASM_LTO_START): Define. > (TARGET_ASM_LTO_END): Likewise. > * config/i386/winnt.c (saved_debug_info_level): New static variable. > (i386_pe_asm_lto_start): New function. > (i386_pe_asm_lto_end): Likewise.
[PATCH][OBVIOUS] PR85347: New testcase vec-ldl-1.c FAILs on powerpc64-linux
This new test case required a dejagnu qualifier to restrict its execution on big-endian platforms. The patch bootstrapped and tested without regressions. Was committed as obvious. gcc/testsuite/ChangeLog: 2018-04-12 Kelvin Nilsen PR target/85347 * gcc.target/powerpc/vec-ldl-1.c: Change dejagnu directives to specify -mvsx on gcc command line. Index: gcc/testsuite/gcc.target/powerpc/vec-ldl-1.c === --- gcc/testsuite/gcc.target/powerpc/vec-ldl-1.c (revision 259318) +++ gcc/testsuite/gcc.target/powerpc/vec-ldl-1.c (working copy) @@ -1,6 +1,6 @@ /* { dg-do run { target powerpc*-*-* } } */ -/* { dg-require-effective-target vmx_hw } */ -/* { dg-options "-maltivec -O0 -Wall" } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-mvsx -O0 -Wall" } */ #include #include
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 03:53:13PM +, Wilco Dijkstra wrote: > Jakub Jelinek wrote: > > On Thu, Apr 12, 2018 at 03:52:09PM +0200, Richard Biener wrote: > >> Not sure if I missed some important part of the discussion but > >> for the testcase we want to preserve the tailcall, right? So > >> it would be enough to set avoid_libcall to > >> endp != 0 && CALL_EXPR_TAILCALL (exp) (and thus also handle > >> stpcpy)? > > The tailcall issue is just a distraction. Historically the handling of > mempcpy > has been horribly inefficient in both GCC and GLIBC for practically all > targets. > This is why it was decided to defer to memcpy. I guess we need to agree to disagree. But we have a P1 PR that we need to resolve and it is one of the last 6 blockers we have. I'm not suggesting to revert PR70140, just let use mempcpy libcall if it is what the user wrote and we aren't expanding it inline. > So generally it's a good idea to change mempcpy into memcpy by default. It's No. > not slower than calling mempcpy even if you have a fast implementation, it's > faster > if you use an up to date GLIBC which calls memcpy, and it's significantly > better > when using an old GLIBC. mempcpy is quite good on many targets even in old GLIBCs. Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 03:53:13PM +, Wilco Dijkstra wrote: >> The tailcall issue is just a distraction. Historically the handling of >> mempcpy >> has been horribly inefficient in both GCC and GLIBC for practically all >> targets. >> This is why it was decided to defer to memcpy. > > I guess we need to agree to disagree. But we have a P1 PR that we need to > resolve and it is one of the last 6 blockers we have. I'm not suggesting to > revert PR70140, just let use mempcpy libcall if it is what the user wrote and > we aren't expanding it inline. Frankly I don't see why it is a P1 regression. Do you have a benchmark that regresses significantly (a few percent, not by a few bytes)? I already showed the AArch64 results for GLIBC, do you have x86 results that prove things are much worse? >> So generally it's a good idea to change mempcpy into memcpy by default. It's >> not slower than calling mempcpy even if you have a fast implementation, it's >> faster >> if you use an up to date GLIBC which calls memcpy, and it's significantly >> better >> when using an old GLIBC. > > mempcpy is quite good on many targets even in old GLIBCs. Only true if with "many" you mean x86, x86_64 and IIRC sparc. Wilco
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 8:53 AM, Wilco Dijkstra wrote: > So generally it's a good idea to change mempcpy into memcpy by default. It's > not slower than calling mempcpy even if you have a fast implementation, it's > faster > if you use an up to date GLIBC which calls memcpy, and it's significantly > better > when using an old GLIBC. > It is a BAD idea for x86. We don't want to turn mempcpy into memcpy on x86. PERIOD. -- H.J.
Re: [PATCH] PR libstdc++/85222 allow catching iostream errors as gcc4-compatible ios::failure
On 10 April 2018 at 00:36, Jonathan Wakely wrote: > Define a new exception type derived from std::ios::failure[abi:cxx11] > which also aggregates an object of the gcc4-compatible ios::failure > type. Make __throw_ios_failure throw this new type for iostream errors > that raise exceptions. Provide custom type info for the new type so that > it can be caught by handlers for the gcc4-compatible ios::failure type > as well as handlers for ios::failure[abi:cxx11] and its bases. > > PR libstdc++/85222 > * src/c++11/Makefile.am [ENABLE_DUAL_ABI]: Add special rules for > cxx11-ios_failure.cc to rewrite type info for __ios_failure. > * src/c++11/Makefile.in: Regenerate. > * src/c++11/cxx11-ios_failure.cc (__ios_failure, __iosfail_type_info): > New types. > [_GLIBCXX_USE_DUAL_ABI] (__throw_ios_failure): Define here. > * src/c++11/ios.cc (__throw_ios_failure): Remove definition. > * src/c++98/ios_failure.cc (__construct_ios_failure) > (__destroy_ios_failure, is_ios_failure_handler): New functions. > [!_GLIBCXX_USE_DUAL_ABI] (__throw_ios_failure): Define here. > * testsuite/27_io/ios_base/failure/dual_abi.cc: New. > * testsuite/27_io/basic_ios/copyfmt/char/1.cc: Revert changes to > handler types, to always catch std::ios_base::failure. > * testsuite/27_io/basic_ios/exceptions/char/1.cc: Likewise. > * testsuite/27_io/basic_istream/extractors_arithmetic/char/ > exceptions_failbit.cc: Likewise. > * testsuite/27_io/basic_istream/extractors_arithmetic/wchar_t/ > exceptions_failbit.cc: Likewise. > * testsuite/27_io/basic_istream/extractors_other/char/ > exceptions_null.cc: Likewise. > * testsuite/27_io/basic_istream/extractors_other/wchar_t/ > exceptions_null.cc: Likewise. > * testsuite/27_io/basic_istream/sentry/char/12297.cc: Likewise. > * testsuite/27_io/basic_istream/sentry/wchar_t/12297.cc: Likewise. > * testsuite/27_io/basic_ostream/inserters_other/char/ > exceptions_null.cc: Likewise. > * testsuite/27_io/basic_ostream/inserters_other/wchar_t/ > exceptions_null.cc: Likewise. 
> * testsuite/27_io/ios_base/storage/2.cc: Likewise. > > Tested x86_64-linux and powerpc64-linux, with the default config, and > --disable-libstdcxx-dual-abi, and > --with-default-libstdcxx-abi=gcc4-compatible. I intend to commit this > to trunk and gcc-7-branch soon. This removes the #define for _GLIBCXX_USE_CXX11_ABI from the top of src/c++11/ios.cc, because __throw_ios_failure is no longer defined there. Tested as before, committed to trunk. commit 845d8dc521d0958b625f2bc691b284e221009929 Author: Jonathan Wakely Date: Wed Apr 11 13:47:31 2018 +0100 Remove #define made redundant by r259281 The definition of __throw_ios_failure is no longer in this file, so setting the macro here is unnecessary. * src/c++11/ios.cc: Remove redundant macro definition. diff --git a/libstdc++-v3/src/c++11/ios.cc b/libstdc++-v3/src/c++11/ios.cc index e928c594149..82063e4b2f5 100644 --- a/libstdc++-v3/src/c++11/ios.cc +++ b/libstdc++-v3/src/c++11/ios.cc @@ -26,10 +26,6 @@ // ISO C++ 14882: 27.4 Iostreams base classes // -// Determines the version of ios_base::failure thrown by __throw_ios_failure. -// If !_GLIBCXX_USE_DUAL_ABI this will get undefined automatically. -#define _GLIBCXX_USE_CXX11_ABI 1 - #include #include
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On April 12, 2018 5:38:44 PM GMT+02:00, Jakub Jelinek wrote: >On Thu, Apr 12, 2018 at 05:17:29PM +0200, Richard Biener wrote: >> >For -Os that is easily measurable regression, for -O2 it depends on >the >> >relative speed of memcpy vs. mempcpy and whether one or both of them >> >are in >> >I-cache or not. >> >> Well, then simply unconditionally not generate a libcall from the >move expander? > >We need to generate libcall for many callers and in fact, we don't have >a >mode nor a way to tell the caller that we haven't emitted anything. > >What we could do is add another enumerator to enum block_op_methods >that >would be like BLOCK_OP_NO_LIBCALL, but would not use >emit_block_move_via_loop >if move_by_pieces nor emit_block_move_via_movmem can be used, and say >instead return const0_rtx or pc_rtx or some way to tell the caller that >it hasn't emitted anything and in expand_builtin_memory_copy_args >pass for endp == 1 && target != const0_rtx that new >BLOCK_OP_NO_LIBCALL_LOOP >to emit_block_move_hints and return 0 if dest_addr is const0_rtx (or >pc_rtx >or whatever is chosen). Yes. Emit the "original" call whenever inline expansion fails. Richard. > Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 03:52:09PM +0200, Richard Biener wrote: >> Not sure if I missed some important part of the discussion but >> for the testcase we want to preserve the tailcall, right? So >> it would be enough to set avoid_libcall to >> endp != 0 && CALL_EXPR_TAILCALL (exp) (and thus also handle >> stpcpy)? The tailcall issue is just a distraction. Historically the handling of mempcpy has been horribly inefficient in both GCC and GLIBC for practically all targets. This is why it was decided to defer to memcpy. For example small constant mempcpy was not expanded inline like memcpy until PR70140 was fixed. Except for a few targets which have added an optimized mempcpy, the default mempcpy implementation in almost all released GLIBCs is much slower than memcpy (due to using a badly written C implementation). Recent GLIBCs now call the optimized memcpy - this is better but still adds extra call/return overheads. So to improve that the GLIBC headers have an inline that changes any call to mempcpy into memcpy (this is the default but can be disabled on a per-target basis). Obviously it is best to do this optimization in GCC, which is what we finally do in GCC8. Inlining mempcpy means you sometimes miss a tailcall, but this is not common - in all of GLIBC the inlining on AArch64 adds 166 extra instructions and 12 callee-save registers. This is a small codesize cost to avoid the overhead of calling the generic C version. > My preference would be to have non-lame mempcpy etc. on all targets, but the > aarch64 folks disagree. The question is who is going to write the 30+ mempcpy implementations for all those targets which don't have one? And who says doing this is actually going to improve performance? Having mempcpy+memcpy typically means more Icache misses in code that uses both. So generally it's a good idea to change mempcpy into memcpy by default. 
It's not slower than calling mempcpy even if you have a fast implementation, it's faster if you use an up to date GLIBC which calls memcpy, and it's significantly better when using an old GLIBC. Wilco
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On April 12, 2018 4:31:12 PM GMT+02:00, Jakub Jelinek wrote: >On Thu, Apr 12, 2018 at 04:19:38PM +0200, Richard Biener wrote: >> Well, but that wouldn't be a fix for a regression and IMHO there's >> no reason for a really lame mempcpy. If targets disgree well, > >It is a regression as well, in the past we've emitted mempcpy when user >wrote mempcpy, now we don't. > >E.g. >extern void *mempcpy (void *, const void *, __SIZE_TYPE__); >void bar (void *, void *, void *); > >void >foo (void *x, void *y, void *z, void *w, __SIZE_TYPE__ n) >{ > bar (mempcpy (x, w, n), mempcpy (y, w, n), mempcpy (z, w, n)); >} > >is on x86_64-linux -O2 in 7.x using the 3 mempcpy calls and 90 bytes in >foo, while >on the trunk uses 3 memcpy calls and 96 bytes in foo. > >For -Os that is easily measurable regression, for -O2 it depends on the >relative speed of memcpy vs. mempcpy and whether one or both of them >are in >I-cache or not. Well, then simply unconditionally not generate a libcall from the move expander? > >> then they get what they deserve. >> >> I don't see any aarch64 specific mempcpy in glibc btw so hopefully >> the default non-stupid one kicks in (it exactly looks like my C >> version) > > Jakub
Re: [PATCH] sel-sched: run cleanup_cfg just before loop_optimizer_init (PR 84659)
On 12.04.2018 0:55, Alexander Monakov wrote: > As noted in PR 85354, we cannot simply invoke cfg_cleanup after dominators are > computed, because they may become invalid but neither freed nor recomputed, so > this trips checking in flow_loops_find. > > We can move cleanup_cfg earlier (and run it for all sel-sched invocations, not > only when pipelining). OK. Sorry, I should have noticed that before, and our ia64 tester also misses libraries required for graphite. Best, Andrey > > Bootstrapped/regtested on x86_64 and ppc64 (my previous testing missed this > issue: the testcase requires graphite, but libisl wasn't present). > > PR rtl-optimization/85354 > * sel-sched-ir.c (sel_init_pipelining): Move cfg_cleanup call... > * sel-sched.c (sel_global_init): ... here. > > diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c > index 50a7daafba6..ee970522890 100644 > --- a/gcc/sel-sched-ir.c > +++ b/gcc/sel-sched-ir.c > @@ -30,7 +30,6 @@ along with GCC; see the file COPYING3. If not see > #include "cfgrtl.h" > #include "cfganal.h" > #include "cfgbuild.h" > -#include "cfgcleanup.h" > #include "insn-config.h" > #include "insn-attr.h" > #include "recog.h" > @@ -6122,9 +6121,6 @@ make_regions_from_loop_nest (struct loop *loop) > void > sel_init_pipelining (void) > { > - /* Remove empty blocks: their presence can break assumptions elsewhere, > - e.g. the logic to invoke update_liveness_on_insn in sel_region_init. */ > - cleanup_cfg (0); >/* Collect loop information to be used in outer loops pipelining. */ >loop_optimizer_init (LOOPS_HAVE_PREHEADERS > | LOOPS_HAVE_FALLTHRU_PREHEADERS > diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c > index cd29df35666..59762964c6e 100644 > --- a/gcc/sel-sched.c > +++ b/gcc/sel-sched.c > @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. 
If not see > #include "tm_p.h" > #include "regs.h" > #include "cfgbuild.h" > +#include "cfgcleanup.h" > #include "insn-config.h" > #include "insn-attr.h" > #include "params.h" > @@ -7661,6 +7662,10 @@ sel_sched_region (int rgn) > static void > sel_global_init (void) > { > + /* Remove empty blocks: their presence can break assumptions elsewhere, > + e.g. the logic to invoke update_liveness_on_insn in sel_region_init. */ > + cleanup_cfg (0); > + >calculate_dominance_info (CDI_DOMINATORS); >alloc_sched_pools (); >
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 04:19:38PM +0200, Richard Biener wrote: > Well, but that wouldn't be a fix for a regression and IMHO there's > no reason for a really lame mempcpy. If targets disgree well, It is a regression as well, in the past we've emitted mempcpy when user wrote mempcpy, now we don't. E.g. extern void *mempcpy (void *, const void *, __SIZE_TYPE__); void bar (void *, void *, void *); void foo (void *x, void *y, void *z, void *w, __SIZE_TYPE__ n) { bar (mempcpy (x, w, n), mempcpy (y, w, n), mempcpy (z, w, n)); } is on x86_64-linux -O2 in 7.x using the 3 mempcpy calls and 90 bytes in foo, while on the trunk uses 3 memcpy calls and 96 bytes in foo. For -Os that is easily measurable regression, for -O2 it depends on the relative speed of memcpy vs. mempcpy and whether one or both of them are in I-cache or not. > then they get what they deserve. > > I don't see any aarch64 specific mempcpy in glibc btw so hopefully > the default non-stupid one kicks in (it exactly looks like my C > version) Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, Apr 12, 2018 at 03:52:09PM +0200, Richard Biener wrote: > Not sure if I missed some important part of the discussion but > for the testcase we want to preserve the tailcall, right? So > it would be enough to set avoid_libcall to > endp != 0 && CALL_EXPR_TAILCALL (exp) (and thus also handle > stpcpy)? For the testcase yes. There the question is if some targets have so lame mempcpy that using a tailcall to mempcpy is slower over avoiding the tailcall (and on aarch64 it looked like maintainer's choice to have lame mempcpy and hope the compiler will avoid it at all costs). On the other side, that change has been forced over to all targets, even when they don't have lame mempcpy. So, the tailcall is one issue, and we can either use mempcpy if endp and CALL_EXPR_TAILCALL, or only do that if -Os. And another issue is mempcpy uses in other contexts, here again I think x86 has good enough mempcpy that if I use foo (mempcpy (x, y, z)) then it is better to use mempcpy over memcpy call, but not so on targets with lame mempcpy. My preference would be to have non-lame mempcpy etc. on all targets, but the aarch64 folks disagree. So, wonder e.g. about Martin's patch, which would use mempcpy if endp and either FAST_SPEED for mempcpy (regardless of the context), or not SLOW_SPEED and CALL_EXPR_TAILCALL. That way, targets could signal they have so lame mempcpy that they never want to use it (return SLOW_SPEED), or ask for it to be used every time it makes sense from caller POV, and have the default something in between (only use it in tail calls). Jakub
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, 12 Apr 2018, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 03:52:09PM +0200, Richard Biener wrote: > > Not sure if I missed some important part of the discussion but > > for the testcase we want to preserve the tailcall, right? So > > it would be enough to set avoid_libcall to > > endp != 0 && CALL_EXPR_TAILCALL (exp) (and thus also handle > > stpcpy)? > > For the testcase yes. There the question is if some targets have so lame > mempcpy that using a tailcall to mempcpy is slower over avoiding the > tailcall (and on aarch64 it looked like maintainer's choice to have lame > mempcpy and hope the compiler will avoid it at all costs). On the other > side, that change has been forced over to all targets, even when they don't > have lame mempcpy. > So, the tailcall is one issue, and we can either use mempcpy if endp > and CALL_EXPR_TAILCALL, or only do that if -Os. > > And another issue is mempcpy uses in other contexts, here again I think x86 > has good enough mempcpy that if I use > foo (mempcpy (x, y, z)) then it is better to use mempcpy over memcpy call, > but not so on targets with lame mempcpy. > > My preference would be to have non-lame mempcpy etc. on all targets, but the > aarch64 folks disagree. > > So, wonder e.g. about Martin's patch, which would use mempcpy if endp and > either FAST_SPEED for mempcpy (regardless of the context), or not > SLOW_SPEED and CALL_EXPR_TAILCALL. That way, targets could signal they have > so lame mempcpy that they never want to use it (return SLOW_SPEED), or ask > for it to be used every time it makes sense from caller POV, and have the > default something in between (only use it in tail calls). Well, but that wouldn't be a fix for a regression and IMHO there's no reason for a really lame mempcpy. If targets disgree well, then they get what they deserve. I don't see any aarch64 specific mempcpy in glibc btw so hopefully the default non-stupid one kicks in (it exactly looks like my C version) Richard. 
> Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Fix PR target/85238
This makes -g work again in LTO mode for Windows targets by kludging around the missing support for copying PE-COFF debug sections in the simple object module of libiberty, thus effectively disabling early debug in LTO mode. The patch also contains a fixlet for a related oversight in the LTO wrapper. Bootstrapped and tested on x86-64/Windows, approved by Richard B. in the audit trail and applied on the mainline. 2018-04-12 Eric Botcazou PR target/85238 * lto-wrapper.c (debug_objcopy): Open the files in binary mode. * dwarf2out.c (dwarf2out_early_finish): Do not generate assembly in LTO mode for PE-COFF targets. * config/i386/i386-protos.h (i386_pe_asm_lto_start): Declare. (i386_pe_asm_lto_end): Likewise. * config/i386/cygming.h (TARGET_ASM_LTO_START): Define. (TARGET_ASM_LTO_END): Likewise. * config/i386/winnt.c (saved_debug_info_level): New static variable. (i386_pe_asm_lto_start): New function. (i386_pe_asm_lto_end): Likewise. -- Eric BotcazouIndex: config/i386/cygming.h === --- config/i386/cygming.h (revision 259205) +++ config/i386/cygming.h (working copy) @@ -356,6 +356,12 @@ do { \ #undef TARGET_ASM_FILE_END #define TARGET_ASM_FILE_END i386_pe_file_end +/* Kludge because of missing PE-COFF support for early LTO debug. 
*/ +#undef TARGET_ASM_LTO_START +#define TARGET_ASM_LTO_START i386_pe_asm_lto_start +#undef TARGET_ASM_LTO_END +#define TARGET_ASM_LTO_END i386_pe_asm_lto_end + #undef ASM_COMMENT_START #define ASM_COMMENT_START " #" Index: config/i386/i386-protos.h === --- config/i386/i386-protos.h (revision 259205) +++ config/i386/i386-protos.h (working copy) @@ -254,6 +254,8 @@ extern void i386_pe_asm_output_aligned_d HOST_WIDE_INT, HOST_WIDE_INT); extern void i386_pe_file_end (void); +extern void i386_pe_asm_lto_start (void); +extern void i386_pe_asm_lto_end (void); extern void i386_pe_start_function (FILE *, const char *, tree); extern void i386_pe_end_function (FILE *, const char *, tree); extern void i386_pe_end_cold_function (FILE *, const char *, tree); Index: config/i386/winnt.c === --- config/i386/winnt.c (revision 259205) +++ config/i386/winnt.c (working copy) @@ -808,6 +808,23 @@ i386_pe_file_end (void) } } +/* Kludge because of missing PE-COFF support for early LTO debug. */ + +static enum debug_info_levels saved_debug_info_level; + +void +i386_pe_asm_lto_start (void) +{ + saved_debug_info_level = debug_info_level; + debug_info_level = DINFO_LEVEL_NONE; +} + +void +i386_pe_asm_lto_end (void) +{ + debug_info_level = saved_debug_info_level; +} + /* x64 Structured Exception Handling unwind info. */ Index: dwarf2out.c === --- dwarf2out.c (revision 259205) +++ dwarf2out.c (working copy) @@ -31807,7 +31807,11 @@ dwarf2out_early_finish (const char *file early_dwarf_finished = true; /* Do not generate DWARF assembler now when not producing LTO bytecode. */ - if (!flag_generate_lto && !flag_generate_offload) + if ((!flag_generate_lto && !flag_generate_offload) + /* FIXME: Disable debug info generation for PE-COFF targets since the + copy_lto_debug_sections operation of the simple object support in + libiberty is not implemented for them yet. 
*/ + || TARGET_PECOFF) return; /* Now as we are going to output for LTO initialize sections and labels Index: lto-wrapper.c === --- lto-wrapper.c (revision 259205) +++ lto-wrapper.c (working copy) @@ -983,7 +983,7 @@ debug_objcopy (const char *infile) infile = fname; inoff = (off_t) loffset; } - int infd = open (infile, O_RDONLY); + int infd = open (infile, O_RDONLY | O_BINARY); if (infd == -1) return NULL; simple_object_read *inobj = simple_object_start_read (infd, inoff,
[og7, nvptx, committed] Fix propagation of branch cond in vw-neutered code
Hi, Currently, when we enable -mlong-vector-in-workers in gemm.f90, we get: ... { .reg.u32%tidy; .reg.u64%t_bcast; .reg.u64%y64; mov.u32 %tidy, %tid.y; cvt.u64.u32 %y64, %tidy; add.u64 %y64, %y64, 1; cvta.shared.u64 %t_bcast, __oacc_bcast; mad.lo.u64 %r166, %y64, 104, %t_bcast; } @ %r179 bra.uni $L28; @ %r174 bra $L29; ... setp.le.s32 %r114,%r113,0; selp.u32 %r182,1,0,%r114; st.u32 [%r166],%r182; $L29: $L28: bar.sync %r167,128; ld.u32 %r183,[%r166]; setp.ne.u32 %r114,%r183,0; bar.sync %r167,128; @ %r114 bra.uni $L1 ... The branch condition %114 is computed in a W0V0 region, and then broadcast to a WAVA region. The broadcast is done using a partition of the broadcast buffer at %r166, but this is a worker-specific buffer. So since the writing of the buffer is done in worker 0 only, the read in workers other than 0 is reading uninitialized memory. This patch fixes this by using the generic broadcast buffer in this case, rather than a worker-specific one. Build x86_64 with nvptx accelerator and tested libgomp. Committed to og7. Thanks, - Tom [nvptx] Fix propagation of branch cond in vw-neutered code 2018-04-12 Tom de Vries PR target/85246 * config/nvptx/nvptx.c (nvptx_single): Don't use partitioning when propagating branch condition calculated in vector-worker-neutered code. * testsuite/libgomp.oacc-fortran/gemm.f90: Use -foffload=-mlong-vector-in-workers. 
--- gcc/config/nvptx/nvptx.c| 3 ++- libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 547022e..9d011eb 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4306,13 +4306,14 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) broadcast_data_t data; unsigned size = GET_MODE_SIZE (SImode); bool vector = (GOMP_DIM_MASK (GOMP_DIM_VECTOR) == mask) != 0; + bool worker = (GOMP_DIM_MASK (GOMP_DIM_WORKER) == mask) != 0; rtx barrier = GEN_INT (0); int threads = 0; data.base = oacc_bcast_sym; data.ptr = 0; - bool use_partitioning_p = (vector + bool use_partitioning_p = (vector && !worker && nvptx_mach_max_workers () > 1 && cfun->machine->bcast_partition); if (use_partitioning_p) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 index ad67dce..744d21e 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 @@ -1,6 +1,7 @@ ! Exercise three levels of parallelism using SGEMM from BLAS. ! { dg-additional-options "-fopenacc-dim=-:-:128" } +! { dg-additional-options "-foffload=-mlong-vector-in-workers" } ! Implicitly set vector_length to 128 using -fopenacc-dim. subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
On Thu, 12 Apr 2018, Martin Liška wrote: > Hi. > > I'm reminding review request from Richi for generic part > and Uros/Honza for target part. Not sure if I missed some important part of the discussion but for the testcase we want to preserve the tailcall, right? So it would be enough to set avoid_libcall to endp != 0 && CALL_EXPR_TAILCALL (exp) (and thus also handle stpcpy)? I'm not sure I like the interfacing of that to emit_block_move_hints very much. I'd have used something like BLOCK_OP_ABORT_ON_LIBCALL and extended the interface in a way to return what kind of method it chose rather than just a bool. Not sure what gcc.dg/20050503-1.c did on non-x86 targets - the test runs on all archs but only x86 is ever tested for a result. So - I think tail-calling is preferred, and somehow in the PR the discussion wandered off to whether there are fast implementations or not - but the testcase looks for a tailcall where the source was a tailcall, that should be authoritative for the "default" decision when the hook isn't implemented or doesn't cover the case. IMO target libraries have to be quite stupid if they have anything slower than void *mempcpy (void *dest, const void *src, size_t n) { return memcpy (dest, src, n) + n; } which should be not (very much) slower than a non-tailcall memcpy call. So -- remove the hook and instead use CALL_EXPR_TAILCALL (exp) instead of its result. Thanks, Richard.
Re: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
> 2018-04-12 Martin Liska > > PR ipa/85329 > * multiple_target.c (create_dispatcher_calls): Set apostrophes > for target_clone error message. > (separate_attrs): Add new argument and check for an emptry > string. > (expand_target_clones): Handle it. > (ipa_target_clone): Make redirection just for target_clones > functions. > > gcc/testsuite/ChangeLog: > > 2018-04-12 Martin Liska > > PR ipa/85329 > * g++.dg/ext/pr85329.C: New test. > * gcc.target/i386/mvc12.c: New test. > @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool > definition) >tree attributes = make_attribute ("target", "default", > DECL_ATTRIBUTES (node->decl)); >DECL_ATTRIBUTES (node->decl) = attributes; > + DECL_COMDAT (node->decl) = 0; > + DECL_WEAK (node->decl) = 0; > + DECL_ARTIFICIAL (node->decl) = 1; >node->local.local = false; > + node->set_comdat_group (NULL); If you make C++ inline and get the idea to use target cloning attribute on this, this will likely lead to a link error if you compile multiple files because you turn comdat to non-comdat. For comdats this would effectively need to become a C++ ABI extension and we would need to define comdat sections for these. Perhaps the easiest way is to simply reject the attribute on comdats and probably also extern functions? Otherwise patch looks OK. Honza
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
> Hi. > > I'm reminding review request from Richi for generic part > and Uros/Honza for target part. OK for i386 bits. Honza > > Thanks, > Martin
[og7, nvptx] Simplify logic in nvptx_single
Hi, this patch simplifies the logic in nvptx_single. Build x86_64 with nvptx accelerator and tested libgomp. Thanks, - Tom [nvptx] Simplifly logic in nvptx_single 2018-04-12 Tom de Vries * config/nvptx/nvptx.c (nvptx_single): Simplify init of vector variable. Add and use variable use_partitioning_p. --- gcc/config/nvptx/nvptx.c | 28 +++- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 3c48c14..547022e 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4305,22 +4305,24 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) we should never have worker mode only. */ broadcast_data_t data; unsigned size = GET_MODE_SIZE (SImode); - bool vector = true; + bool vector = (GOMP_DIM_MASK (GOMP_DIM_VECTOR) == mask) != 0; rtx barrier = GEN_INT (0); int threads = 0; - if (GOMP_DIM_MASK (GOMP_DIM_WORKER) == mask) - vector = false; - data.base = oacc_bcast_sym; data.ptr = 0; - if (vector - && nvptx_mach_max_workers () > 1 - && cfun->machine->bcast_partition) - data.base = cfun->machine->bcast_partition; - + bool use_partitioning_p = (vector + && nvptx_mach_max_workers () > 1 + && cfun->machine->bcast_partition); + if (use_partitioning_p) + { + data.base = cfun->machine->bcast_partition; + barrier = cfun->machine->sync_bar; + threads = nvptx_mach_vector_length (); + } gcc_assert (data.base != NULL); + gcc_assert (barrier); unsigned int psize = ROUND_UP (size, oacc_bcast_align); unsigned int pnum = (nvptx_mach_vector_length () > PTX_WARP_SIZE @@ -4335,14 +4337,6 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) vector), before); - if (vector - && nvptx_mach_max_workers () > 1 - && cfun->machine->sync_bar) - { - barrier = cfun->machine->sync_bar; - threads = nvptx_mach_vector_length (); - } - /* Barrier so other workers can see the write. */ emit_insn_before (nvptx_cta_sync (barrier, threads), tail); data.offset = 0;
Re: [PATCH] Don't mark IFUNC resolver as only called directly
On Thu, Apr 12, 2018 at 5:17 AM, Jan Hubicka wrote: >> On Thu, Apr 12, 2018 at 1:29 PM, H.J. Lu wrote: >> > Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as >> > only called directly. >> > >> > OK for trunk? >> > >> > >> > H.J. >> > --- >> > gcc/ >> > >> > PR target/85345 >> > * cgraph.h: Include stringpool.h" and "attribs.h". >> > (cgraph_node::only_called_directly_or_aliased_p): Return false >> > for IFUNC resolver. >> > >> > gcc/testsuite/ >> > >> > PR target/85345 >> > * gcc.target/i386/pr85345.c: New test. >> > --- >> > gcc/cgraph.h| 5 +++- >> > gcc/testsuite/gcc.target/i386/pr85345.c | 44 >> > + >> > 2 files changed, 48 insertions(+), 1 deletion(-) >> > create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c >> > >> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h >> > index d1ef8408497..9e195824fcc 100644 >> > --- a/gcc/cgraph.h >> > +++ b/gcc/cgraph.h >> > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see >> > #include "profile-count.h" >> > #include "ipa-ref.h" >> > #include "plugin-api.h" >> > +#include "stringpool.h" >> > +#include "attribs.h" >> > >> > class ipa_opt_pass_d; >> > typedef ipa_opt_pass_d *ipa_opt_pass; >> > @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p (void) >> > && !DECL_STATIC_CONSTRUCTOR (decl) >> > && !DECL_STATIC_DESTRUCTOR (decl) >> > && !used_from_object_file_p () >> > - && !externally_visible); >> > + && !externally_visible >> > + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); >> >> How's it handled for our own generated resolver functions? That is, >> isn't there sth cheaper than doing a lookup_attribute here? I see >> that make_dispatcher_decl nor ix86_get_function_versions_dispatcher >> adds the 'ifunc' attribute (though they are TREE_PUBLIC there). > > Is there any drawback of setting force_output flag? > Honza Setting force_output may prevent some optimizations. Can we add a bit for IFUNC resolver? -- H.J.
Re: [PATCH] Don't mark IFUNC resolver as only called directly
On Thu, Apr 12, 2018 at 5:13 AM, Richard Biener wrote: > On Thu, Apr 12, 2018 at 1:29 PM, H.J. Lu wrote: >> Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as >> only called directly. >> >> OK for trunk? >> >> >> H.J. >> --- >> gcc/ >> >> PR target/85345 >> * cgraph.h: Include stringpool.h" and "attribs.h". >> (cgraph_node::only_called_directly_or_aliased_p): Return false >> for IFUNC resolver. >> >> gcc/testsuite/ >> >> PR target/85345 >> * gcc.target/i386/pr85345.c: New test. >> --- >> gcc/cgraph.h| 5 +++- >> gcc/testsuite/gcc.target/i386/pr85345.c | 44 >> + >> 2 files changed, 48 insertions(+), 1 deletion(-) >> create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c >> >> diff --git a/gcc/cgraph.h b/gcc/cgraph.h >> index d1ef8408497..9e195824fcc 100644 >> --- a/gcc/cgraph.h >> +++ b/gcc/cgraph.h >> @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see >> #include "profile-count.h" >> #include "ipa-ref.h" >> #include "plugin-api.h" >> +#include "stringpool.h" >> +#include "attribs.h" >> >> class ipa_opt_pass_d; >> typedef ipa_opt_pass_d *ipa_opt_pass; >> @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p (void) >> && !DECL_STATIC_CONSTRUCTOR (decl) >> && !DECL_STATIC_DESTRUCTOR (decl) >> && !used_from_object_file_p () >> - && !externally_visible); >> + && !externally_visible >> + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); > > How's it handled for our own generated resolver functions? That is, > isn't there sth cheaper than doing a lookup_attribute here? I see > that make_dispatcher_decl nor ix86_get_function_versions_dispatcher > adds the 'ifunc' attribute (though they are TREE_PUBLIC there). > ext/mv*.C tests failed to compile: error: '-fcf-protection=full' requires Intel CET support. Use -mcet or both of -mibt and -mshstk options to enable CET with -fcf-protection -mcet. So it is unsupported. -- H.J.
Re: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
Forgot to add the patch. Martin >From fb1bbf142af6668eeb1bdfeec96920de2f0edb21 Mon Sep 17 00:00:00 2001 From: marxin Date: Thu, 12 Apr 2018 12:15:17 +0200 Subject: [PATCH] Make redirection only for target_clones: V2 (PR ipa/85329). gcc/ChangeLog: 2018-04-12 Martin Liska PR ipa/85329 * multiple_target.c (create_dispatcher_calls): Set apostrophes for target_clone error message. (separate_attrs): Add new argument and check for an emptry string. (expand_target_clones): Handle it. (ipa_target_clone): Make redirection just for target_clones functions. gcc/testsuite/ChangeLog: 2018-04-12 Martin Liska PR ipa/85329 * g++.dg/ext/pr85329.C: New test. * gcc.target/i386/mvc12.c: New test. --- gcc/multiple_target.c | 43 --- gcc/testsuite/g++.dg/ext/pr85329.C| 19 gcc/testsuite/gcc.target/i386/mvc12.c | 11 + 3 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.dg/ext/pr85329.C create mode 100644 gcc/testsuite/gcc.target/i386/mvc12.c diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c index b006a5ab6ec..2357e458ec8 100644 --- a/gcc/multiple_target.c +++ b/gcc/multiple_target.c @@ -88,7 +88,7 @@ create_dispatcher_calls (struct cgraph_node *node) if (!idecl) { error_at (DECL_SOURCE_LOCATION (node->decl), - "default target_clones attribute was not set"); + "default % attribute was not set"); return; } @@ -216,26 +216,30 @@ get_attr_str (tree arglist, char *attr_str) } /* Return number of attributes separated by comma and put them into ARGS. - If there is no DEFAULT attribute return -1. */ + If there is no DEFAULT attribute return -1. If there is an empty + string in attribute return -2. 
*/ static int -separate_attrs (char *attr_str, char **attrs) +separate_attrs (char *attr_str, char **attrs, int attrnum) { int i = 0; - bool has_default = false; + int default_count = 0; for (char *attr = strtok (attr_str, ","); attr != NULL; attr = strtok (NULL, ",")) { if (strcmp (attr, "default") == 0) { - has_default = true; + default_count++; continue; } attrs[i++] = attr; } - if (!has_default) + if (default_count == 0) return -1; + else if (i + default_count < attrnum) +return -2; + return i; } @@ -321,7 +325,7 @@ expand_target_clones (struct cgraph_node *node, bool definition) { warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "single target_clones attribute is ignored"); + "single % attribute is ignored"); return false; } @@ -345,7 +349,7 @@ expand_target_clones (struct cgraph_node *node, bool definition) int attrnum = get_attr_str (arglist, attr_str); char **attrs = XNEWVEC (char *, attrnum); - attrnum = separate_attrs (attr_str, attrs); + attrnum = separate_attrs (attr_str, attrs, attrnum); if (attrnum == -1) { error_at (DECL_SOURCE_LOCATION (node->decl), @@ -354,6 +358,14 @@ expand_target_clones (struct cgraph_node *node, bool definition) XDELETEVEC (attr_str); return false; } + else if (attrnum == -2) +{ + error_at (DECL_SOURCE_LOCATION (node->decl), + "an empty string cannot be in % attribute"); + XDELETEVEC (attrs); + XDELETEVEC (attr_str); + return false; +} cgraph_function_version_info *decl1_v = NULL; cgraph_function_version_info *decl2_v = NULL; @@ -382,6 +394,7 @@ expand_target_clones (struct cgraph_node *node, bool definition) DECL_ATTRIBUTES (new_node->decl) = attributes; location_t saved_loc = input_location; input_location = DECL_SOURCE_LOCATION (node->decl); + if (!targetm.target_option.valid_attribute_p (new_node->decl, NULL, TREE_VALUE (attributes), 0)) @@ -413,7 +426,11 @@ expand_target_clones (struct cgraph_node *node, bool definition) tree attributes = make_attribute ("target", "default", DECL_ATTRIBUTES (node->decl)); 
DECL_ATTRIBUTES (node->decl) = attributes; + DECL_COMDAT (node->decl) = 0; + DECL_WEAK (node->decl) = 0; + DECL_ARTIFICIAL (node->decl) = 1; node->local.local = false; + node->set_comdat_group (NULL); location_t saved_loc = input_location; input_location = DECL_SOURCE_LOCATION (node->decl); bool ret @@ -427,14 +444,14 @@ static unsigned int ipa_target_clone (void) { struct cgraph_node *node; + auto_vec to_dispatch; - bool target_clone_pass = false; FOR_EACH_FUNCTION (node) -target_clone_pass |= expand_target_clones (node, node->definition); +if (expand_target_clones (node, node->definition)) + to_dispatch.safe_push (node); - if (target_clone_pass) -FOR_EACH_FUNCTION (node) - create_dispatcher_calls (node); + for (unsigned i = 0; i < to_dispatch.length (); i++) +create_dispatcher_calls (to_dispatch[i]); return 0; } diff --git a/gcc/testsuite/g++.dg/ext/pr85329.C b/gcc/testsuite/g++.dg/ext/
[PATCH] Make redirection only for target_clones: V2 (PR ipa/85329).
Hi. I'm sending V2. The patch adjusts: - make redirection just for target_clones, done simply by recording nodes where expand_target_clones returns true - reset various DECL_* flags on default version, needed for an ipa-visibility assert I've seen - handle empty string in target_clones: __attribute__((target_clones("",.. I saw that during reduction of the ICE. The patch bootstraps on x86_64-linux-gnu and survives regression tests. Ready to be installed? Martin
[PATCH] libgcc/CET: Add _CET_ENDBR to __stack_split_initialize
Program received signal SIGSEGV, Segmentation fault. __stack_split_initialize () at /export/gnu/import/git/sources/gcc/libgcc/config/i386/morestack.S:751 751 leaq-16000(%rsp),%rax # We should have at least 16K. Missing separate debuginfos, use: dnf debuginfo-install libgcc-8.0.1-0.21.0.fc28.x86_64 (gdb) disass Dump of assembler code for function __stack_split_initialize: => 0x00402858 <+0>: lea-0x3e80(%rsp),%rax 0x00402860 <+8>: mov%rax,%fs:0x70 0x00402869 <+17>:sub$0x8,%rsp 0x0040286d <+21>:mov%rsp,%rdi 0x00402870 <+24>:mov$0x3e80,%esi 0x00402875 <+29>:callq 0x401810 <__generic_morestack_set_initial_sp> 0x0040287a <+34>:add$0x8,%rsp 0x0040287e <+38>:retq End of assembler dump. (gdb) This patch adds the missing ENDBR to __stack_split_initialize. OK for trunk? H.J. --- PR libgcc/85379 * config/i386/morestack.S (__stack_split_initialize): Add _CET_ENDBR. --- libgcc/config/i386/morestack.S | 1 + 1 file changed, 1 insertion(+) diff --git a/libgcc/config/i386/morestack.S b/libgcc/config/i386/morestack.S index eca441a2867..99e65eaaff4 100644 --- a/libgcc/config/i386/morestack.S +++ b/libgcc/config/i386/morestack.S @@ -730,6 +730,7 @@ __morestack_large_model: #endif __stack_split_initialize: + _CET_ENDBR #ifndef __x86_64__ -- 2.14.3
Re: [wwwdocs] document new options in gcc-8/changes.html
On Wed, Apr 11, 2018 at 10:02 PM, Martin Sebor wrote: > On 04/04/2018 05:03 PM, Paolo Carlini wrote: >> >> Hi Martin >> >> On 05/04/2018 00:28, Martin Sebor wrote: >>> >>> + implementations do suppresses the warning. >> >> suppress > > > I was about to fix this but re-reading the full sentence made > me realize it's correct as is: > > Note that due to GCC bug 82944, defining strncat, strncpy, or > stpncpy as a macro in a system header as some implementations > do suppresses the warning. > > I've added a comma after the suppresses to make it clearer and > checked in revision 1.63. Sounds good, though you will also want a matching comma after "system header". Jason
Re: [PATCH] Prefer mempcpy to memcpy on x86_64 target (PR middle-end/81657).
Hi. This is a reminder of the pending review request: Richi for the generic part and Uros/Honza for the target part. Thanks, Martin
Re: [PATCH] Fix non-AVX512VL handling of lo extraction from AVX512F xmm16+ (PR target/85328)
On Thu, Apr 12, 2018 at 01:46:40PM +0300, Kirill Yukhin wrote: > > Hello Jakub! > > > On 11 Apr 2018, at 16:27, Jakub Jelinek wrote: > > In lots of patterns we assume that we never see xmm16+ hard registers > > with 128-bit and 256-bit vector modes when not -mavx512vl, because > > HARD_REGNO_MODE_OK refuses those. > > Unfortunately, as this testcase and patch shows, the vec_extract_lo* > > splitters work as a loophole around this, we happily create instructions > > like (set (reg:V32QI xmm5) (reg:V32QI xmm16)) and then hard register > > propagation can propagate the V32QI xmm16 into other insns like vpand. > > > > The following patch fixes it by making sure we never create such registers, > > just emit (set (reg:V64QI xmm5) (reg:V64QI xmm16)) instead, which by copying > > all the 512 bits also copies the low bits, and as the destination is > > originally V32QI which is not HARD_REGNO_MODE_OK in xmm16+, this should be > > fine. > > > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > Patch is OK for trunk. I've posted an updated version of this patch later on in https://gcc.gnu.org/ml/gcc-patches/2018-04/msg00563.html Is that one ok for trunk instead? And sorry for not getting it right the first time. Jakub
Re: [PATCH] Don't mark IFUNC resolver as only called directly
> On Thu, Apr 12, 2018 at 1:29 PM, H.J. Lu wrote: > > Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as > > only called directly. > > > > OK for trunk? > > > > > > H.J. > > --- > > gcc/ > > > > PR target/85345 > > * cgraph.h: Include stringpool.h" and "attribs.h". > > (cgraph_node::only_called_directly_or_aliased_p): Return false > > for IFUNC resolver. > > > > gcc/testsuite/ > > > > PR target/85345 > > * gcc.target/i386/pr85345.c: New test. > > --- > > gcc/cgraph.h| 5 +++- > > gcc/testsuite/gcc.target/i386/pr85345.c | 44 > > + > > 2 files changed, 48 insertions(+), 1 deletion(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c > > > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > > index d1ef8408497..9e195824fcc 100644 > > --- a/gcc/cgraph.h > > +++ b/gcc/cgraph.h > > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see > > #include "profile-count.h" > > #include "ipa-ref.h" > > #include "plugin-api.h" > > +#include "stringpool.h" > > +#include "attribs.h" > > > > class ipa_opt_pass_d; > > typedef ipa_opt_pass_d *ipa_opt_pass; > > @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p (void) > > && !DECL_STATIC_CONSTRUCTOR (decl) > > && !DECL_STATIC_DESTRUCTOR (decl) > > && !used_from_object_file_p () > > - && !externally_visible); > > + && !externally_visible > > + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); > > How's it handled for our own generated resolver functions? That is, > isn't there sth cheaper than doing a lookup_attribute here? I see > that make_dispatcher_decl nor ix86_get_function_versions_dispatcher > adds the 'ifunc' attribute (though they are TREE_PUBLIC there). Is there any drawback of setting force_output flag? Honza > > Richard. 
> > > } > > > > /* Return true when function can be removed from callgraph > > diff --git a/gcc/testsuite/gcc.target/i386/pr85345.c > > b/gcc/testsuite/gcc.target/i386/pr85345.c > > new file mode 100644 > > index 000..63f771294ad > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr85345.c > > @@ -0,0 +1,44 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fcf-protection -mcet" } */ > > +/* { dg-final { scan-assembler-times {\mendbr} 4 } } */ > > + > > +int resolver_fn = 0; > > +int resolved_fn = 0; > > + > > +static inline void > > +do_it_right_at_runtime_A (void) > > +{ > > + resolved_fn++; > > +} > > + > > +static inline void > > +do_it_right_at_runtime_B (void) > > +{ > > + resolved_fn++; > > +} > > + > > +static inline void do_it_right_at_runtime (void); > > + > > +void do_it_right_at_runtime (void) > > + __attribute__ ((ifunc ("resolve_do_it_right_at_runtime"))); > > + > > +extern int r; > > +static void (*resolve_do_it_right_at_runtime (void)) (void) > > +{ > > + resolver_fn++; > > + > > + typeof(do_it_right_at_runtime) *func; > > + if (r & 1) > > +func = do_it_right_at_runtime_A; > > + else > > +func = do_it_right_at_runtime_B; > > + > > + return (void *) func; > > +} > > + > > +int > > +main () > > +{ > > + do_it_right_at_runtime (); > > + return 0; > > +} > > -- > > 2.14.3 > >
Re: [PATCH] Don't mark IFUNC resolver as only called directly
On Thu, Apr 12, 2018 at 1:29 PM, H.J. Lu wrote: > Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as > only called directly. > > OK for trunk? > > > H.J. > --- > gcc/ > > PR target/85345 > * cgraph.h: Include stringpool.h" and "attribs.h". > (cgraph_node::only_called_directly_or_aliased_p): Return false > for IFUNC resolver. > > gcc/testsuite/ > > PR target/85345 > * gcc.target/i386/pr85345.c: New test. > --- > gcc/cgraph.h| 5 +++- > gcc/testsuite/gcc.target/i386/pr85345.c | 44 > + > 2 files changed, 48 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index d1ef8408497..9e195824fcc 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see > #include "profile-count.h" > #include "ipa-ref.h" > #include "plugin-api.h" > +#include "stringpool.h" > +#include "attribs.h" > > class ipa_opt_pass_d; > typedef ipa_opt_pass_d *ipa_opt_pass; > @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p (void) > && !DECL_STATIC_CONSTRUCTOR (decl) > && !DECL_STATIC_DESTRUCTOR (decl) > && !used_from_object_file_p () > - && !externally_visible); > + && !externally_visible > + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); How's it handled for our own generated resolver functions? That is, isn't there sth cheaper than doing a lookup_attribute here? I see that make_dispatcher_decl nor ix86_get_function_versions_dispatcher adds the 'ifunc' attribute (though they are TREE_PUBLIC there). Richard. 
> } > > /* Return true when function can be removed from callgraph > diff --git a/gcc/testsuite/gcc.target/i386/pr85345.c > b/gcc/testsuite/gcc.target/i386/pr85345.c > new file mode 100644 > index 000..63f771294ad > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr85345.c > @@ -0,0 +1,44 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fcf-protection -mcet" } */ > +/* { dg-final { scan-assembler-times {\mendbr} 4 } } */ > + > +int resolver_fn = 0; > +int resolved_fn = 0; > + > +static inline void > +do_it_right_at_runtime_A (void) > +{ > + resolved_fn++; > +} > + > +static inline void > +do_it_right_at_runtime_B (void) > +{ > + resolved_fn++; > +} > + > +static inline void do_it_right_at_runtime (void); > + > +void do_it_right_at_runtime (void) > + __attribute__ ((ifunc ("resolve_do_it_right_at_runtime"))); > + > +extern int r; > +static void (*resolve_do_it_right_at_runtime (void)) (void) > +{ > + resolver_fn++; > + > + typeof(do_it_right_at_runtime) *func; > + if (r & 1) > +func = do_it_right_at_runtime_A; > + else > +func = do_it_right_at_runtime_B; > + > + return (void *) func; > +} > + > +int > +main () > +{ > + do_it_right_at_runtime (); > + return 0; > +} > -- > 2.14.3 >
Re: [PATCH] Disable -gsplit-dwarf for all LTO debug
On Thu, 12 Apr 2018, Jakub Jelinek wrote: > On Thu, Apr 12, 2018 at 01:35:46PM +0200, Richard Biener wrote: > > > > The following disables split-dwarf for the LTO part of the early debug > > (keeping it for the fat part) and makes sure the driver doesn't > > see -gsplit-dwarf in effect. > > > > That works for all but the compile stage and slim objects > > (the default) which then ends up generating an empty .dwo file. > > I'm not sure where to prune for this case given that > > -fno-fat-lto-objects seems to be just ignored if it isn't supported. > > > > Note that we need the dwarf2out.c part as otherwise the late > > references cannot be resolved since the debug is copied from .o to .dwo > > files early. > > Wouldn't it be better to just sorry on the -flto -gsplit-dwarf combination? > It really isn't clear what the user is asking for in that case and what > exactly he wants. Certainly easier though then not necessary at this point (nor is this patch of course). It might be reasonable to keep -gsplit-dwarf working for the fat part of the object -- OTOH I think that support for fat LTO objects should be removed at some point as well... Richard.
Re: [PATCH] Disable -gsplit-dwarf for all LTO debug
On Thu, Apr 12, 2018 at 01:35:46PM +0200, Richard Biener wrote: > > The following disables split-dwarf for the LTO part of the early debug > (keeping it for the fat part) and makes sure the driver doesn't > see -gsplit-dwarf in effect. > > That works for all but the compile stage and slim objects > (the default) which then ends up generating an empty .dwo file. > I'm not sure where to prune for this case given that > -fno-fat-lto-objects seems to be just ignored if it isn't supported. > > Note that we need the dwarf2out.c part as otherwise the late > references cannot be resolved since the debug is copied from .o to .dwo > files early. Wouldn't it be better to just sorry on the -flto -gsplit-dwarf combination? It really isn't clear what the user is asking for in that case and what exactly he wants. Jakub
Re: Patch ping^3
On Thu, Apr 12, 2018 at 10:41:22AM +0200, Richard Biener wrote: > On Thu, 12 Apr 2018, Jakub Jelinek wrote: > > I'd like to ping the > > > > http://gcc.gnu.org/ml/gcc-patches/2018-03/msg01244.html > > - PR83157 - improve debug info for x86 setcc peepholes > > > > patch. Thanks. > > OK for stage1 and backporting after it soaked there for a while. > I'm too unfamiliar with the code to approve it at this point > (esp. concerned about the cselib part affecting others than > var-tracking in unexpected ways). Thanks. It shouldn't affect anything other than var-tracking, as it is guarded with cselib_record_sets_hook != NULL and var-tracking is the only cselib user that ever sets it to non-NULL (and clears afterwards). Even if that would be left out, no other cselib user preserves cselib VALUEs, so ! PRESERVED_VALUE_P (sets[n_sets + i].src_elt->val_rtx) would be always true for non-var-tracking and the second loop wouldn't do anything. Jakub
[PATCH] Disable -gsplit-dwarf for all LTO debug
The following disables split-dwarf for the LTO part of the early debug (keeping it for the fat part) and makes sure the driver doesn't see -gsplit-dwarf in effect. That works for all but the compile stage and slim objects (the default) which then ends up generating an empty .dwo file. I'm not sure where to prune for this case given that -fno-fat-lto-objects seems to be just ignored if it isn't supported. Note that we need the dwarf2out.c part as otherwise the late references cannot be resolved since the debug is copied from .o to .dwo files early. Opinions? Thanks, Richard. 2018-04-12 Richard Biener * lto-wrapper.c (run_gcc): Add -gno-split-dwarf to cancel any such option from compile or link time in a way visible to the driver. * dwarf2out.c (add_top_level_skeleton_die_attrs): Avoid adding DW_AT_GNU_pubnames twice. (dwarf2out_early_finish): Unconditionally call add_AT_pubnames. Disable dwarf_split_debug_info around the early LTO DWARF emission and remove then dead code. Index: gcc/lto-wrapper.c === --- gcc/lto-wrapper.c (revision 259337) +++ gcc/lto-wrapper.c (working copy) @@ -1123,6 +1123,7 @@ run_gcc (unsigned argc, char *argv[]) append_compiler_options (&argv_obstack, fdecoded_options, fdecoded_options_count); append_linker_options (&argv_obstack, decoded_options, decoded_options_count); + obstack_ptr_grow (&argv_obstack, "-gno-split-dwarf"); /* Scan linker driver arguments for things that are of relevance to us. 
*/ for (j = 1; j < decoded_options_count; ++j) Index: gcc/dwarf2out.c === --- gcc/dwarf2out.c (revision 259337) +++ gcc/dwarf2out.c (working copy) @@ -11105,7 +11105,8 @@ add_top_level_skeleton_die_attrs (dw_die add_skeleton_AT_string (die, dwarf_AT (DW_AT_dwo_name), dwo_file_name); if (comp_dir != NULL) add_skeleton_AT_string (die, DW_AT_comp_dir, comp_dir); - add_AT_pubnames (die); + if (!get_AT (die, DW_AT_GNU_pubnames)) +add_AT_pubnames (die); add_AT_lineptr (die, DW_AT_GNU_addr_base, debug_addr_section_label); } @@ -31811,11 +31812,8 @@ dwarf2out_early_finish (const char *file note_variable_value (node->die); /* The AT_pubnames attribute needs to go in all skeleton dies, including - both the main_cu and all skeleton TUs. Making this call unconditional - would end up either adding a second copy of the AT_pubnames attribute, or - requiring a special case in add_top_level_skeleton_die_attrs. */ - if (!dwarf_split_debug_info) -add_AT_pubnames (comp_unit_die ()); + both the main_cu and all skeleton TUs. */ + add_AT_pubnames (comp_unit_die ()); /* The early debug phase is now finished. */ early_dwarf_finished = true; @@ -31824,6 +31822,11 @@ dwarf2out_early_finish (const char *file if (!flag_generate_lto && !flag_generate_offload) return; + /* For the early LTO DWARF we do not want split DWARF because it really + doesn't make much sense. */ + int saved_dwarf_split_debug_info = dwarf_split_debug_info; + dwarf_split_debug_info = 0; + /* Now as we are going to output for LTO initialize sections and labels to the LTO variants. We don't need a random-seed postfix as other LTO sections as linking the LTO debug sections into one in a partial @@ -31858,12 +31861,6 @@ dwarf2out_early_finish (const char *file save_macinfo_strings (); - if (dwarf_split_debug_info) -{ - unsigned int index = 0; - debug_str_hash->traverse_noresize (&index); -} - /* Output all of the compilation units. We put the main one last so that the offsets are available to output_pubnames. 
*/ for (limbo_die_node *node = limbo_die_list; node; node = node->next) @@ -31884,9 +31881,7 @@ dwarf2out_early_finish (const char *file attributes. */ if (debug_info_level >= DINFO_LEVEL_TERSE) add_AT_lineptr (ctnode->root_die, DW_AT_stmt_list, -(!dwarf_split_debug_info - ? debug_line_section_label - : debug_skeleton_line_section_label)); +debug_line_section_label); output_comdat_type_unit (ctnode); *slot = ctnode; @@ -31939,6 +31934,8 @@ dwarf2out_early_finish (const char *file /* Switch back to the text section. */ switch_to_section (text_section); + + dwarf_split_debug_info = saved_dwarf_split_debug_info; } /* Reset all state within dwarf2out.c so that we can rerun the compiler
[PATCH] Don't mark IFUNC resolver as only called directly
Since IFUNC resolver is called indirectly, don't mark IFUNC resolver as only called directly. OK for trunk? H.J. --- gcc/ PR target/85345 * cgraph.h: Include "stringpool.h" and "attribs.h". (cgraph_node::only_called_directly_or_aliased_p): Return false for IFUNC resolver. gcc/testsuite/ PR target/85345 * gcc.target/i386/pr85345.c: New test. --- gcc/cgraph.h| 5 +++- gcc/testsuite/gcc.target/i386/pr85345.c | 44 + 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr85345.c diff --git a/gcc/cgraph.h b/gcc/cgraph.h index d1ef8408497..9e195824fcc 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see #include "profile-count.h" #include "ipa-ref.h" #include "plugin-api.h" +#include "stringpool.h" +#include "attribs.h" class ipa_opt_pass_d; typedef ipa_opt_pass_d *ipa_opt_pass; @@ -2894,7 +2896,8 @@ cgraph_node::only_called_directly_or_aliased_p (void) && !DECL_STATIC_CONSTRUCTOR (decl) && !DECL_STATIC_DESTRUCTOR (decl) && !used_from_object_file_p () - && !externally_visible); + && !externally_visible + && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl))); } /* Return true when function can be removed from callgraph diff --git a/gcc/testsuite/gcc.target/i386/pr85345.c b/gcc/testsuite/gcc.target/i386/pr85345.c new file mode 100644 index 000..63f771294ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr85345.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fcf-protection -mcet" } */ +/* { dg-final { scan-assembler-times {\mendbr} 4 } } */ + +int resolver_fn = 0; +int resolved_fn = 0; + +static inline void +do_it_right_at_runtime_A (void) +{ + resolved_fn++; +} + +static inline void +do_it_right_at_runtime_B (void) +{ + resolved_fn++; +} + +static inline void do_it_right_at_runtime (void); + +void do_it_right_at_runtime (void) + __attribute__ ((ifunc ("resolve_do_it_right_at_runtime"))); + +extern int r; +static void (*resolve_do_it_right_at_runtime 
(void)) (void) +{ + resolver_fn++; + + typeof(do_it_right_at_runtime) *func; + if (r & 1) +func = do_it_right_at_runtime_A; + else +func = do_it_right_at_runtime_B; + + return (void *) func; +} + +int +main () +{ + do_it_right_at_runtime (); + return 0; +} -- 2.14.3
Re: [PATCH] Fix non-AVX512VL handling of lo extraction from AVX512F xmm16+ (PR target/85328)
> On 12 Apr 2018, at 13:53, Jakub Jelinek wrote: > > On Thu, Apr 12, 2018 at 01:46:40PM +0300, Kirill Yukhin wrote: >> >> Hello Jakub! >> >>> On 11 Apr 2018, at 16:27, Jakub Jelinek wrote: >>> In lots of patterns we assume that we never see xmm16+ hard registers >>> with 128-bit and 256-bit vector modes when not -mavx512vl, because >>> HARD_REGNO_MODE_OK refuses those. >>> Unfortunately, as this testcase and patch shows, the vec_extract_lo* >>> splitters work as a loophole around this, we happily create instructions >>> like (set (reg:V32QI xmm5) (reg:V32QI xmm16)) and then hard register >>> propagation can propagate the V32QI xmm16 into other insns like vpand. >>> >>> The following patch fixes it by making sure we never create such registers, >>> just emit (set (reg:V64QI xmm5) (reg:V64QI xmm16)) instead, which by copying >>> all the 512 bits also copies the low bits, and as the destination is >>> originally V32QI which is not HARD_REGNO_MODE_OK in xmm16+, this should be >>> fine. >>> >>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? >> Patch is OK for trunk. > > I've posted an updated version of this patch later on in > https://gcc.gnu.org/ml/gcc-patches/2018-04/msg00563.html > Is that one ok for trunk instead? Yes. — Thanks, K > > And sorry for not getting it right the first time. > > Jakub
Re: [PATCH] Ada: Fix s-oscons.ads generation
On 07/03/18 14:15, Arnaud Charlet wrote: The $(GNATLIBCFLAGS) are already included in $(GNATLIBCFLAGS_FOR_C). We must call the C compiler with the right machine flags. So, add $(GNATLIBCFLAGS_FOR_C) to $(OSCONS_EXTRACT). For example, on a bi-arch compiler supporting 32-bit and 64-bit instruction sets we pick otherwise only one variant due to a missing -m32 or -m64 flag. gcc/ada * gcc-interface/Makefile.in (OSCONS_CPP): Remove redundant $(GNATLIBCFLAGS). (OSCONS_EXTRACT): Add $(GNATLIBCFLAGS_FOR_C). OK, thanks. Thanks for the quick review. I would like to back port this to GCC 7. Seems fine to me if it doesn't cause troubles on trunk. I back ported this to GCC 7 today. -- Sebastian Huber, embedded brains GmbH Address : Dornierstr. 4, D-82178 Puchheim, Germany Phone : +49 89 189 47 41-16 Fax : +49 89 189 47 41-09 E-Mail : sebastian.hu...@embedded-brains.de PGP : Public key available on request. Diese Nachricht ist keine geschäftliche Mitteilung im Sinne des EHUG.
[PATCH] Fix PR85371
This fixes crashes on Darwin with -flto -g because we pass the wrong (NULL) debug_line_section in case dw_val_class_lineptr: dw2_asm_output_offset (DWARF_OFFSET_SIZE, AT_lbl (a), debug_line_section, "%s", name); break; which is because for some reason I used debug_skeleton_line_section and friends for the early LTO dwarf. That looks mistaken in the above light so the following reverts that to use debug_line_section and friends. I verified that -flto -g -gsplit-dwarf still "works" (though that combo doesn't make much sense, if only because .dwo objects for the ltrans objects end up in /tmp ...). LTO bootstrap with -g[23] succeeded on x86_64-unknown-linux-gnu, bootstrap for all languages as well, testing in progress. Approved by Jakub on IRC so I'll go ahead with this after the above finished. Haven't yet found a convenient place to disable -gsplit-dwarf for the non-fat part of LTO in a way the driver sees it. Any hints appreciated. There's also the (unwanted) side-effect of -gsplit-dwarf enabling -ggnu-pubnames. Disabling on the LTRANS side alone should be possible by massaging lto-wrapper to append -gno-split-dwarf I guess. Richard. 2018-04-12 Richard Biener PR lto/85371 * dwarf2out.c (init_sections_and_labels): Use debug_line_section[_label] for the early LTO debug to properly generate references to it during DIE emission. Do not re-use that for the skeleton for split-dwarf. (dwarf2out_early_finish): Likewise. Index: gcc/dwarf2out.c === --- gcc/dwarf2out.c (revision 259337) +++ gcc/dwarf2out.c (working copy) @@ -28405,14 +28406,6 @@ init_sections_and_labels (bool early_lto debug_macinfo_section = get_section (debug_macinfo_section_name, SECTION_DEBUG | SECTION_EXCLUDE, NULL); - /* For macro info we have to refer to a debug_line section, so -similar to split-dwarf emit a skeleton one for early debug. 
*/ - debug_skeleton_line_section - = get_section (DEBUG_LTO_LINE_SECTION, - SECTION_DEBUG | SECTION_EXCLUDE, NULL); - ASM_GENERATE_INTERNAL_LABEL (debug_skeleton_line_section_label, - DEBUG_SKELETON_LINE_SECTION_LABEL, - generation); } else { @@ -28459,6 +28452,13 @@ init_sections_and_labels (bool early_lto SECTION_DEBUG | SECTION_EXCLUDE, NULL); } + /* For macro info and the file table we have to refer to a +debug_line section. */ + debug_line_section = get_section (DEBUG_LTO_LINE_SECTION, + SECTION_DEBUG | SECTION_EXCLUDE, NULL); + ASM_GENERATE_INTERNAL_LABEL (debug_line_section_label, + DEBUG_LINE_SECTION_LABEL, generation); + debug_str_section = get_section (DEBUG_LTO_STR_SECTION, DEBUG_STR_SECTION_FLAGS | SECTION_EXCLUDE, NULL); @@ -31845,7 +31849,7 @@ dwarf2out_early_finish (const char *file /* AIX Assembler inserts the length, so adjust the reference to match the offset expected by debuggers. */ - strcpy (dl_section_ref, debug_skeleton_line_section_label); + strcpy (dl_section_ref, debug_line_section_label); if (XCOFF_DEBUGGING_INFO) strcat (dl_section_ref, DWARF_INITIAL_LENGTH_SIZE_STR); @@ -31918,7 +31922,7 @@ dwarf2out_early_finish (const char *file switch_to_section (debug_macinfo_section); ASM_OUTPUT_LABEL (asm_out_file, macinfo_section_label); - output_macinfo (debug_skeleton_line_section_label, true); + output_macinfo (debug_line_section_label, true); dw2_asm_output_data (1, 0, "End compilation unit"); if (flag_fat_lto_objects) @@ -31929,8 +31933,8 @@ dwarf2out_early_finish (const char *file } /* Emit a skeleton debug_line section. */ - switch_to_section (debug_skeleton_line_section); - ASM_OUTPUT_LABEL (asm_out_file, debug_skeleton_line_section_label); + switch_to_section (debug_line_section); + ASM_OUTPUT_LABEL (asm_out_file, debug_line_section_label); output_line_info (true); /* If we emitted any indirect strings, output the string table too. */
Re: [PATCH] Fix non-AVX512VL handling of lo extraction from AVX512F xmm16+ (PR target/85328)
Hello Jakub! > On 11 Apr 2018, at 16:27, Jakub Jelinek wrote: > > Hi! > > In lots of patterns we assume that we never see xmm16+ hard registers > with 128-bit and 256-bit vector modes when not -mavx512vl, because > HARD_REGNO_MODE_OK refuses those. > Unfortunately, as this testcase and patch shows, the vec_extract_lo* > splitters work as a loophole around this, we happily create instructions > like (set (reg:V32QI xmm5) (reg:V32QI xmm16)) and then hard register > propagation can propagate the V32QI xmm16 into other insns like vpand. > > The following patch fixes it by making sure we never create such registers, > just emit (set (reg:V64QI xmm5) (reg:V64QI xmm16)) instead, which by copying > all the 512 bits also copies the low bits, and as the destination is > originally V32QI which is not HARD_REGNO_MODE_OK in xmm16+, this should be > fine. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Patch is OK for trunk. — Thanks, K
[Committed] IBM Z: Spectre: Prevent thunk cfi to be emitted with -fno-dwarf2-cfi-asm
The CFI magic we emit as part of the indirect branch thunks in order to have somewhat sane unwind information must not be emitted with -fno-dwarf2-cfi-asm. Committed to mainline, gcc-7-branch, and gcc-6-branch. gcc/ChangeLog: 2018-04-12 Andreas Krebbel * config/s390/s390.c (s390_output_indirect_thunk_function): Check also for flag_dwarf2_cfi_asm. gcc/testsuite/ChangeLog: 2018-04-12 Andreas Krebbel * gcc.target/s390/nobp-no-dwarf2-cfi.c: New test. --- gcc/config/s390/s390.c | 2 +- gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c | 19 +++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 59f5de9..5add598 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16522,7 +16522,7 @@ s390_output_indirect_thunk_function (unsigned int regno, bool z10_p) Stopping in the thunk: backtrace will point to the thunk target is if it was interrupted by a signal. For a call this means that the call chain will be: caller->callee->thunk */ - if (flag_asynchronous_unwind_tables) + if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm) { fputs ("\t.cfi_signal_frame\n", asm_out_file); fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno); diff --git a/gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c b/gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c new file mode 100644 index 000..75e32a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/nobp-no-dwarf2-cfi.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=z900 --save-temps -mfunction-return-reg=thunk -mindirect-branch-table -fno-dwarf2-cfi-asm" } */ + +/* Make sure that we do not emit .cfi directives when -fno-dwarf2-cfi-asm is being used. 
*/ + +int +main () +{ + return 0; +} + +/* 1 x main +/* { dg-final { scan-assembler-times "jg\t__s390_indirect_jump" 1 } } */ +/* { dg-final { scan-assembler "ex\t" } } */ + +/* { dg-final { scan-assembler-not "section\t.s390_indirect_jump" } } */ +/* { dg-final { scan-assembler-not "section\t.s390_indirect_call" } } */ +/* { dg-final { scan-assembler "section\t.s390_return_reg" } } */ +/* { dg-final { scan-assembler-not "section\t.s390_return_mem" } } */ -- 2.9.1
Patch ping^3
Hi! I'd like to ping the http://gcc.gnu.org/ml/gcc-patches/2018-03/msg01244.html - PR83157 - improve debug info for x86 setcc peepholes patch. Thanks. Jakub
Re: Patch ping^3
On Thu, 12 Apr 2018, Jakub Jelinek wrote: > Hi! > > I'd like to ping the > > http://gcc.gnu.org/ml/gcc-patches/2018-03/msg01244.html > - PR83157 - improve debug info for x86 setcc peepholes > > patch. Thanks. OK for stage1 and backporting after it soaked there for a while. I'm too unfamiliar with the code to approve it at this point (esp. concerned about the cselib part affecting others than var-tracking in unexpected ways). Thanks, Richard.
Re: [PATCH] Fix copyprop_hardreg_forward_1 (PR rtl-optimization/85342)
On Wed, 11 Apr 2018, Jakub Jelinek wrote: > Hi! > > When switching regcprop.c to use validate_* and apply_change_group, > I have added code to restore recog_data.operands[i] if they have been > replaced after apply_change_group failure. That is bogus though, when > apply_change_group fails, recog_data.insn is NULL and rest of recog_data > structure is complete garbage; and nothing in copyprop_hardreg_forward_1 > seems to use it afterwards anyway, just will call extract_insn on the next > insn. Furthermore, the "fixups" were only for the recog_data structure > operands itself, nothing else, the instruction itself has been already > corrected by cancel_changes. > > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for > trunk? OK. Richard. > 2018-04-11 Jakub Jelinek > > PR rtl-optimization/85342 > * regcprop.c (copyprop_hardreg_forward_1): Remove replaced array, use > a bool scalar var inside of the loop instead. Don't try to update > recog_data.operand after failed apply_change_group. > > * gcc.target/i386/pr85342.c: New test. > > --- gcc/regcprop.c.jj 2018-01-04 00:43:17.996703342 +0100 > +++ gcc/regcprop.c2018-04-11 16:17:29.883575142 +0200 > @@ -751,7 +751,6 @@ copyprop_hardreg_forward_1 (basic_block >bool is_asm, any_replacements; >rtx set; >rtx link; > - bool replaced[MAX_RECOG_OPERANDS]; >bool changed = false; >struct kill_set_value_data ksvd; > > @@ -934,7 +933,7 @@ copyprop_hardreg_forward_1 (basic_block >eldest live copy that's in an appropriate register class. */ >for (i = 0; i < n_ops; i++) > { > - replaced[i] = false; > + bool replaced = false; > > /* Don't scan match_operand here, since we've no reg class >information to pass down. 
Any operands that we could > @@ -951,26 +950,26 @@ copyprop_hardreg_forward_1 (basic_block > if (recog_data.operand_type[i] == OP_IN) > { > if (op_alt[i].is_address) > - replaced[i] > + replaced > = replace_oldest_value_addr (recog_data.operand_loc[i], > alternative_class (op_alt, i), > VOIDmode, ADDR_SPACE_GENERIC, > insn, vd); > else if (REG_P (recog_data.operand[i])) > - replaced[i] > + replaced > = replace_oldest_value_reg (recog_data.operand_loc[i], > alternative_class (op_alt, i), > insn, vd); > else if (MEM_P (recog_data.operand[i])) > - replaced[i] = replace_oldest_value_mem (recog_data.operand[i], > - insn, vd); > + replaced = replace_oldest_value_mem (recog_data.operand[i], > + insn, vd); > } > else if (MEM_P (recog_data.operand[i])) > - replaced[i] = replace_oldest_value_mem (recog_data.operand[i], > - insn, vd); > + replaced = replace_oldest_value_mem (recog_data.operand[i], > + insn, vd); > > /* If we performed any replacement, update match_dups. */ > - if (replaced[i]) > + if (replaced) > { > int j; > rtx new_rtx; > @@ -989,13 +988,6 @@ copyprop_hardreg_forward_1 (basic_block > { > if (! 
apply_change_group ()) > { > - for (i = 0; i < n_ops; i++) > - if (replaced[i]) > - { > - rtx old = *recog_data.operand_loc[i]; > - recog_data.operand[i] = old; > - } > - > if (dump_file) > fprintf (dump_file, >"insn %u: reg replacements not verified\n", > --- gcc/testsuite/gcc.target/i386/pr85342.c.jj2018-04-11 > 16:25:50.564848408 +0200 > +++ gcc/testsuite/gcc.target/i386/pr85342.c 2018-04-11 16:26:05.534856581 > +0200 > @@ -0,0 +1,29 @@ > +/* PR rtl-optimization/85342 */ > +/* { dg-do compile { target int128 } } */ > +/* { dg-options "-O2 -mavx512vl" } */ > + > +typedef unsigned char U __attribute__((vector_size (64))); > +typedef unsigned int V __attribute__((vector_size (64))); > +typedef unsigned __int128 W __attribute__((vector_size (64))); > +int i; > +V g, h, z, k, l, m; > +U j; > + > +W > +bar (W o, W p) > +{ > + U q; > + o |= (W){q[0]} >= o; > + o += 1 < o; > + j |= (U){} == j; > + return i + (W)q + (W)g + (W)h + (W)z + o + (W)j + (W)k + (W)l + (W)m + p; > +} > + > +W > +foo (U u) > +{ > + U q; > + W r = bar ((W)(U){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, > ~0}, (W)q); > + u += (U)bar ((W){~0}, r); > + return (W)u; > +} > > Jakub > >
Re: [PATCH] Handle empty infinite loops in OpenACC for PR84955
On Wed, Apr 11, 2018 at 9:30 PM, Cesar Philippidis wrote: > On 04/09/2018 04:31 AM, Richard Biener wrote: >> On Fri, 6 Apr 2018, Jakub Jelinek wrote: >> >>> On Fri, Apr 06, 2018 at 06:48:52AM -0700, Cesar Philippidis wrote: 2018-04-06 Cesar Philippidis PR middle-end/84955 gcc/ * cfgloop.c (flow_loops_find): Add assert. * omp-expand.c (expand_oacc_for): Add dummy false branch for tiled basic blocks without omp continue statements. * tree-cfg.c (execute_fixup_cfg): Handle calls to internal functions like regular functions. libgomp/ * testsuite/libgomp.oacc-c-c++-common/pr84955.c: New test. * testsuite/libgomp.oacc-fortran/pr84955.f90: New test. >>> >>> I'd like to defer the cfgloop.c and tree-cfg.c changes to Richard, just >>> want to >>> mention that: >>> --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -9586,10 +9586,7 @@ execute_fixup_cfg (void) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) { gimple *stmt = gsi_stmt (gsi); - tree decl = is_gimple_call (stmt) - ? gimple_call_fndecl (stmt) - : NULL; - if (decl) + if (is_gimple_call (stmt)) >>> >>> This change doesn't affect just internal functions, but also all indirect >>> calls through function pointers with const, pure or noreturn attributes. >> >> I think the change is desirable nevertheless. The question is if we >> want to do it at this point in time. >> >> The description of the problem sounds more like LTO writing writing out >> loops without previously fixing up state. So sth like the following >> which I'd prefer at this stage (the above hunk is ok for stage1 then). > > OK, I'll save that hunk for stage 1. > >> Index: gcc/lto-streamer-out.c >> === >> --- gcc/lto-streamer-out.c (revision 259227) >> +++ gcc/lto-streamer-out.c (working copy) >> @@ -2084,6 +2151,9 @@ output_function (struct cgraph_node *nod >>/* Set current_function_decl and cfun. */ >>push_cfun (fn); >> >> + /* Fixup loops if required to match discovery done in the reader. 
*/ >> + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); >> + >>/* Make string 0 be a NULL string. */ >>streamer_write_char_stream (ob->string_stream, 0); >> >> @@ -2176,12 +2246,13 @@ output_function (struct cgraph_node *nod >>streamer_write_record_start (ob, LTO_null); >> >>output_cfg (ob, fn); >> - >> - pop_cfun (); >> } >>else >> streamer_write_uhwi (ob, 0); >> >> + loop_optimizer_finalize (); >> + pop_cfun (); >> + >>/* Create a section to hold the pickled output of this function. */ >>produce_asm (ob, function); > > That worked. Is this patch OK for trunk, GCC 6 and GCC 7? Ok if you remove the cfgloop.c hunk. There's no point in an assert of sth being non-NULL when the immediately following stmt will dereference it. You get an ICE anyway. Thanks, Richard. > Thanks, > Cesar >
Re: [PATCH] Invoke maybe_warn_nonstring_arg for strcpy/stpcpy builtins.
On 04/11/2018 11:20 PM, Martin Sebor wrote: > On 04/11/2018 06:47 AM, Andreas Krebbel wrote: >> On 04/11/2018 10:02 AM, Jakub Jelinek wrote: >>> On Wed, Apr 11, 2018 at 09:48:05AM +0200, Andreas Krebbel wrote: c-c++-common/attr-nonstring-3.c fails on IBM Z. The reason appears to be that we provide builtin implementations for strcpy and stpcpy. The warnings currently will only be emitted when expanding these as normal calls. Bootstrapped and regression tested on x86_64 and s390x. Ok? gcc/ChangeLog: 2018-04-11 Andreas Krebbel * builtins.c (expand_builtin_strcpy): Invoke maybe_warn_nonstring_arg. (expand_builtin_stpcpy): Likewise. >>> >>> Don't you then warn twice if builtin implementations for strcpy and stpcpy >>> aren't available or can't be used, once here and once in calls.c? >> >> Looks like this could happen if the expander is present but rejects >> expansion. I basically copied >> this from the strcmp builtin which looks like possibly running into the same >> problem: > > I tried to avoid the problem in the other instances of the call > to maybe_warn_nonstring_arg (e.g., expand_builtin_strlen or > expand_builtin_strcmp). I don't know if the expander can fail > after the maybe_warn_nonstring_arg() call and so I have no > tests for it. > > In your patch the expander failing seems more likely than in > the others (in fact, on x86_64 it always fails because the call > to targetm.have_movstr () in expand_movstr() returns false). > > That said, I see two warnings for a call to strcmp() with > a nonstring argument even without the expander failing, so > what I did isn't quite right either. I opened bug 85359 for > it. I've opened BZ85369 for the strcpy / stpcpy issue. -Andreas-
[nvptx, PR85296] Fix handling of extern var with flexible array member
Hi, for the recently added test-case pr85244-1.c, we run into the following failure with the standalone nvptx toolchain: ... spawn nvptx-none-run ./pr85244-1.exe error : Size doesn't match for 'val' in 'input file 2 at offset 3047', first specified in 'input file 1 at offset 1805' nvptx-run: cuLinkAddData failed: unknown error (CUDA_ERROR_UNKNOWN, 999) ... The linking problem happens because while in pr85244-2.s we have an array of size 3: ... .visible .const .align 8 .u64 val[3] = { 0, 180388626432, 1337 }; ... in pr85244-1.s we have an array of size 2: ... .extern .const .align 8 .u64 val[2]; ... The ptx declarations correspond to this source bit in pr85244-1.c: ... struct s { long a; int b; int tab[]; }; extern const struct s val; ... and this one in pr85244-2.c (omitting type decl): ... const struct s val = { 0, 0, { 42, 1337 } }; ... Because ptx has no structs, structs are declared as arrays of a certain base type, in this case u64. In pr85244-2.c we calculate the size of the array, and based on the initializer we arrive at a size of 3. In pr85244-1.c we calculate the size of the array, and based on the type we arrive at a size of 2. The patch fixes this by declaring extern structs which have a flexible array member as an array without given dimension. Built and tested on nvptx. Committed to stage4 trunk. Thanks, - Tom [nvptx] Fix handling of extern var with flexible array member 2018-04-12 Tom de Vries PR target/85296 * config/nvptx/nvptx.c (flexible_array_member_type_p): New function. (nvptx_assemble_decl_begin): Add undefined param. Declare undefined array with flexible array member as array without given dimension. (nvptx_assemble_undefined_decl): Set nvptx_assemble_decl_begin call argument for undefined param to true. 
--- gcc/config/nvptx/nvptx.c | 35 +-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index a9a3053..131b495 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2021,6 +2021,30 @@ nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size) nvptx_assemble_value (str[i], 1); } +/* Return true if TYPE is a record type where the last field is an array without + given dimension. */ + +static bool +flexible_array_member_type_p (const_tree type) +{ + if (TREE_CODE (type) != RECORD_TYPE) +return false; + + const_tree last_field = NULL_TREE; + for (const_tree f = TYPE_FIELDS (type); f; f = TREE_CHAIN (f)) +last_field = f; + + if (!last_field) +return false; + + const_tree last_field_type = TREE_TYPE (last_field); + if (TREE_CODE (last_field_type) != ARRAY_TYPE) +return false; + + return (! TYPE_DOMAIN (last_field_type) + || ! TYPE_MAX_VALUE (TYPE_DOMAIN (last_field_type))); +} + /* Emit a PTX variable decl and prepare for emission of its initializer. NAME is the symbol name and SETION the PTX data area. The type is TYPE, object size SIZE and alignment is ALIGN. @@ -2031,11 +2055,18 @@ nvptx_output_ascii (FILE *, const char *str, unsigned HOST_WIDE_INT size) static void nvptx_assemble_decl_begin (FILE *file, const char *name, const char *section, - const_tree type, HOST_WIDE_INT size, unsigned align) + const_tree type, HOST_WIDE_INT size, unsigned align, + bool undefined = false) { bool atype = (TREE_CODE (type) == ARRAY_TYPE) && (TYPE_DOMAIN (type) == NULL_TREE); + if (undefined && flexible_array_member_type_p (type)) +{ + size = 0; + atype = true; +} + while (TREE_CODE (type) == ARRAY_TYPE) type = TREE_TYPE (type); @@ -2172,7 +2203,7 @@ nvptx_assemble_undefined_decl (FILE *file, const char *name, const_tree decl) tree size = DECL_SIZE_UNIT (decl); nvptx_assemble_decl_begin (file, name, section_for_decl (decl), TREE_TYPE (decl), size ? 
tree_to_shwi (size) : 0, - DECL_ALIGN (decl)); + DECL_ALIGN (decl), true); nvptx_assemble_decl_end (); }
Re: [PATCH] Use --push-state --as-needed and --pop-state instead of --as-needed and --no-as-needed for libgcc
On 11.04.2018 20:55, Jakub Jelinek wrote: > On Wed, Apr 11, 2018 at 06:07:17PM +0200, Matthias Klose wrote: >> On 11.04.2018 12:31, Jakub Jelinek wrote: >>> Hi! >>> >>> As discussed, using --as-needed and --no-as-needed is dangerous, because >>> it results in --no-as-needed even for libraries after -lgcc_s, even when the >>> default is --as-needed or --as-needed has been specified earlier on the >>> command line. >>> >>> If the linker supports --push-state/--pop-state, we should IMHO use it. >>> >>> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for stage1? >>> >>> Or is this something we want in GCC8 too? >> >> this is problematic for binutils versions with --push-state/--pop-state >> support >> in the BFD linker but not in gold, and then using -fuse-ld=gold. So maybe >> the >> version check for the BFD linker should only succeed for the first binutils >> version which also has --push-state/--pop-state support in gold. > > Does anybody use -fuse-ld=gold? grep the build log of your favorite distro, unless these are not beautified and not showing any command line options. For Debian/Ubuntu it's haskell using gold by default, and some upstreams like systemd turn it on by default, assuming it has the same quality on any architecture.
Re: [patch, fortran] Remove parallel annotation from DO CONCURRENT
On Wed, Apr 11, 2018 at 09:47:22PM +0200, Thomas Koenig wrote: > Am 11.04.2018 um 20:33 schrieb Jakub Jelinek: > > > > I have attached updated patch which moves the test case to > > > gfortran.dg/gomp (where it actually passes). > > > > How could it pass there? dg-do run tests don't belong into g*.dg/gomp/, > > nothing adds the -B etc. options needed to find libgomp.spec or libgomp > > as a library, or adds it to LD_LIBRARY_PATH etc. > > There are zero dg-do run tests in gfortran.dg/gomp/, there are 4 > > dg-do run tests in c-c++-common/gomp/, but those work fine because they > > use -fopenmp-simd option rather than > > -fopenmp/-fopenacc/-ftree-parallelize-loops= etc. > > So, where should the test go? > > The suggestion in PR 85346, to put it into > libgomp/testsuite/libgomp.fortran/, does not work: Yes, and I said what can be done to make it work; in patch form below. > Running ../../../../trunk/libgomp/testsuite/libgomp.fortran/fortran.exp ... > FAIL: libgomp.fortran/do_concurrent_5.f90 -O execution test > > even when ne (the array size) has been reduced to 2**20, far below ne is not the array size, the array size is 8 * ne, and 8 * 1MB is 8MB and you eat all of the usual stack limit just by that. > reasonable memory limits. The test passes when given the > -O1 -ftree-parallelize-loops=2 options by hand. > > So, what's the idea? Is there actually a directory which works, > or are we left with a wrong-code bug for which no test case is > possible? That would be quite bad, I think. Here is incremental diff. 
With the dg-skip-if and removal of explicit -O3, you make the test run only once with -O3 -g, and skip the other variants: UNSUPPORTED: libgomp.fortran/do_concurrent_5.f90 -O0 UNSUPPORTED: libgomp.fortran/do_concurrent_5.f90 -O1 UNSUPPORTED: libgomp.fortran/do_concurrent_5.f90 -O2 UNSUPPORTED: libgomp.fortran/do_concurrent_5.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions UNSUPPORTED: libgomp.fortran/do_concurrent_5.f90 -Os and with -fno-openmp you disable the default -fopenmp which you really don't need for the testcase, there are no OpenMP directives in there. --- libgomp/testsuite/libgomp.fortran/do_concurrent_5.f90 2018-04-11 17:27:59.035100057 +0200 +++ libgomp/testsuite/libgomp.fortran/do_concurrent_5.f90 2018-04-12 09:12:40.611789503 +0200 @@ -1,6 +1,7 @@ ! { dg-do run } ! PR 83064 - this used to give wrong results. -! { dg-additional-options "-O3 -ftree-parallelize-loops=2" } +! { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O3 -g" } } +! { dg-additional-options "-fno-openmp -ftree-parallelize-loops=2" } ! Original test case by Christian Felter program main @@ -8,7 +9,7 @@ program main implicit none integer, parameter :: nsplit = 4 -integer(int64), parameter :: ne = 2000 +integer(int64), parameter :: ne = 200 integer(int64) :: stride, low(nsplit), high(nsplit), edof(ne), i real(real64), dimension(nsplit) :: pi Jakub