Re: [PATCH, PR d/89255][1/3] Add -fbuilding-libphobos-tests
Patch updated and committed to trunk as r270301. -- Iain --- 2019-04-12 Iain Buclaw * d-tree.h (DECL_IN_UNITTEST_CONDITION_P): Define. * decl.cc (DeclVisitor): Add in_version_unittest_ field. (DeclVisitor::visit(ConditionalDeclaration)): New override. (DeclVisitor::visit(FuncDeclaration)): Set DECL_IN_UNITTEST_CONDITION_P. * lang.opt (-fbuilding-libphobos-tests): Add option. * modules.cc (current_testing_module): New static variable. (build_module_tree): Generate second moduleinfo symbol to hold reference to unittests if flag_building_libphobos_tests. (register_module_decl): Check DECL_IN_UNITTEST_CONDITION_P to decide which moduleinfo the decl should be registered against. --- diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h index 0b3c5eddedd..cff832cc645 100644 --- a/gcc/d/d-tree.h +++ b/gcc/d/d-tree.h @@ -59,7 +59,8 @@ typedef Array Expressions; Usage of DECL_LANG_FLAG_?: 0: LABEL_VARIABLE_CASE (in LABEL_DECL). - DECL_BUILT_IN_CTFE (in FUNCTION_DECL). */ + DECL_BUILT_IN_CTFE (in FUNCTION_DECL). + 1: DECL_IN_UNITTEST_CONDITION_P (in FUNCTION_DECL). */ /* The kinds of scopes we recognize. */ @@ -380,6 +381,10 @@ lang_tree_node #define DECL_BUILT_IN_CTFE(NODE) \ (DECL_LANG_FLAG_0 (FUNCTION_DECL_CHECK (NODE))) +/* True if the decl is only compiled in when unittests are turned on. */ +#define DECL_IN_UNITTEST_CONDITION_P(NODE) \ + (DECL_LANG_FLAG_1 (FUNCTION_DECL_CHECK (NODE))) + enum d_tree_index { DTI_VTABLE_ENTRY_TYPE, diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc index fffed97727f..f6c863988e3 100644 --- a/gcc/d/decl.cc +++ b/gcc/d/decl.cc @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #include "dmd/aggregate.h" #include "dmd/attrib.h" +#include "dmd/cond.h" #include "dmd/ctfe.h" #include "dmd/declaration.h" #include "dmd/enum.h" @@ -121,9 +122,13 @@ class DeclVisitor : public Visitor { using Visitor::visit; + /* If we're lowering the body of a version(unittest) condition. */ + bool in_version_unittest_; + public: DeclVisitor (void) { +this->in_version_unittest_ = false; } /* This should be overridden by each declaration class. */ @@ -241,6 +246,25 @@ public: visit ((AttribDeclaration *) d); } + /* Conditional compilation is the process of selecting which code to compile + and which code to not compile. Look for version conditions that may */ + + void visit (ConditionalDeclaration *d) + { +bool old_condition = this->in_version_unittest_; + +if (global.params.useUnitTests) + { + VersionCondition *vc = d->condition->isVersionCondition (); + if (vc && vc->ident == Identifier::idPool ("unittest")) + this->in_version_unittest_ = true; + } + +visit ((AttribDeclaration *) d); + +this->in_version_unittest_ = old_condition; + } + /* Walk over all members in the namespace scope. */ void visit (Nspace *d) @@ -868,6 +892,7 @@ public: } DECL_ARGUMENTS (fndecl) = param_list; +DECL_IN_UNITTEST_CONDITION_P (fndecl) = this->in_version_unittest_; rest_of_decl_compilation (fndecl, 1, 0); /* If this is a member function that nested (possibly indirectly) in another diff --git a/gcc/d/lang.opt b/gcc/d/lang.opt index 523f73c90de..f65be444d45 100644 --- a/gcc/d/lang.opt +++ b/gcc/d/lang.opt @@ -197,6 +197,10 @@ Enum(bounds_check) String(safeonly) Value(1) EnumValue Enum(bounds_check) String(on) Value(2) +; Generates a secondary ModuleInfo symbol for linking in unittests +fbuilding-libphobos-tests +D Undocumented Var(flag_building_libphobos_tests) + fbuiltin D Var(flag_no_builtin, 0) ; Documented in C diff --git a/gcc/d/modules.cc b/gcc/d/modules.cc index e9bd44115f9..315f5d82356 100644 --- a/gcc/d/modules.cc +++ b/gcc/d/modules.cc @@ -110,6 +110,11 @@ enum module_info_flags static module_info *current_moduleinfo; +/* When compiling with -fbuilding-libphobos-tests, this contains information + about the module that gets compiled in only when unittests are enabled. */ + +static module_info *current_testing_module; + /* The declaration of the current module being compiled. */ static Module *current_module_decl; @@ -706,8 +711,10 @@ build_module_tree (Module *decl) assert (!current_moduleinfo && !current_module_decl); module_info mi = module_info (); + module_info mitest = module_info (); current_moduleinfo = &mi; + current_testing_module = &mitest; current_module_decl = decl; /* Layout module members. */ @@ -720,6 +727,53 @@ build_module_tree (Module *decl) } } + /* For libphobos-internal use only. Generate a separate module info symbol + that references all compiled in unittests, this allows compiling library + modules and linking to libphobos without having run-time conflicts because + of two ModuleInfo records with the same name being present in two DSOs. */ + if (flag_building_libphobos_te
[PATCH, d] Committed merge with upstream dmd
Hi, This patch merges the D front-end implementation with dmd upstream c185f9df1. Adds new virtual isVersionCondition, this is so that in the code generation pass, a ConditionDeclaration's condition can be identified without requiring a Visitor function. Bootstrapped and regression tested on x86_64-linux-gnu. Committed to trunk as r270300. -- Iain --- diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index 800be95e4e6..be0c5a50da2 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -d7ed327edb0b01ad56e7e73e77b3401cd565675e +c185f9df1789456c7d88d047f2df23dd784f1182 The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. diff --git a/gcc/d/dmd/cond.h b/gcc/d/dmd/cond.h index 891969be48d..8e33b16a9da 100644 --- a/gcc/d/dmd/cond.h +++ b/gcc/d/dmd/cond.h @@ -39,6 +39,7 @@ public: virtual Condition *syntaxCopy() = 0; virtual int include(Scope *sc, ScopeDsymbol *sds) = 0; virtual DebugCondition *isDebugCondition() { return NULL; } +virtual VersionCondition *isVersionCondition() { return NULL; } virtual void accept(Visitor *v) { v->visit(this); } }; @@ -91,6 +92,7 @@ public: VersionCondition(Module *mod, unsigned level, Identifier *ident); int include(Scope *sc, ScopeDsymbol *sds); +VersionCondition *isVersionCondition() { return this; } void accept(Visitor *v) { v->visit(this); } };
Re: [PR89528] reset debug uses of return value when dropping dead RTL call
On April 12, 2019 3:28:20 AM GMT+02:00, Alexandre Oliva wrote: >On Apr 5, 2019, Richard Biener wrote: > >> Looks OK but can you adjust the testcase to actually test >> something? > >Doh, I *knew* I'd failed to do something I should have ;-) > > >I resorted to the trick you used in pr89892.c, of using a more complex >expression to get UNSUPPORTED results instead of FAILs when the call >and >thus the variable are optimized out. How's this? Looks good! Thanks, Richard. > >[PR89528] reset debug uses of return value when dropping dead RTL call > >When we remove an RTL call, we wouldn't clean up references to the >return value of the call in debug insns. Make it so that we do. > > >for gcc/ChangeLog > > PR debug/89528 > * valtrack.c (dead_debug_insert_temp): Reset debug references > to the return value of a call being removed. > >for gcc/testsuite/ChangeLog > > PR debug/89528 > * gcc.dg/guality/pr89528.c: New. >--- >gcc/testsuite/gcc.dg/guality/pr89528.c | 25 + > gcc/valtrack.c | 22 ++ > 2 files changed, 31 insertions(+), 16 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/guality/pr89528.c > >diff --git a/gcc/testsuite/gcc.dg/guality/pr89528.c >b/gcc/testsuite/gcc.dg/guality/pr89528.c >new file mode 100644 >index ..04a7e84d8755 >--- /dev/null >+++ b/gcc/testsuite/gcc.dg/guality/pr89528.c >@@ -0,0 +1,25 @@ >+/* PR debug/89528 */ >+/* { dg-do run } */ >+/* { dg-options "-g" } */ >+ >+#include >+ >+char b; >+int d, e; >+static int i = 1; >+void a(int l) { printf("", l); } >+char c(char l) { return l || b && l == 1 ? b : b % l; } >+short f(int l, int m) { return l * m; } >+short g(short l, short m) { return m || l == 767 && m == 1; } >+int h(int l, int m) { return (l ^ m & l ^ (m & 647) - m ^ m) < m; } >+static int j(int l) { return d == 0 || l == 647 && d == 1 ? l : l % d; >} >+short k(int l) { return l >= 2 >> l; } >+void optimize_me_not() { asm(""); } >+static short n(void) { >+ int l_1127 = ~j(9 || 0) ^ 65535; >+ optimize_me_not(); /* { dg-final { gdb-test . "l_1127+1" "-65534" } >} */ >+ f(l_1127, i && e ^ 4) && g(0, 0); >+ e = 0; >+ return 5; >+} >+int main() { n(); } >diff --git a/gcc/valtrack.c b/gcc/valtrack.c >index 9b2bb333c0a3..1f67378a867c 100644 >--- a/gcc/valtrack.c >+++ b/gcc/valtrack.c >@@ -657,22 +657,12 @@ dead_debug_insert_temp (struct dead_debug_local >*debug, unsigned int uregno, > { > dest = SET_DEST (set); > src = SET_SRC (set); >-/* Lose if the REG-setting insn is a CALL. */ >-if (GET_CODE (src) == CALL) >- { >-while (uses) >- { >-cur = uses->next; >-XDELETE (uses); >-uses = cur; >- } >-return 0; >- } >-/* Asm in DEBUG_INSN is never useful, we can't emit debug info for >- that. And for volatile_insn_p, it is actually harmful >- - DEBUG_INSNs shouldn't have any side-effects. */ >-else if (GET_CODE (src) == ASM_OPERANDS >- || volatile_insn_p (src)) >+/* Reset uses if the REG-setting insn is a CALL. Asm in >+ DEBUG_INSN is never useful, we can't emit debug info for >+ that. And for volatile_insn_p, it is actually harmful - >+ DEBUG_INSNs shouldn't have any side-effects. */ >+if (GET_CODE (src) == CALL || GET_CODE (src) == ASM_OPERANDS >+|| volatile_insn_p (src)) > set = NULL_RTX; > } >
[PATCH] Uglify identifiers missed in previous commit(s)
* include/pstl/algorithm_impl.h: Uglify identfiers. * include/pstl/numeric_impl.h: Uglify identfiers. * include/pstl/parallel_backend_tbb.h: Uglify identfiers. >From b75813e885c50e667a1474c1d0e1fc47ee893d6e Mon Sep 17 00:00:00 2001 From: Thomas Rodgers Date: Thu, 11 Apr 2019 19:42:50 -0700 Subject: [PATCH] Uglify identifiers missed in previous commit(s) * include/pstl/algorithm_impl.h: Uglify identfiers. * include/pstl/numeric_impl.h: Uglify identfiers. * include/pstl/parallel_backend_tbb.h: Uglify identfiers. --- libstdc++-v3/include/pstl/algorithm_impl.h| 22 +-- libstdc++-v3/include/pstl/numeric_impl.h | 2 +- .../include/pstl/parallel_backend_tbb.h | 5 +++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/libstdc++-v3/include/pstl/algorithm_impl.h b/libstdc++-v3/include/pstl/algorithm_impl.h index b0d60baae14..d39e99add05 100644 --- a/libstdc++-v3/include/pstl/algorithm_impl.h +++ b/libstdc++-v3/include/pstl/algorithm_impl.h @@ -283,20 +283,20 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI template _ForwardIterator2 -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size n, _ForwardIterator2 __first2, _Function f, +__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f, _IsVector is_vector, /*parallel=*/std::false_type) noexcept { -return __internal::__brick_walk2_n(__first1, n, __first2, f, is_vector); +return __internal::__brick_walk2_n(__first1, __n, __first2, __f, is_vector); } template _RandomAccessIterator2 -__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size n, _RandomAccessIterator2 __first2, - _Function f, _IsVector is_vector, /*parallel=*/std::true_type) +__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, + _Function __f, _IsVector __is_vector, /*parallel=*/std::true_type) { -return __internal::__pattern_walk2(std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + n, __first2, f, is_vector, - std::true_type()); +return __internal::__pattern_walk2(std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f, + __is_vector, std::true_type()); } template @@ -1033,7 +1033,7 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; -__par_backend::parallel_strict_scan( +__par_backend::__parallel_strict_scan( std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i, @@ -1182,7 +1182,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI __mask += __min; _DifferenceType __m{}; // 2. Elements that doesn't satisfy pred are moved to result -__par_backend::parallel_strict_scan( +__par_backend::__parallel_strict_scan( std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector); @@ -1309,7 +1309,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; -__par_backend::parallel_strict_scan( +__par_backend::__parallel_strict_scan( std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; @@ -2033,7 +2033,7 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _ReturnType __m{}; -__par_backend::parallel_strict_scan( +__par_backend::__parallel_strict_scan( std::forward<_ExecutionPolicy>(__exec), __n, std::make_pair(_DifferenceType(0), _Differenc
[PR86438] avoid too-long shift in test
The test fell back to long long and long when __int128 is not available, but it assumed sizeof(long) < sizeof(long long) because of a shift count that would be out of range for a long long if their widths are the same. Fixed by splitting it up into two shifts. Tested on x86_64-linux-gnu, -m64 and -m32. Hopefully Andrew and/or John David will let me know if it fails to fix the problem on the platforms in which they've observed it. Thanks for the report, sorry it took me so long to get to it. I'm going to install this as obvious, unless there are objections in the next few days. for gcc/testsuite/ChangeLog PR rtl-optimization/86438 * gcc.dg/torture/pr86438.c: Split up too-wide shift. --- gcc/testsuite/gcc.dg/torture/pr86438.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr86438.c b/gcc/testsuite/gcc.dg/torture/pr86438.c index 3e95515ae6a6..5f8b463f7572 100644 --- a/gcc/testsuite/gcc.dg/torture/pr86438.c +++ b/gcc/testsuite/gcc.dg/torture/pr86438.c @@ -24,6 +24,6 @@ main (void) u64 d = (g ? 5 : 4); u32 f = __builtin_sub_overflow_p (d, (u128) d, (u64) 0); u128 x = g + f + d; - check (x >> (sizeof (u64) * __CHAR_BIT__), x); + check ((x >> 1) >> (sizeof (u64) * __CHAR_BIT__ - 1), x); return 0; } -- Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo Be the change, be Free! FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe
Re: [PR89528] reset debug uses of return value when dropping dead RTL call
On Apr 5, 2019, Richard Biener wrote: > Looks OK but can you adjust the testcase to actually test > something? Doh, I *knew* I'd failed to do something I should have ;-) I resorted to the trick you used in pr89892.c, of using a more complex expression to get UNSUPPORTED results instead of FAILs when the call and thus the variable are optimized out. How's this? [PR89528] reset debug uses of return value when dropping dead RTL call When we remove an RTL call, we wouldn't clean up references to the return value of the call in debug insns. Make it so that we do. for gcc/ChangeLog PR debug/89528 * valtrack.c (dead_debug_insert_temp): Reset debug references to the return value of a call being removed. for gcc/testsuite/ChangeLog PR debug/89528 * gcc.dg/guality/pr89528.c: New. --- gcc/testsuite/gcc.dg/guality/pr89528.c | 25 + gcc/valtrack.c | 22 ++ 2 files changed, 31 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/guality/pr89528.c diff --git a/gcc/testsuite/gcc.dg/guality/pr89528.c b/gcc/testsuite/gcc.dg/guality/pr89528.c new file mode 100644 index ..04a7e84d8755 --- /dev/null +++ b/gcc/testsuite/gcc.dg/guality/pr89528.c @@ -0,0 +1,25 @@ +/* PR debug/89528 */ +/* { dg-do run } */ +/* { dg-options "-g" } */ + +#include + +char b; +int d, e; +static int i = 1; +void a(int l) { printf("", l); } +char c(char l) { return l || b && l == 1 ? b : b % l; } +short f(int l, int m) { return l * m; } +short g(short l, short m) { return m || l == 767 && m == 1; } +int h(int l, int m) { return (l ^ m & l ^ (m & 647) - m ^ m) < m; } +static int j(int l) { return d == 0 || l == 647 && d == 1 ? l : l % d; } +short k(int l) { return l >= 2 >> l; } +void optimize_me_not() { asm(""); } +static short n(void) { + int l_1127 = ~j(9 || 0) ^ 65535; + optimize_me_not(); /* { dg-final { gdb-test . "l_1127+1" "-65534" } } */ + f(l_1127, i && e ^ 4) && g(0, 0); + e = 0; + return 5; +} +int main() { n(); } diff --git a/gcc/valtrack.c b/gcc/valtrack.c index 9b2bb333c0a3..1f67378a867c 100644 --- a/gcc/valtrack.c +++ b/gcc/valtrack.c @@ -657,22 +657,12 @@ dead_debug_insert_temp (struct dead_debug_local *debug, unsigned int uregno, { dest = SET_DEST (set); src = SET_SRC (set); - /* Lose if the REG-setting insn is a CALL. */ - if (GET_CODE (src) == CALL) - { - while (uses) - { - cur = uses->next; - XDELETE (uses); - uses = cur; - } - return 0; - } - /* Asm in DEBUG_INSN is never useful, we can't emit debug info for -that. And for volatile_insn_p, it is actually harmful -- DEBUG_INSNs shouldn't have any side-effects. */ - else if (GET_CODE (src) == ASM_OPERANDS - || volatile_insn_p (src)) + /* Reset uses if the REG-setting insn is a CALL. Asm in +DEBUG_INSN is never useful, we can't emit debug info for +that. And for volatile_insn_p, it is actually harmful - +DEBUG_INSNs shouldn't have any side-effects. */ + if (GET_CODE (src) == CALL || GET_CODE (src) == ASM_OPERANDS + || volatile_insn_p (src)) set = NULL_RTX; } -- Alexandre Oliva, freedom fighter https://FSFLA.org/blogs/lxo Be the change, be Free! FSF Latin America board member GNU Toolchain EngineerFree Software Evangelist Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe
Re: AW: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.
Reinhold, Thanks for the insights ! That means there is currently an other issue since copy-in is performed even if the argument is declared as ASYNCHRONOUS. I gave the copy-in mechanism some more thoughts, and as a library developers, I can clearly see a need *not* to do that on a case-by-case basis, mainly for performance reasons, but also to be friendly with legacy apps that are not strictly standard compliant. At this stage, I think the best way to move forward is to add an other directive in the interface definition. for example, we could declare module foo interface subroutine bar_f08(buf) BIND(C, name="bar_c") implicit none !GCC$ ATTRIBUTES NO_COPY_IN :: buf TYPE(*), DIMENSION(..), INTENT(IN) :: buf end subroutine end interface end module Does this make sense ? Gilles On 4/10/2019 4:22 PM, Bader, Reinhold wrote: Hi Gilles, I also found an other potential issue with copy-in. If in Fortran, we call foo(buf(0,0)) then the C subroutine can only access buf(0,0), and other elements such as buf(1025,1025) cannot be accessed. Such elements are valid in buf, but out of bounds in the copy (that contains a single element). Strictly speaking, I cannot say whether this is a violation of the standard or not, but I can see how this will break a lot of existing apps (once again, those apps might be incorrect in the first place, but most of us got used to them working). The above call will only be conforming if the dummy argument is declared assumed or explicit size. Otherwise, the compiler should reject it due to rank mismatch. For assumed rank, the call would be legitimate, but the rank of the dummy argument is then zero. Even if no copy-in is performed, accessing data beyond the address range of that scalar is not strictly allowed. Of more interest is the situation where the dummy argument in Fortran is declared, e.g., TYPE(*), ASYNCHRONOUS, INTENT(IN) :: BUF(..) The standard's semantics *forbids* performing copy-in/out in this case, IIRC. Otherwise ASYNCHRONOUS semantics would not work, and non-blocking MPI calls would fail due to buffers vanishing into thin air. Regards Reinhold To me, this is a second reason why copy-in is not desirable (at least as a default option). Cheers, Gilles On 4/9/2019 7:18 PM, Paul Richard Thomas wrote: The most part of this patch is concerned with implementing calls from C of of fortran bind c procedures with assumed shape or assumed rank dummies to completely fix PR89843. The conversion of the descriptors from CFI to gfc occur on entry to and reversed on exit from the procedure. This patch is safe for trunk, even at this late stage, because its effects are barricaded behind the tests for CFI descriptors. I believe that it appropriately rewards the bug reporters to have this feature work as well as possible at release. Between comments and the ChangeLogs, this patch is self explanatory. Bootstrapped and regtested on FC29/x86_64 - OK for trunk? Paul 2019-04-09 Paul Thomas PR fortran/89843 * trans-decl.c (gfc_get_symbol_decl): Assumed shape and assumed rank dummies of bind C procs require deferred initialization. (convert_CFI_desc): New procedure to convert incoming CFI descriptors to gfc types and back again. (gfc_trans_deferred_vars): Call it. * trans-expr.c (gfc_conv_gfc_desc_to_cfi_desc): Null the CFI descriptor pointer. Free the descriptor in all cases. PR fortran/90022 * trans-decl.c (gfc_get_symbol_decl): Make sure that the se expression is a pointer type before converting it to the symbol backend_decl type. 2019-04-09 Paul Thomas PR fortran/89843 * gfortran.dg/ISO_Fortran_binding_4.f90: Modify the value of x in ctg. Test the conversion of the descriptor types in the main program. * gfortran.dg/ISO_Fortran_binding_10.f90: New test. * gfortran.dg/ISO_Fortran_binding_10.c: Called by it. PR fortran/90022 * gfortran.dg/ISO_Fortran_binding_1.c: Correct the indexing for the computation of 'ans'. Also, change the expected results for CFI_is_contiguous to comply with standard. * gfortran.dg/ISO_Fortran_binding_1.f90: Correct the expected results for CFI_is_contiguous to comply with standard. * gfortran.dg/ISO_Fortran_binding_9.f90: New test. * gfortran.dg/ISO_Fortran_binding_9.c: Called by it. 2019-04-09 Paul Thomas PR fortran/89843 * runtime/ISO_Fortran_binding.c (cfi_desc_to_gfc_desc): Only return immediately if the source pointer is null. Bring forward the extraction of the gfc type. Extract the kind so that the element size can be correctly computed for sections and components of derived type arrays. Remove the free of the CFI descriptor since this is now done in trans-expr.c. (gfc_desc_to_cfi_desc): Only allocate the CFI descriptor if it is not null. (CFI_section): Normalise the difference b
Re: C++ PATCH for c++/87603 - constexpr functions are no longer noexcept
On 11/04/19 17:36 -0400, Marek Polacek wrote: This patch deals with constexpr functions and whether or not they should be noexcept. CWG 1129 specified that constexpr functions are noexcept: it was a special case in [expr.unary.noexcept]. This was accidentally removed in but the CWG conclusion was to keep it as-is. Clearly we need to change this for C++17. The question is whether we should treat it as a DR and apply it retroactively (which is what clang did). I took Well Clang never implemented CWG 1129 in the first place, so they applied P0003 retroactively by just doing nothing :-)
C++ PATCH for c++/87603 - constexpr functions are no longer noexcept
This patch deals with constexpr functions and whether or not they should be noexcept. CWG 1129 specified that constexpr functions are noexcept: it was a special case in [expr.unary.noexcept]. This was accidentally removed in but the CWG conclusion was to keep it as-is. Clearly we need to change this for C++17. The question is whether we should treat it as a DR and apply it retroactively (which is what clang did). I took the same approach and my reasoning is in the comment in check_noexcept_r. Arguably it might be too late to put this in now, maybe we should defer to GCC 10. But at least there's a patch. Bootstrapped/regtested on x86_64-linux. 2019-04-11 Marek Polacek PR c++/87603 - constexpr functions are no longer noexcept. * constexpr.c (is_sub_constant_expr): Remove unused function. * cp-tree.h (is_sub_constant_expr): Remove declaration. * except.c (check_noexcept_r): Don't consider a call to a constexpr function noexcept. * g++.dg/cpp0x/constexpr-noexcept.C: Adjust the expected result. * g++.dg/cpp0x/constexpr-noexcept3.C: Likewise. * g++.dg/cpp0x/constexpr-noexcept4.C: Likewise. * g++.dg/cpp0x/constexpr-noexcept8.C: New test. * g++.dg/cpp0x/inh-ctor32.C: Remove dg-message. * g++.dg/cpp1y/constexpr-noexcept1.C: New test. diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c index 0ce5618a2fc..9c13f0d5f32 100644 --- gcc/cp/constexpr.c +++ gcc/cp/constexpr.c @@ -5423,27 +5423,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, return r; } -/* Returns true if T is a valid subexpression of a constant expression, - even if it isn't itself a constant expression. */ - -bool -is_sub_constant_expr (tree t) -{ - bool non_constant_p = false; - bool overflow_p = false; - hash_map map; - HOST_WIDE_INT constexpr_ops_count = 0; - - constexpr_ctx ctx -= { NULL, &map, NULL, NULL, NULL, NULL, &constexpr_ops_count, - true, true, false }; - - instantiate_constexpr_fns (t); - cxx_eval_constant_expression (&ctx, t, false, &non_constant_p, - &overflow_p); - return !non_constant_p && !overflow_p; -} - /* If T represents a constant expression returns its reduced value. Otherwise return error_mark_node. If T is dependent, then return NULL. */ diff --git gcc/cp/cp-tree.h gcc/cp/cp-tree.h index b87b968fa4e..ff4ce068a83 100644 --- gcc/cp/cp-tree.h +++ gcc/cp/cp-tree.h @@ -7720,7 +7720,6 @@ extern tree fold_non_dependent_init (tree, tsubst_flags_t = tf_warning_or_error, bool = false); extern tree fold_simple(tree); -extern bool is_sub_constant_expr(tree); extern bool reduced_constant_expression_p (tree); extern bool is_instantiation_of_constexpr (tree); extern bool var_in_constexpr_fn (tree); diff --git gcc/cp/except.c gcc/cp/except.c index 40e973fad66..7df17c52eb4 100644 --- gcc/cp/except.c +++ gcc/cp/except.c @@ -1128,11 +1128,14 @@ check_noexcept_r (tree *tp, int * /*walk_subtrees*/, void * /*data*/) && (DECL_ARTIFICIAL (fn) || nothrow_libfn_p (fn))) return TREE_NOTHROW (fn) ? NULL_TREE : fn; - /* A call to a constexpr function is noexcept if the call -is a constant expression. */ - if (DECL_DECLARED_CONSTEXPR_P (fn) - && is_sub_constant_expr (t)) - return NULL_TREE; + /* We used to treat a call to a constexpr function as noexcept if +the call was a constant expression (CWG 1129). This has changed +in P0003 whereby noexcept has no special rule for constant +expressions anymore. Since the current behavior is important for +certain library functionality, we treat this as a DR, therefore +adjusting the behavior for C++11 and C++14. Previously, we had +to evaluate the noexcept-specifier's operand here, but that could +cause instantiations that would fail. */ } if (!TYPE_NOTHROW_P (type)) return fn; diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C index dbadaa8e3d2..035afd13d39 100644 --- gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C +++ gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C @@ -10,4 +10,7 @@ constexpr T value(T t) noexcept(is_funny::value) { return t; } // Line 7 constexpr bool ok = noexcept(value(42)); -static_assert(ok, "Assertion failure"); +// We used to treat a call to a constexpr function as noexcept if +// the call was a constant expression. We no longer do since +// c++/87603. +static_assert(!ok, "Assertion failure"); diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept3.C gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept3.C index 9541bc0781d..5a43899b5fa 100644
[PATCH, libphobos] Committed merge with upstream phobos
Hi, This patch merges the libphobos library with upstream phobos cf95639ff. Backports port fixes from upstream that have been committed since last sync. Bootstrapped and regression tested on x86_64-linux-gnu. Committed to trunk as r270296. -- Iain diff --git a/libphobos/src/MERGE b/libphobos/src/MERGE index 63bd46834c5..b4d44b55624 100644 --- a/libphobos/src/MERGE +++ b/libphobos/src/MERGE @@ -1,4 +1,4 @@ -ef07932811de50a1d5810ea23462d127a60574a6 +cf95639ffd9ed6f3b9d10d98461b2fbd31615757 The first line of this file holds the git revision number of the last merge done from the dlang/phobos repository. diff --git a/libphobos/src/std/datetime/systime.d b/libphobos/src/std/datetime/systime.d index e65d296e427..326b5441724 100644 --- a/libphobos/src/std/datetime/systime.d +++ b/libphobos/src/std/datetime/systime.d @@ -214,6 +214,22 @@ public: hnsecsToUnixEpoch; } } +else version (DragonFlyBSD) +{ +import core.sys.dragonflybsd.time : clock_gettime, CLOCK_REALTIME, +CLOCK_REALTIME_FAST, CLOCK_REALTIME_PRECISE, CLOCK_SECOND; +static if (clockType == ClockType.coarse) alias clockArg = CLOCK_REALTIME_FAST; +else static if (clockType == ClockType.normal) alias clockArg = CLOCK_REALTIME; +else static if (clockType == ClockType.precise) alias clockArg = CLOCK_REALTIME_PRECISE; +else static if (clockType == ClockType.second) alias clockArg = CLOCK_SECOND; +else static assert(0, "Previous static if is wrong."); +timespec ts; +if (clock_gettime(clockArg, &ts) != 0) +throw new TimeException("Call to clock_gettime() failed"); +return convert!("seconds", "hnsecs")(ts.tv_sec) + + ts.tv_nsec / 100 + + hnsecsToUnixEpoch; +} else version (Solaris) { static if (clockType == ClockType.second) diff --git a/libphobos/src/std/datetime/timezone.d b/libphobos/src/std/datetime/timezone.d index e923a34a98d..7ae19020243 100644 --- a/libphobos/src/std/datetime/timezone.d +++ b/libphobos/src/std/datetime/timezone.d @@ -292,10 +292,12 @@ public: version (Posix) { -version (FreeBSD) enum utcZone = "Etc/UTC"; -else version (NetBSD) enum utcZone = "UTC"; -else version (linux) enum utcZone = "UTC"; -else version (OSX) enum utcZone = "UTC"; +version (FreeBSD)enum utcZone = "Etc/UTC"; +else version (NetBSD)enum utcZone = "UTC"; +else version (DragonFlyBSD) enum utcZone = "UTC"; +else version (linux) enum utcZone = "UTC"; +else version (OSX) enum utcZone = "UTC"; +else version (Solaris) enum utcZone = "UTC"; else static assert(0, "The location of the UTC timezone file on this Posix platform must be set."); auto tzs = [testTZ("America/Los_Angeles", "PST", "PDT", dur!"hours"(-8), dur!"hours"(1)), @@ -1891,6 +1893,14 @@ public: // Android concatenates all time zone data into a single file and stores it here. enum defaultTZDatabaseDir = "/system/usr/share/zoneinfo/"; } +else version (Solaris) +{ +/++ +The default directory where the TZ Database files are. It's empty +for Windows, since Windows doesn't have them. + +/ +enum defaultTZDatabaseDir = "/usr/share/lib/zoneinfo/"; +} else version (Posix) { /++ diff --git a/libphobos/src/std/experimental/allocator/building_blocks/region.d b/libphobos/src/std/experimental/allocator/building_blocks/region.d index dfcecce72bd..3d8431c23ca 100644 --- a/libphobos/src/std/experimental/allocator/building_blocks/region.d +++ b/libphobos/src/std/experimental/allocator/building_blocks/region.d @@ -392,6 +392,8 @@ struct InSituRegion(size_t size, size_t minAlign = platformAlignment) else version (PPC64) enum growDownwards = Yes.growDownwards; else version (MIPS32) enum growDownwards = Yes.growDownwards; else version (MIPS64) enum growDownwards = Yes.growDownwards; +else version (RISCV32) enum growDownwards = Yes.growDownwards; +else version (RISCV64) enum growDownwards = Yes.growDownwards; else version (SPARC) enum growDownwards = Yes.growDownwards; else version (SystemZ) enum growDownwards = Yes.growDownwards; else static assert(0, "Dunno how the stack grows on this architecture."); diff --git a/libphobos/src/std/file.d b/libphobos/src/std/file.d index 17b7ca82654..9ba992944eb 100644 --- a/libphobos/src/std/file.d +++ b/libphobos/src/std/file.d @@ -1488,6 +1488,7 @@ if (isInputRange!R && !isInfinite!R && isSomeChar!(ElementEncodingType!R)) // - OS X, where the native filesystem (HFS+)
[PATCH, libphobos] Committed merge with upstream druntime
Hi, This patch merges the libdruntime sub-library with upstream druntime 175bf5fc. Backports extern(C) bindings from upstream druntime that have been committed since the last sync. Notably, the Solaris port has been undergoing a some testing to get it all passing. Bootstrapped and regression tested on x86_64-linux-gnu. Committed to trunk as r270295. -- Iain --- diff --git a/libphobos/libdruntime/MERGE b/libphobos/libdruntime/MERGE index 15a55ab612a..a7bbd3da964 100644 --- a/libphobos/libdruntime/MERGE +++ b/libphobos/libdruntime/MERGE @@ -1,4 +1,4 @@ -d57fa1ffaecc858229ed7a730e8486b59197dee5 +175bf5fc69d26fec60d533ff77f7e915fd5bb468 The first line of this file holds the git revision number of the last merge done from the dlang/druntime repository. diff --git a/libphobos/libdruntime/core/stdc/stdio.d b/libphobos/libdruntime/core/stdc/stdio.d index e708a51763c..6ce3f9d13e5 100644 --- a/libphobos/libdruntime/core/stdc/stdio.d +++ b/libphobos/libdruntime/core/stdc/stdio.d @@ -1643,9 +1643,6 @@ else version (CRuntime_Bionic) } else version (CRuntime_Musl) { -import core.sys.posix.sys.types : off_t; -/// -int fseeko(FILE *, off_t, int); @trusted { /// diff --git a/libphobos/libdruntime/core/stdc/time.d b/libphobos/libdruntime/core/stdc/time.d index 648f782601b..4a571e153bf 100644 --- a/libphobos/libdruntime/core/stdc/time.d +++ b/libphobos/libdruntime/core/stdc/time.d @@ -148,21 +148,22 @@ else } /// -double difftime(time_t time1, time_t time0); +pure double difftime(time_t time1, time_t time0); // MT-Safe /// -time_t mktime(tm* timeptr); +@system time_t mktime(scope tm* timeptr); // @system: MT-Safe env locale /// -time_t time(time_t* timer); +time_t time(scope time_t* timer); + /// -char* asctime(in tm* timeptr); +@system char* asctime(const scope tm* timeptr); // @system: MT-Unsafe race:asctime locale /// -char* ctime(in time_t* timer); +@system char* ctime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf race:asctime env locale /// -tm* gmtime(in time_t* timer); +@system tm* gmtime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf env locale /// -tm* localtime(in time_t* timer); +@system tm* localtime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf env locale /// -@system size_t strftime(char* s, size_t maxsize, in char* format, in tm* timeptr); +@system size_t strftime(scope char* s, size_t maxsize, const scope char* format, const scope tm* timeptr); // @system: MT-Safe env locale version (Windows) { @@ -171,9 +172,9 @@ version (Windows) /// void _tzset(); // non-standard /// -@system char* _strdate(char* s); // non-standard +@system char* _strdate(return scope char* s); // non-standard /// -@system char* _strtime(char* s); // non-standard +@system char* _strtime(return scope char* s); // non-standard /// extern __gshared const(char)*[2] tzname; // non-standard diff --git a/libphobos/libdruntime/core/sync/condition.d b/libphobos/libdruntime/core/sync/condition.d index b6755f2d998..8afa8f7cc38 100644 --- a/libphobos/libdruntime/core/sync/condition.d +++ b/libphobos/libdruntime/core/sync/condition.d @@ -23,7 +23,12 @@ public import core.time; version (Windows) { private import core.sync.semaphore; -private import core.sys.windows.windows; +private import core.sys.windows.basetsd /+: HANDLE+/; +private import core.sys.windows.winbase /+: CloseHandle, CreateSemaphoreA, CRITICAL_SECTION, +DeleteCriticalSection, EnterCriticalSection, INFINITE, InitializeCriticalSection, +LeaveCriticalSection, ReleaseSemaphore, WAIT_OBJECT_0, WaitForSingleObject+/; +private import core.sys.windows.windef /+: BOOL, DWORD+/; +private import core.sys.windows.winerror /+: WAIT_TIMEOUT+/; } else version (Posix) { diff --git a/libphobos/libdruntime/core/sync/mutex.d b/libphobos/libdruntime/core/sync/mutex.d index 798f8412a02..024009f48aa 100644 --- a/libphobos/libdruntime/core/sync/mutex.d +++ b/libphobos/libdruntime/core/sync/mutex.d @@ -20,7 +20,9 @@ public import core.sync.exception; version (Windows) { -private import core.sys.windows.windows; +private import core.sys.windows.winbase /+: CRITICAL_SECTION, DeleteCriticalSection, +EnterCriticalSection, InitializeCriticalSection, LeaveCriticalSection, +TryEnterCriticalSection+/; } else version (Posix) { @@ -144,7 +146,7 @@ class Mutex : { import core.internal.abort : abort; !pthread_mutex_destroy(&m_hndl) || -abort("Error: pthread_mutex_init failed."); +abort("Error: pthread_mutex_destroy failed."); } this.__monitor = null; } @@ -318,7 +320,7 @@ unittest void useResource() shared @safe nothrow @nogc { mtx.lock_nothrow(); -
[PATCH, d] Committed merge with upstream dmd
Hi, This patch merges the D front-end implementation with dmd upstream d7ed327ed. Backports fix for an ICE that occurred when accessing empty array in CTFE. Bootstrapped and regression tested on x86_64-linux-gnu. Committed to trunk as r270294. -- Iain --- diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index 31ea106965b..800be95e4e6 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -5dd3eccc3b0758346f77bee3cdc3f6bd15de339b +d7ed327edb0b01ad56e7e73e77b3401cd565675e The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. diff --git a/gcc/d/dmd/dinterpret.c b/gcc/d/dmd/dinterpret.c index 777f89cf186..40f3e77cbc5 100644 --- a/gcc/d/dmd/dinterpret.c +++ b/gcc/d/dmd/dinterpret.c @@ -6272,11 +6272,14 @@ Expression *scrubReturnValue(Loc loc, Expression *e) /* Returns: true if e is void, * or is an array literal or struct literal of void elements. */ -static bool isVoid(Expression *e) +static bool isVoid(Expression *e, bool checkArray = false) { if (e->op == TOKvoid) return true; +if (checkArray && e->type->ty != Tsarray) +return false; + if (e->op == TOKarrayliteral) return isEntirelyVoid(((ArrayLiteralExp *)e)->elements); @@ -6314,7 +6317,7 @@ Expression *scrubArray(Loc loc, Expressions *elems, bool structlit) // A struct .init may contain void members. // Static array members are a weird special case (bug 10994). -if (structlit && isVoid(e)) +if (structlit && isVoid(e, true)) { e = NULL; } diff --git a/gcc/testsuite/gdc.test/compilable/test19778.d b/gcc/testsuite/gdc.test/compilable/test19778.d new file mode 100644 index 000..87905fae6a0 --- /dev/null +++ b/gcc/testsuite/gdc.test/compilable/test19778.d @@ -0,0 +1,6 @@ +struct S +{ +int[] data; +} +immutable X = S([]); +enum len = X.data.length;
Re: [PATCH] Replace direct PSTL uses of assert() with a macro
removed (patch version applied attached). Tested x86_64-linux, committed to trunk. Jonathan Wakely writes: > On 10/04/19 23:59 +0100, Jonathan Wakely wrote: >>On 10/04/19 15:57 -0700, Thomas Rodgers wrote: >>>Ok, lets try this again. >>> On 09/04/19 15:23 -0700, Thomas Rodgers wrote: >This also replaces calls to __TBB_ASSERT so that there are two macro >definitions provided by c++config - > __PSTL_ASSERT(_Condition) > __PSTL_ASSERT_MSG(_Condition, _Message) > > * include/bits/c++config: > Add definition for __PSTL_ASSERT. > Add definition for __PSTL_ASSERT_MSG. > * include/pstl/algorithm_impl.h: Replace use of assert(). > * include/pstl/numeric_impl.h: Replace use of assert(). > * include/pstl/parallel_backend_tbb.h: > Replace use of assert(). > Replace use of __TBB_ASSERT(). > > * include/pstl/parallel_backend_utils.h: Replace use of assert(). >>> >>>Jonathan Wakely writes: ... you fix now ... >> >>Looks good, OK for trunk, thanks. > > Looks like parallel_backend_tbb.h still includes after this > patch. Assuming tests still pass with that removed, that tweak to the > patch is pre-approved. >From 87483056b4fecb08455443b34b9658f4d7289a74 Mon Sep 17 00:00:00 2001 From: Thomas Rodgers Date: Fri, 5 Apr 2019 15:27:35 -0700 Subject: [PATCH] Replace direct PSTL uses of assert() with a macro This also replaces calls to __TBB_ASSERT so that there are two macro definitions provided by c++config - __PSTL_ASSERT(_Condition) __PSTL_ASSERT_MSG(_Condition, _Message) * include/bits/c++config: Add definition for __PSTL_ASSERT. Add definition for __PSTL_ASSERT_MSG. * include/pstl/algorithm_impl.h: Replace use of assert(). * include/pstl/numeric_impl.h: Replace use of assert(). * include/pstl/parallel_backend_tbb.h: Replace use of assert(). Replace use of __TBB_ASSERT(). * include/pstl/parallel_backend_utils.h: Replace use of assert(). --- libstdc++-v3/include/bits/c++config | 4 libstdc++-v3/include/pstl/algorithm_impl.h| 10 +- libstdc++-v3/include/pstl/numeric_impl.h | 4 ++-- libstdc++-v3/include/pstl/parallel_backend_tbb.h | 9 - .../include/pstl/parallel_backend_utils.h | 15 +++ 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config index 66420a9a3f2..ef8ba96737b 100644 --- a/libstdc++-v3/include/bits/c++config +++ b/libstdc++-v3/include/bits/c++config @@ -690,6 +690,10 @@ namespace std # undef __PSTL_PAR_BACKEND_TBB # endif +# define __PSTL_ASSERT(_Condition) __glibcxx_assert(_Condition) +# define __PSTL_ASSERT_MSG(_Condition, _Message) __glibcxx_assert(_Condition) + + # define __PSTL_PRAGMA(x) _Pragma (#x) # define __PSTL_STRING_AUX(x) #x diff --git a/libstdc++-v3/include/pstl/algorithm_impl.h b/libstdc++-v3/include/pstl/algorithm_impl.h index e06bf60151e..b0d60baae14 100644 --- a/libstdc++-v3/include/pstl/algorithm_impl.h +++ b/libstdc++-v3/include/pstl/algorithm_impl.h @@ -2731,8 +2731,8 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa return !__internal::__parallel_or( std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_ForwardIterator2 __i, _ForwardIterator2 __j) { -assert(__j > __i); -//assert(__j - __i > 1); +__PSTL_ASSERT(__j > __i); +//__PSTL_ASSERT(__j - __i > 1); //1. moving boundaries to "consume" subsequence of equal elements auto __is_equal = [&__comp](_ForwardIterator2 __a, _ForwardIterator2 __b) -> bool { @@ -2756,8 +2756,8 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa //2. testing is __a subsequence of the second range included into the first range auto __b = std::lower_bound(__first1, __last1, *__i, __comp); -assert(!__comp(*(__last1 - 1), *__b)); -assert(!__comp(*(__j - 1), *__i)); +__PSTL_ASSERT(!__comp(*(__last1 - 1), *__b)); +__PSTL_ASSERT(!__comp(*(__j - 1), *__i)); return !std::includes(__b, __last1, __i, __j, __comp); }); }); @@ -2948,7 +2948,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ } const auto __m2 = __left_bound_seq_2 - __first2; -assert(__m1 == 0 || __m2 == 0); +__PSTL_ASSERT(__m1 == 0 || __m2 == 0); if (__m2 > __set_algo_cut_off) { auto __res_or = __result; diff --git a/libstdc++-v3/include/pstl/numeric_impl.h b/libstdc++-v3/include/pstl/numeric_impl.h index 49a4abf5a95..738a61d92f6 100644 --- a/libstdc++-v3/include/pstl/numeric_impl.h +++ b/libstdc++
[patch, fortran, committed] Fix error messages for translation
Hello world, I have just committed the attached patch as obvious and simple. It fixes an error message so that it can be extracted for translation. No test case because it does not change anything (which was verified by regression-testing). Regards Thomas 2019-04-11 Thomas Koenig PR translation/89939 * frontend-passes.c (B_ERROR): Delete macro. (C_ERROR): Delete macro. (B_ERROR_1): New macro. (C_ERROR_1): New macro. (C_ERROR_2): New macro. (inline_matmul_assign): Use new macros. (call_external_blas): Likewise. Index: frontend-passes.c === --- frontend-passes.c (Revision 270182) +++ frontend-passes.c (Arbeitskopie) @@ -3743,13 +3743,16 @@ check_conjg_transpose_variable (gfc_expr *e, bool /* Macros for unified error messages. */ -#define B_ERROR(n) _("Incorrect extent in argument B in MATMUL intrinsic in " \ - "dimension " #n ": is %ld, should be %ld") +#define B_ERROR_1 _("Incorrect extent in argument B in MATMUL intrinsic in " \ + "dimension 1: is %ld, should be %ld") -#define C_ERROR(n) _("Array bound mismatch for dimension " #n " of array " \ - "(%ld/%ld)") +#define C_ERROR_1 _("Array bound mismatch for dimension 1 of array " \ + "(%ld/%ld)") +#define C_ERROR_2 _("Array bound mismatch for dimension 2 of array " \ + "(%ld/%ld)") + /* Inline assignments of the form c = matmul(a,b). Handle only the cases currently where b and c are rank-two arrays. @@ -3976,7 +3979,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); - test = runtime_error_ne (b1, a2, B_ERROR(1)); + test = runtime_error_ne (b1, a2, B_ERROR_1); *next_code_point = test; next_code_point = &test->next; @@ -3984,7 +3987,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt { c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); - test = runtime_error_ne (c1, a1, C_ERROR(1)); + test = runtime_error_ne (c1, a1, C_ERROR_1); *next_code_point = test; next_code_point = &test->next; } @@ -3994,7 +3997,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); - test = runtime_error_ne (b1, a1, B_ERROR(1)); + test = runtime_error_ne (b1, a1, B_ERROR_1); *next_code_point = test; next_code_point = &test->next; @@ -4002,7 +4005,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt { c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); - test = runtime_error_ne (c1, b2, C_ERROR(1)); + test = runtime_error_ne (c1, b2, C_ERROR_1); *next_code_point = test; next_code_point = &test->next; } @@ -4012,7 +4015,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); - test = runtime_error_ne (b1, a2, B_ERROR(1)); + test = runtime_error_ne (b1, a2, B_ERROR_1); *next_code_point = test; next_code_point = &test->next; @@ -4020,13 +4023,13 @@ inline_matmul_assign (gfc_code **c, int *walk_subt { c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); - test = runtime_error_ne (c1, a1, C_ERROR(1)); + test = runtime_error_ne (c1, a1, C_ERROR_1); *next_code_point = test; next_code_point = &test->next; c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); - test = runtime_error_ne (c2, b2, C_ERROR(2)); + test = runtime_error_ne (c2, b2, C_ERROR_2); *next_code_point = test; next_code_point = &test->next; } @@ -4037,7 +4040,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); /* matrix_b is transposed, hence dimension 1 for the error message. */ - test = runtime_error_ne (b2, a2, B_ERROR(1)); + test = runtime_error_ne (b2, a2, B_ERROR_1); *next_code_point = test; next_code_point = &test->next; @@ -4045,13 +4048,13 @@ inline_matmul_assign (gfc_code **c, int *walk_subt { c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); - test = runtime_error_ne (c1, a1, C_ERROR(1)); + test = runtime_error_ne (c1, a1, C_ERROR_1); *next_code_point = test; next_code_point = &test->next; c2
C++ PATCH to use build_converted_constant_bool_expr for noexcept-specifiers
[except.spec] says that "In a noexcept-specifier, the constant-expression, if supplied, shall be a contextually converted constant expression of type bool". We now have a dedicated function for creating such an expression, so build_noexcept_spec should make use of it. As a consequence, we now reject pr86397-2.C but that's correct: converting a pointer to bool is a "boolean conversion", which is not allowed under the rules for a converted constant expression ([expr.const]p7). Bootstrapped/regtested on x86_64-linux, ok for trunk? 2019-04-11 Marek Polacek * except.c (build_noexcept_spec): Use build_converted_constant_bool_expr instead of perform_implicit_conversion_flags. * g++.dg/cpp0x/noexcept30.C: Tweak dg-error. * g++.dg/cpp0x/pr86397-1.C: Likewise. * g++.dg/cpp0x/pr86397-2.C: Likewise. diff --git gcc/cp/except.c gcc/cp/except.c index 40e973fad66..25ab8699589 100644 --- gcc/cp/except.c +++ gcc/cp/except.c @@ -1285,9 +1285,7 @@ build_noexcept_spec (tree expr, tsubst_flags_t complain) if (TREE_CODE (expr) != DEFERRED_NOEXCEPT && !value_dependent_expression_p (expr)) { - expr = perform_implicit_conversion_flags (boolean_type_node, expr, - complain, - LOOKUP_NORMAL); + expr = build_converted_constant_bool_expr (expr, complain); expr = instantiate_non_dependent_expr (expr); expr = cxx_constant_value (expr); } diff --git gcc/testsuite/g++.dg/cpp0x/noexcept30.C gcc/testsuite/g++.dg/cpp0x/noexcept30.C index 8c7ff2aad45..6a9f7821092 100644 --- gcc/testsuite/g++.dg/cpp0x/noexcept30.C +++ gcc/testsuite/g++.dg/cpp0x/noexcept30.C @@ -5,7 +5,7 @@ template struct F { template - void f() noexcept(&F::template f) {} // { dg-error "exception specification" } + void f() noexcept(&F::template f) {} // { dg-error "exception specification|convert" } }; int main () { diff --git gcc/testsuite/g++.dg/cpp0x/pr86397-1.C gcc/testsuite/g++.dg/cpp0x/pr86397-1.C index a0123cba0da..c6cfc1b5561 100644 --- gcc/testsuite/g++.dg/cpp0x/pr86397-1.C +++ gcc/testsuite/g++.dg/cpp0x/pr86397-1.C @@ -1,5 +1,5 @@ // { dg-do compile { target c++11 } } // { dg-options "-fdelete-null-pointer-checks" } void e(); -template void f(int() noexcept(e)) {} -template void f(int()); // { dg-error "does not match" "" { target c++17 } } +template void f(int() noexcept(e)) {} // { dg-error "convert" } +template void f(int()); diff --git gcc/testsuite/g++.dg/cpp0x/pr86397-2.C gcc/testsuite/g++.dg/cpp0x/pr86397-2.C index 8e4956bcf6c..54aefdb0916 100644 --- gcc/testsuite/g++.dg/cpp0x/pr86397-2.C +++ gcc/testsuite/g++.dg/cpp0x/pr86397-2.C @@ -1,5 +1,5 @@ // { dg-do compile { target c++11 } } // { dg-options "-fdelete-null-pointer-checks" } void e(); -template void f(int() noexcept(e)) {} -template void f(int() noexcept); +template void f(int() noexcept(e)) {} // { dg-error "convert" } +template void f(int() noexcept); // { dg-error "does not match any template declaration" "" { target c++17 } }
[PATCH] PR libstdc++/90046 fix build failure on epiphany-elf
The epiphany-elf target aligns structs to 8 bytes, which causes the static_assert(alignof(_Chunk) == 1) to fail. Instead of requiring _Chunks to be positionable at any alignment, ensure new buffers are aligned to alignof(_Chunk). Because the buffer size is a power of two, we know that both the buffer size and sizeof(_Chunk) are multiples of alignof(_Chunk). So is p is aligned to alignof(_Chunk) then so is (p + size - sizeof(_Chunk)). So just ensure the new buffer is aligned to at least alignof(_Chunk), which should already be true because the caller requests at least alignof(max_align_t). PR libstdc++/90046 * src/c++17/memory_resource.cc (monotonic_buffer_resource::_Chunk::allocate): Increase alignment if needed to allow placing a _Chunk at the end of the buffer. (monotonic_buffer_resource::_M_new_buffer): Remove static_assert. Tested x86_64-linux, bootstrapped epiphany-elf, committed to trunk./ commit 8af70056b079035bfb5a6a3a9df4b10194b025cf Author: Jonathan Wakely Date: Thu Apr 11 20:32:40 2019 +0100 PR libstdc++/90046 fix build failure on epiphany-elf The epiphany-elf target aligns structs to 8 bytes, which causes the static_assert(alignof(_Chunk) == 1) to fail. Instead of requiring _Chunks to be positionable at any alignment, ensure new buffers are aligned to alignof(_Chunk). Because the buffer size is a power of two, we know that both the buffer size and sizeof(_Chunk) are multiples of alignof(_Chunk). So is p is aligned to alignof(_Chunk) then so is (p + size - sizeof(_Chunk)). So just ensure the new buffer is aligned to at least alignof(_Chunk), which should already be true because the caller requests at least alignof(max_align_t). PR libstdc++/90046 * src/c++17/memory_resource.cc (monotonic_buffer_resource::_Chunk::allocate): Increase alignment if needed to allow placing a _Chunk at the end of the buffer. (monotonic_buffer_resource::_M_new_buffer): Remove static_assert. diff --git a/libstdc++-v3/src/c++17/memory_resource.cc b/libstdc++-v3/src/c++17/memory_resource.cc index cd11bf5875c..b6698011f5c 100644 --- a/libstdc++-v3/src/c++17/memory_resource.cc +++ b/libstdc++-v3/src/c++17/memory_resource.cc @@ -182,7 +182,21 @@ namespace pmr _Chunk*& __head) { __size = std::__ceil2(__size + sizeof(_Chunk)); + + if constexpr (alignof(_Chunk) > 1) + { + // PR libstdc++/90046 + // For targets like epiphany-elf where alignof(_Chunk) != 1 + // ensure that the last sizeof(_Chunk) bytes in the buffer + // are suitably-aligned for a _Chunk. + // This should be unnecessary, because the caller already + // passes in max(__align, alignof(max_align_t)). + if (__align < alignof(_Chunk)) + __align = alignof(_Chunk); + } + void* __p = __r->allocate(__size, __align); + // Add a chunk defined by (__p, __size, __align) to linked list __head. void* const __back = (char*)__p + __size - sizeof(_Chunk); __head = ::new(__back) _Chunk(__size, __align, __head); @@ -231,9 +245,6 @@ namespace pmr void monotonic_buffer_resource::_M_new_buffer(size_t bytes, size_t alignment) { -// Need to check this somewhere, so put it here: -static_assert(alignof(monotonic_buffer_resource::_Chunk) == 1); - const size_t n = std::max(bytes, _M_next_bufsiz); const size_t m = std::max(alignment, alignof(std::max_align_t)); auto [p, size] = _Chunk::allocate(_M_upstream, n, m, _M_head);
Re: [RFC] D support for S/390
Hi Robin, >> This will occur on any 32-bit target. The following patch (using >> ssize_t instead) allowed the code to compile: > > thanks, included your fix and attempted a more generic version of the > 186 test. > > I also continued debugging some fails further: > > - Most of the MurmurHash fails are simply due to the wrong byte order > being asserted but I did not yet check whether multi-chunk hashes are > more complicated to get right - I suppose not, though. > > - The regex searches are even documented to not work properly on > big-endian platforms. I still guess they could be fixed without too much > effort. > > - Math unit tests fail due to lower precision than on other machines. > Maybe this is because I only tested using -O0. good. I'm seeing those two, but at the moment concentrate on a couple of Solaris/SPARC specific issues (mcontext_t was wrong, causing memory corruption; need a different variant of makecontext; stack needs doubleword alignment; some minor stuff). 64-bit results are not too bad ATM: === gdc Summary for unix/-m64 === # of expected passes29159 # of unexpected failures199 # of unsupported tests 20 === libphobos Summary for unix/-m64 === # of expected passes398 # of unexpected failures22 and some of the failures also occur on Solaris/x86, but 32-bit execution tests are terrible right now, mostly due to the same issue, I believe: FAIL: libphobos.aa/test_aa.d execution test Thread 2 received signal SIGSEGV, Segmentation fault. [Switching to Thread 1 (LWP 1)] 0x0009c9b0 in rt.aaA.Impl.length() const (this=...) at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:87 87 assert(used >= deleted); (gdb) where #0 0x0009c9b0 in rt.aaA.Impl.length() const (this=...) at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:87 #1 0x0009c960 in rt.aaA.AA.empty() const (this=...) at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:44 #2 0x0009e83c in _aaValues (aa=..., keysz=4, valsz=1, tiValueArray=0x718fc ) at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:513 #3 0x00082688 in object.values!(test_aa.testKeysValues1().T[int], test_aa.testKeysValues1().T, int).values(test_aa.testKeysValues1().T[int]) (aa=...) at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/object.d:2171 #4 0x00077a9c in test_aa.testKeysValues1() () at /vol/gcc/src/hg/trunk/solaris/libphobos/testsuite/libphobos.aa/test_aa.d:56 #5 0x00077604 in D main () at /vol/gcc/src/hg/trunk/solaris/libphobos/testsuite/libphobos.aa/test_aa.d:3 1: x/i $pc => 0x9c9b0 <_D2rt3aaA4Impl6lengthMxFNaNbNdNiZk+32>: ld [ %g1 + 8 ], %g2 (gdb) p/x $g1 $1 = 0x8 Investigating this is my next task. > @Iain: With the patch as it is - hoping no additional tab/space damage > :) - is there any chance of getting it upstream anytime soon? I noticed you missed one piece of Iain's typeinfo.cc patch, btw.: diff --git a/gcc/d/typeinfo.cc b/gcc/d/typeinfo.cc --- a/gcc/d/typeinfo.cc +++ b/gcc/d/typeinfo.cc @@ -886,7 +886,7 @@ public: if (cd->isCOMinterface ()) flags |= ClassFlags::isCOMclass; - this->layout_field (build_integer_cst (flags)); + this->layout_field (build_integer_cst (flags, d_uint_type)); /* void *deallocator; OffsetTypeInfo[] m_offTi; (not implemented) Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
On Thu, 2019-04-11 at 17:00 +0100, Richard Earnshaw (lists) wrote: > > > Please add _alt at the end, to distinguish from the insn above. > > Otherwise OK. I added _alt, I also had to move the "0" contraint and the "r" constraint. I had "0" with operand 3 instead of operand 1 and that caused a couple of test failures. I fixed it and the regressions went away. I also had to tweak gcc.target/aarch64/combine_bfxil.c, some of the bfxil instructions became bfi instructions so I updated the scan checks. Steve Ellcey
[PATCH v2] Fix __patchable_function_entries section flags
When -fpatchable-relocation-entry is used, gcc places nops on the prologue of each compiled function and creates a section named __patchable_function_entries which holds relocation entries for the positions in which the nops were placed. As is, gcc creates this section without the proper section flags, causing crashes in the compiled program during its load. Given the above, fix the problem by creating the section with the SECTION_WRITE and SECTION_RELRO flags. The problem was noticed while compiling glibc with -fpatchable-function-entry compiler flag. After applying the patch, this issue was solved. This was also tested on x86-64 arch without visible problems under the gcc standard tests. 2019-04-10 Joao Moreira * targhooks.c (default_print_patchable_function_entry): Emit __patchable_function_entries section with writable flags to allow relocation resolution. Signed-off-by: Joao Moreira --- gcc/targhooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 318f7e9784a..e6f54ddf41b 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1814,7 +1814,7 @@ default_print_patchable_function_entry (FILE *file, ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number); switch_to_section (get_section ("__patchable_function_entries", - 0, NULL)); + SECTION_WRITE | SECTION_RELRO, NULL)); fputs (asm_op, file); assemble_name_raw (file, buf); fputc ('\n', file); -- 2.16.4
Packaging boxes
Hello, Attached you some of our factory made custom box with custom logo you may interest as following. Best regards Slite Best regards, Slite - SLC-PACKAGE CO.LTD www.slcpackage.com Always Serve you by Heart Address: No.10 Hualai Packaging Industry Park,Yuwu Jinhua,Zhejiang China Contact Person: Slite Cao Tel:0086 15314679598 Fax: 0086 571 85135628 What's App/Wechat: 008618159815925 Skype:slite...@outlook.com If you don't want to received such emails,please click UNSUBSCRIBE /http://tracking.slcpack.info/tracking/unsubscribe?d=j1jX4QlVQ6RF_ssSz-6BnzAZEDm5gj-tZu2ODvQ50wx8jQs-ZUMYF-9SdKkXv51xBzsPXJX9LFRG5jR7PWq9KktbjS61gcR5JcCd0Iy46139PgyLFrNQNKsw25_fL1XMlTfPVFdRTEo8eGPYPXE3e5oxQExloiS2uaydIM8wxKv64FKSZRUPgOogxjpBvBfV-egtcflisjbuP_fcolhu82_XUFDv2lsDJ_ixZXKT4JEJlpbZexa1IFuVxM0IHLA0SmsENt8WrCO4i4Jnc19ac-y4gv032ZWrUrzjHE_vNdQg0cc1P7_fGwrDp-Py1VtxBo9jm0Y0Q8NqrKGIMmZogy4j_6HSVXSQicuwQT-KCQIkpQ9B9nRdLuDYlBg_WyGjILz5EJsZVrSX36VO-ttK74TnxN_HFNea0oP7DaHJb1lQMwTCUzZMkPiZdWcb50mQqYMHa3gbe4F2p9BgSMXZ4M62V4hHfR6F61CVqfimE9I60. /Slite Cao, China, Longyan, Fujian, 364200, China
Re: [PATCH] Add support for missing AVX512* ISAs (PR target/89929).
On Thu, Apr 11, 2019 at 1:38 AM Martin Liška wrote: > > Hi. > > The patch is adding missing AVX512 ISAs for target and target_clone > attributes. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Thanks, > Martin > > gcc/ChangeLog: > > 2019-04-10 Martin Liska > > PR target/89929 > * config/i386/i386.c (get_builtin_code_for_version): Add > support for missing AVX512 ISAs. > > gcc/testsuite/ChangeLog: > > 2019-04-10 Martin Liska > > PR target/89929 > * g++.target/i386/mv28.C: New test. > * gcc.target/i386/mvc14.c: New test. > --- > gcc/config/i386/i386.c| 34 ++- > gcc/testsuite/g++.target/i386/mv28.C | 30 +++ > gcc/testsuite/gcc.target/i386/mvc14.c | 16 + > 3 files changed, 79 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/g++.target/i386/mv28.C > create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c > > Since any ISAs beyond AVX512F may be enabled individually, we can't simply assign priorities to them. For GFNI, we can have 1. GFNI 2. GFNI + AVX 3. GFNI + AVX512F 4. GFNI + AVX512F + AVX512VL For this code, GFNI + AVX512BW is ignored: [hjl@gnu-cfl-1 pr89929]$ cat z.ii __attribute__((target("gfni"))) int foo(int i) { return 1; } __attribute__((target("gfni,avx512bw"))) int foo(int i) { return 4; } __attribute__((target("default"))) int foo(int i) { return 3; } int bar () { return foo(2); } [hjl@gnu-cfl-1 pr89929]$ /export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/ -S z.ii -O2 [hjl@gnu-cfl-1 pr89929]$ cat z.s .file "z.ii" .text .p2align 4 .globl _Z3fooi.gfni .type _Z3fooi.gfni, @function _Z3fooi.gfni: .LFB0: .cfi_startproc movl $1, %eax ret .cfi_endproc .LFE0: .size _Z3fooi.gfni, .-_Z3fooi.gfni .p2align 4 .globl _Z3fooi .type _Z3fooi, @function _Z3fooi: .LFB2: .cfi_startproc movl $3, %eax ret .cfi_endproc .LFE2: .size _Z3fooi, .-_Z3fooi .p2align 4 .globl _Z3fooi.avx512bw_gfni .type _Z3fooi.avx512bw_gfni, @function _Z3fooi.avx512bw_gfni: .LFB1: .cfi_startproc movl $4, %eax ret .cfi_endproc .LFE1: .size _Z3fooi.avx512bw_gfni, .-_Z3fooi.avx512bw_gfni .section .text._Z3fooi.resolver,"axG",@progbits,_Z3fooi.resolver,comdat .p2align 4 .weak _Z3fooi.resolver .type _Z3fooi.resolver, @function _Z3fooi.resolver: .LFB5: .cfi_startproc subq $8, %rsp .cfi_def_cfa_offset 16 call __cpu_indicator_init movl $_Z3fooi, %eax movl $_Z3fooi.gfni, %edx testb $1, __cpu_features2(%rip) cmovne %rdx, %rax addq $8, %rsp .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE5: .size _Z3fooi.resolver, .-_Z3fooi.resolver .globl _Z7_Z3fooii .type _Z7_Z3fooii, @gnu_indirect_function .set _Z7_Z3fooii,_Z3fooi.resolver .text .p2align 4 .globl _Z3barv .type _Z3barv, @function _Z3barv: .LFB3: .cfi_startproc movl $2, %edi jmp _Z7_Z3fooii .cfi_endproc .LFE3: .size _Z3barv, .-_Z3barv .ident "GCC: (GNU) 9.0.1 20190411 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-cfl-1 pr89929]$ For AVX512 ISAs, we need a different scheme for priorities. -- H.J.
Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
On 11/04/2019 16:21, Steve Ellcey wrote: > On Thu, 2019-04-11 at 14:58 +, Steve Ellcey wrote: >> >>> You've removed the ..._noshift_alt variant. That wasn't my >>> intention, >>> so perhaps you misunderstood what I was trying to say. >>> >>> The two versions are both needed, since the register tie is not >>> orthogonal to the constants used in the masks and canonicalization >>> will >>> not generate a consistent ordering of the operands. >> >> I started doing this and then convinced myself (perhaps incorrectly) >> that I didn't need the alt version. Operands 1 and 3 are registers >> and Operands 2 and 4 are constants, so the only difference is in the >> call to aarch64_masks_and_shift_for_bfi_p. Given that argument 2 to >> this call is 0 this call should be the equivelent of ((x & MASK1) | >> (y >> & MASK2)) and that should mean that: >> >> aarch64_masks_and_shift_for_bfi_p(X,0,Y) == >> aarch64_masks_and_shift_for_bfi_p(Y,0,X) >> >> >> Maybe I am wrong about that? I will do some expirements. My testing >> did not find any cases in the testsuite where not having the _alt >> version resulted in a bfi not being generated. > > OK, I think I see where my idea that I didn't need both versions is > wrong. There is an extra check on the final argument that is not done > on the initial argument. Here is the patch that I am testing, I think > it is what you have in mind. It looks wierd having arguments 3 and 4 > before 1 and 2, I think that is why I had it differently in my original > patch but if you prefer this version, that is fine with me. > > OK to check in once my build/test is finished? > > > > 2018-04-11 Steve Ellcey > > PR rtl-optimization/87763 > * config/aarch64/aarch64.md (*aarch64_bfi4_noshift): > New Instruction. > > > diff --git a/gcc/config/aarch64/aarch64.md > b/gcc/config/aarch64/aarch64.md > index e0df975..eac688a 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -5565,7 +5565,8 @@ > ) > > ;; Like *aarch64_bfi5_shift but with no shifting, we are just > -;; copying the least significant bits of OP3 to OP0. > +;; copying the least significant bits of OP3 to OP0. We need two versions > +;; of the instruction to handle different checks on the constant values. > > (define_insn "*aarch64_bfi4_noshift" >[(set (match_operand:GPI 0 "register_operand" "=r") > @@ -5579,6 +5580,18 @@ >[(set_attr "type" "bfm")] > ) > > +(define_insn "*aarch64_bfi4_noshift" Please add _alt at the end, to distinguish from the insn above. Otherwise OK. R. > + [(set (match_operand:GPI 0 "register_operand" "=r") > +(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "0") > + (match_operand:GPI 4 "const_int_operand" "n")) > + (and:GPI (match_operand:GPI 1 "register_operand" "r") > + (match_operand:GPI 2 "const_int_operand" "n"] > + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, > + UINTVAL (operands[4]))" > + "bfi\t%0, %3, 0, %P4" > + [(set_attr "type" "bfm")] > +) > + > (define_insn "*extr_insv_lower_reg" >[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") > (match_operand 1 "const_int_operand" "n") >
Re: [RFC] D support for S/390
Hi Iain, > @Rainer Orth any last requests before I commit the fix for PR > d/89255? That will make testing individual library modules easier I > guess. no, the patch is perfectly fine as is. Sorry for not following up earlier; I've now tested it successfully on Solaris 11.[345]/x86 and am also using it on Solaris 11.5/SPARC (still WIP, but getting closer). Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [PATCH] Add target-zlib to top-level configure, use zlib from libphobos
On Sat, 6 Apr 2019 at 19:05, Iain Buclaw wrote: > > On Sat, 6 Apr 2019 at 17:27, Matthias Klose wrote: > > > > On 29.03.19 23:23, Iain Buclaw wrote: > > > On Mon, 18 Feb 2019 at 14:26, Matthias Klose wrote: > > >> > > >> > > >> sorry, I didn't mean to propose to rename the option, so > > >> --with-target-system-zlib=auto sounds fine. > > > > > > OK, a bit belated, but here it is --with-target-system-zlib=auto. > > > > yes, this does the job. > > > > Good. I added documentation to install.texi. > Is this OK for trunk? It's the only prerequisite for applying subdir-objects to libphobos. -- Iain
Re: [RFC] D support for S/390
On Thu, 11 Apr 2019 at 17:43, Robin Dapp wrote: > > Hi Rainer, > > > This will occur on any 32-bit target. The following patch (using > > ssize_t instead) allowed the code to compile: > > thanks, included your fix and attempted a more generic version of the > 186 test. > > I also continued debugging some fails further: > > - Most of the MurmurHash fails are simply due to the wrong byte order > being asserted but I did not yet check whether multi-chunk hashes are > more complicated to get right - I suppose not, though. > > - The regex searches are even documented to not work properly on > big-endian platforms. I still guess they could be fixed without too much > effort. > > - Math unit tests fail due to lower precision than on other machines. > Maybe this is because I only tested using -O0. > > @Iain: With the patch as it is - hoping no additional tab/space damage > :) - is there any chance of getting it upstream anytime soon? > I'll push for it tonight, thanks. @Rainer Orth any last requests before I commit the fix for PR d/89255? That will make testing individual library modules easier I guess. -- Iain
Re: [RFC] D support for S/390
Hi Rainer, > This will occur on any 32-bit target. The following patch (using > ssize_t instead) allowed the code to compile: thanks, included your fix and attempted a more generic version of the 186 test. I also continued debugging some fails further: - Most of the MurmurHash fails are simply due to the wrong byte order being asserted but I did not yet check whether multi-chunk hashes are more complicated to get right - I suppose not, though. - The regex searches are even documented to not work properly on big-endian platforms. I still guess they could be fixed without too much effort. - Math unit tests fail due to lower precision than on other machines. Maybe this is because I only tested using -O0. @Iain: With the patch as it is - hoping no additional tab/space damage :) - is there any chance of getting it upstream anytime soon? Regards Robin diff --git a/gcc/d/dmd/constfold.c b/gcc/d/dmd/constfold.c index ddd356bb966..cb58d4b4f5b 100644 --- a/gcc/d/dmd/constfold.c +++ b/gcc/d/dmd/constfold.c @@ -1752,14 +1752,17 @@ UnionExp Cat(Type *type, Expression *e1, Expression *e2) } else if (e1->op == TOKint64 && e2->op == TOKstring) { -// Concatenate the strings +// [w|d]?char ~ string --> string +// We assume that we only ever prepend one char of the same type +// (wchar,dchar) as the string's characters. + StringExp *es2 = (StringExp *)e2; size_t len = 1 + es2->len; unsigned char sz = es2->sz; dinteger_t v = e1->toInteger(); void *s = mem.xmalloc((len + 1) * sz); -memcpy((char *)s, &v, sz); +Port::valcpy((char *)s, v, sz); memcpy((char *)s + sz, es2->string, es2->len * sz); // Add terminating 0 diff --git a/gcc/d/typeinfo.cc b/gcc/d/typeinfo.cc index dac66acdcd4..21bc4d62a68 100644 --- a/gcc/d/typeinfo.cc +++ b/gcc/d/typeinfo.cc @@ -830,7 +830,7 @@ public: flags |= ClassFlags::noPointers; Lhaspointers: - this->layout_field (size_int (flags)); + this->layout_field (build_integer_cst (flags, d_uint_type)); /* void *deallocator; */ tree ddtor = (cd->aggDelete) @@ -886,7 +886,7 @@ public: if (cd->isCOMinterface ()) flags |= ClassFlags::isCOMclass; - this->layout_field (size_int (flags)); + this->layout_field (build_integer_cst (flags)); /* void *deallocator; OffsetTypeInfo[] m_offTi; (not implemented) @@ -1019,7 +1019,7 @@ public: StructFlags::Type m_flags = 0; if (ti->hasPointers ()) m_flags |= StructFlags::hasPointers; -this->layout_field (size_int (m_flags)); +this->layout_field (build_integer_cst (m_flags, d_uint_type)); /* void function(void*) xdtor; */ tree dtor = (sd->dtor) ? build_address (get_symbol_decl (sd->dtor)) @@ -1033,7 +1033,7 @@ public: this->layout_field (null_pointer_node); /* uint m_align; */ -this->layout_field (size_int (ti->alignsize ())); +this->layout_field (build_integer_cst (ti->alignsize (), d_uint_type)); if (global.params.is64bit) { @@ -1489,8 +1489,8 @@ create_typeinfo (Type *type, Module *mod) array_type_node, array_type_node, ptr_type_node, ptr_type_node, ptr_type_node, ptr_type_node, - size_type_node, ptr_type_node, - ptr_type_node, size_type_node, + d_uint_type, ptr_type_node, + ptr_type_node, d_uint_type, ptr_type_node, argtype, argtype, NULL); } t->vtinfo = TypeInfoStructDeclaration::create (t); diff --git a/gcc/testsuite/gdc.dg/runnable.d b/gcc/testsuite/gdc.dg/runnable.d index e36a2585027..8d9a5868831 100644 --- a/gcc/testsuite/gdc.dg/runnable.d +++ b/gcc/testsuite/gdc.dg/runnable.d @@ -890,12 +890,17 @@ struct S186 } } +static if (size_t.sizeof == 8) + size_t checkval = 0x0202; +static if (size_t.sizeof == 4) + size_t checkval = 0x0202; + void check186(in S186 obj, byte fieldB) { assert(obj.fieldA == 2); assert(obj.fieldB == 0); assert(obj.fieldC == 0); -assert(obj._complete == 2); +assert(obj._complete == checkval); assert(fieldB == 0); } @@ -907,7 +912,7 @@ void test186a(size_t val) assert(obj.fieldA == 2); assert(obj.fieldB == 0); assert(obj.fieldC == 0); -assert(obj._complete == 2); +assert(obj._complete == checkval); obj = S186(val); check186(obj, obj.fieldB); @@ -915,12 +920,12 @@ void test186a(size_t val) assert(obj.fieldA == 2); assert(obj.fieldB == 0); assert(obj.fieldC == 0); -assert(obj._complete == 2); +assert(obj._complete == checkval); } void test186() { -test186a(2); +test186a(checkval); } /**/ diff --git a/gcc/testsuite/gdc.dg/simd.d b/gcc/testsuite/gdc.dg/simd.d index 812b36649aa..7d0aa0168c0 100644 --- a/gcc/testsuite/gdc.dg/simd.d +++ b/gcc/testsuite/gdc.dg/simd.d @@ -1576,7 +1576,10 @@ ubyte[16] foounsto() void testOPvecunsto() { auto a
Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
On Thu, 2019-04-11 at 14:58 +, Steve Ellcey wrote: > > > You've removed the ..._noshift_alt variant. That wasn't my > > intention, > > so perhaps you misunderstood what I was trying to say. > > > > The two versions are both needed, since the register tie is not > > orthogonal to the constants used in the masks and canonicalization > > will > > not generate a consistent ordering of the operands. > > I started doing this and then convinced myself (perhaps incorrectly) > that I didn't need the alt version. Operands 1 and 3 are registers > and Operands 2 and 4 are constants, so the only difference is in the > call to aarch64_masks_and_shift_for_bfi_p. Given that argument 2 to > this call is 0 this call should be the equivelent of ((x & MASK1) | > (y > & MASK2)) and that should mean that: > > aarch64_masks_and_shift_for_bfi_p(X,0,Y) == > aarch64_masks_and_shift_for_bfi_p(Y,0,X) > > > Maybe I am wrong about that? I will do some expirements. My testing > did not find any cases in the testsuite where not having the _alt > version resulted in a bfi not being generated. OK, I think I see where my idea that I didn't need both versions is wrong. There is an extra check on the final argument that is not done on the initial argument. Here is the patch that I am testing, I think it is what you have in mind. It looks wierd having arguments 3 and 4 before 1 and 2, I think that is why I had it differently in my original patch but if you prefer this version, that is fine with me. OK to check in once my build/test is finished? 2018-04-11 Steve Ellcey PR rtl-optimization/87763 * config/aarch64/aarch64.md (*aarch64_bfi4_noshift): New Instruction. diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index e0df975..eac688a 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -5565,7 +5565,8 @@ ) ;; Like *aarch64_bfi5_shift but with no shifting, we are just -;; copying the least significant bits of OP3 to OP0. +;; copying the least significant bits of OP3 to OP0. We need two versions +;; of the instruction to handle different checks on the constant values. (define_insn "*aarch64_bfi4_noshift" [(set (match_operand:GPI 0 "register_operand" "=r") @@ -5579,6 +5580,18 @@ [(set_attr "type" "bfm")] ) +(define_insn "*aarch64_bfi4_noshift" + [(set (match_operand:GPI 0 "register_operand" "=r") +(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "0") + (match_operand:GPI 4 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "const_int_operand" "n"] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%0, %3, 0, %P4" + [(set_attr "type" "bfm")] +) + (define_insn "*extr_insv_lower_reg" [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") (match_operand 1 "const_int_operand" "n")
[C++ Patch/RFC] PR 89900 ("[9 Regression] ICE: Segmentation fault (in check_instantiated_arg)")
Hi, over the last few days I spent some time on this regression, which at first seemed just a minor error-recovery issue, but then I noticed that very slightly tweeking the original testcase uncovered a pretty serious ICE on valid: template void fk (XE..., int/*SW*/); void w9 (void) { fk (0); } The regression has to do with the changes committed by Jason for c++/86932, in particular with the condition in coerce_template_parms: if (template_parameter_pack_p (TREE_VALUE (parm)) && (arg || !(complain & tf_partial)) && !(arg && ARGUMENT_PACK_P (arg))) which has the additional (arg || !complain & tf_partial)) false for the present testcase, thus the null arg is not changed into an empty pack, thus later instantiate_template calls check_instantiated_args which finds it still null and crashes. Now, likely some additional analysis is in order, but for sure there is an important difference between the testcase which came with c++/86932 and the above: non-type vs type template parameter pack. It seems to me that the kind of problem fixed in c++/86932 cannot occur with type packs, because it boils down to a reference to a previous parm (full disclosure: the comments and logic in fixed_parameter_pack_p helped me a lot here). Thus I had the idea of simply restricting the scope of the new condition above by adding an || TREE_CODE (TREE_VALUE (parm)) == TYPE_DECL, which definitely leads to a clean testsuite and a proper behavior on the new testcases, AFAICS. I'm attaching what I tested on x86_64-linux. Thanks, Paolo. // Index: cp/pt.c === --- cp/pt.c (revision 270279) +++ cp/pt.c (working copy) @@ -8475,7 +8475,8 @@ coerce_template_parms (tree parms, arg = NULL_TREE; if (template_parameter_pack_p (TREE_VALUE (parm)) - && (arg || !(complain & tf_partial)) + && (arg || !(complain & tf_partial) + || TREE_CODE (TREE_VALUE (parm)) == TYPE_DECL) && !(arg && ARGUMENT_PACK_P (arg))) { /* Some arguments will be placed in the Index: testsuite/g++.dg/cpp0x/pr89900-1.C === --- testsuite/g++.dg/cpp0x/pr89900-1.C (nonexistent) +++ testsuite/g++.dg/cpp0x/pr89900-1.C (working copy) @@ -0,0 +1,10 @@ +// { dg-do compile { target c++11 } } + +template void +fk (XE..., SW); // { dg-error "12:.SW. has not been declared" } + +void +w9 (void) +{ + fk (0); +} Index: testsuite/g++.dg/cpp0x/pr89900-2.C === --- testsuite/g++.dg/cpp0x/pr89900-2.C (nonexistent) +++ testsuite/g++.dg/cpp0x/pr89900-2.C (working copy) @@ -0,0 +1,10 @@ +// { dg-do compile { target c++11 } } + +template void +fk (XE..., int); + +void +w9 (void) +{ + fk (0); +}
Re: [C++ PATCH] Make some type forbidden diagnostics translatable (PR translation/90035)
On 4/11/19 3:50 AM, Jakub Jelinek wrote: Hi! While looking into another PR, I've noticed that in two spots the parser->type_definition_forbidden_message messages are untranslatable, we construct them at runtime from 3 strings using concat. The following patch fixes it by adding a const char * member to the parser structure and passing that as another argument to error, in the more usual case where the argument string only contains %< and %> and not %qs added in this patch it will be just ignored. I believe this patch is more friendly to the translators (as well as less expensive at compile time because it doesn't have to concat/allocate anything). Another possibility would be to just use in cp_parser_has_attribute_expression G_("types may not be defined in %<__builtin_has_attribute%> expressions") and in cp_parser_sizeof_operand use a switch based on the 3 possible keyword values and use G_("types may not be defined in % expressions") G_("types may not be defined in %<__alignof__%> expressions") G_("types may not be defined in % expressions") G_("types may not be defined in % expressions") depending on that (and C++ mode which determines which alignof spelling is in ridpointers). We wouldn't need to add an extra member, on the other side the translators would need to translate 5 messages instead of just one. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Jason
[PR 89693] Reorganize cgraph_node::clone_of_p
Hi, this reorganization of cgraph_node:clone_of_p() prevents verifier falsely ICEing because it cannot recognize that a former thunk (expanded even before reaching pass pipeline) was cloned by IPA-CP. It basically traverses the clone_of chain at each step of thunk chain traversal, rather than just after it. This is only done when checking is on, so the extra little overhead should be of little concern. Bootstrapped, LTO-bootstrapped and tested on x86_64, OK for trunk? If so, I will check if we need it in GCC 8 and if so, backport it there too. Thanks, Martin 2019-04-11 Martin Jambor PR ipa/89693 * cgraph.c (clone_of_p): Loop over clone chain for each step in the thunk chain. testsuite/ * g++.dg/ipa/pr89693.C: New test. --- gcc/cgraph.c | 30 ++--- gcc/testsuite/g++.dg/ipa/pr89693.C | 52 ++ 2 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/g++.dg/ipa/pr89693.C diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 49d80ad1e28..b1b0b4c42d5 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2977,17 +2977,25 @@ cgraph_node::collect_callers (void) static bool clone_of_p (cgraph_node *node, cgraph_node *node2) { - bool skipped_thunk = false; node = node->ultimate_alias_target (); node2 = node2->ultimate_alias_target (); + if (node2->clone_of == node + || node2->former_clone_of == node->decl) +return true; + + if (!node->thunk.thunk_p && !node->former_thunk_p ()) +{ + while (node2 && node->decl != node2->decl) + node2 = node2->clone_of; + return node2 != NULL; +} + /* There are no virtual clones of thunks so check former_clone_of or if we might have skipped thunks because this adjustments are no longer necessary. */ while (node->thunk.thunk_p || node->former_thunk_p ()) { - if (node2->former_clone_of == node->decl) - return true; if (!node->thunk.this_adjusting) return false; /* In case of instrumented expanded thunks, which can have multiple calls @@ -2996,23 +3004,21 @@ clone_of_p (cgraph_node *node, cgraph_node *node2) if (node->callees->next_callee) return true; node = node->callees->callee->ultimate_alias_target (); - skipped_thunk = true; -} - if (skipped_thunk) -{ if (!node2->clone.args_to_skip || !bitmap_bit_p (node2->clone.args_to_skip, 0)) return false; if (node2->former_clone_of == node->decl) return true; - else if (!node2->clone_of) - return false; + + cgraph_node *n2 = node2; + while (n2 && node->decl != n2->decl) + n2 = n2->clone_of; + if (n2) + return true; } - while (node2 && node->decl != node2->decl) -node2 = node2->clone_of; - return node2 != NULL; + return false; } /* Verify edge count and frequency. */ diff --git a/gcc/testsuite/g++.dg/ipa/pr89693.C b/gcc/testsuite/g++.dg/ipa/pr89693.C new file mode 100644 index 000..4ac83eeeb3a --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr89693.C @@ -0,0 +1,52 @@ +// Copyright (C) 2005 Free Software Foundation, Inc. +// Contributed by Nathan Sidwell 4 Apr 2005 +// Re-purposed to check for re-rurgesnce of PR 89693 in 2019. + +// { dg-do compile } +// { dg-options "-O3 -fno-ipa-icf-functions" } + +// Origin: yan...@ca.ibm.com +// nat...@codesourcery.com + +struct A { + virtual void One (); +}; +struct B { + virtual B *Two (); + virtual B &Three (); +}; + +struct C : A, B +{ + virtual C *Two (); + virtual C &Three (); +}; +void A::One () {} +B *B::Two(){return this;} +B &B::Three(){return *this;} +C *C::Two () {return 0;} +C &C::Three () {return *(C *)0;} + +B *Foo (B *b) +{ + return b->Two (); +} + +B &Bar (B *b) +{ + return b->Three (); +} + +int main () +{ + C c; + + /* We should not adjust a null pointer. */ + if (Foo (&c)) +return 1; + /* But we should adjust a (bogus) null reference. */ + if (!&Bar (&c)) +return 2; + + return 0; +} -- 2.21.0
Re: [EXT] Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
On Thu, 2019-04-11 at 09:59 +0100, Richard Earnshaw (lists) wrote: > > > > > 2018-04-10 Steve Ellcey > > > > PR rtl-optimization/87763 > > * config/aarch64/aarch64-protos.h > > (aarch64_masks_and_shift_for_bfi_p): > > New prototype. > > * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p): > > New function. > > * config/aarch64/aarch64.md (*aarch64_bfi5_shift): > > New instruction. > > (*aarch64_bfi5_shift_alt): Ditto. > > (*aarch64_bfi4_noand): Ditto. > > (*aarch64_bfi4_noand_alt): Ditto. > > (*aarch64_bfi4_noshift): Ditto. > > > > You've removed the ..._noshift_alt variant. That wasn't my intention, > so perhaps you misunderstood what I was trying to say. > > The two versions are both needed, since the register tie is not > orthogonal to the constants used in the masks and canonicalization will > not generate a consistent ordering of the operands. I started doing this and then convinced myself (perhaps incorrectly) that I didn't need the alt version. Operands 1 and 3 are registers and Operands 2 and 4 are constants, so the only difference is in the call to aarch64_masks_and_shift_for_bfi_p. Given that argument 2 to this call is 0 this call should be the equivelent of ((x & MASK1) | (y & MASK2)) and that should mean that: aarch64_masks_and_shift_for_bfi_p(X,0,Y) == aarch64_masks_and_shift_for_bfi_p(Y,0,X) Maybe I am wrong about that? I will do some expirements. My testing did not find any cases in the testsuite where not having the _alt version resulted in a bfi not being generated. Steve Ellcey sell...@marvell.com > >
Re: [PATCH, doc] Note variable shadowing at max macro using statement expression
On 4/11/19 2:31 AM, Tom de Vries wrote: Hi Sandra, thanks for the review. I've attached the updated patch, as well as the resulting relevant gcc.info portion. OK for trunk? This version looks OK. Thanks for helping to improve the docs! -Sandra
Re: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.
Hi Dominique, Yes indeed - I used int(kind(loc(res))) to achieve the same effect. I am looking for but failing to find a similar problem for PR89846. Tomorrow I turn my attention to an incorrect cast in the compiler. Regards Paul On Thu, 11 Apr 2019 at 14:54, Dominique d'Humières wrote: > > > With the following code > > module cdesc > interface > function cdesc_f08(buf, expected) result (res) BIND(C, name="cdesc_c") > USE, INTRINSIC :: ISO_C_BINDING > implicit none > INTEGER(C_INT) :: res > type(*), dimension(..), INTENT(INOUT) :: buf > type(c_ptr),INTENT(IN) :: expected > end function cdesc_f08 > end interface > end module > > program cdesc_test > USE, INTRINSIC :: ISO_C_BINDING > use cdesc > implicit none > integer(c_int), target :: a0, a1(10), a2(10,10), a3(10,10,10) > if (cdesc_f08(a0, C_LOC(a0)) .ne. 1) stop 1 > if (cdesc_f08(a1, C_LOC(a1(1))) .ne. 1) stop 2 > if (cdesc_f08(a2, C_LOC(a2(1,1))) .ne. 1) stop 3 > if (cdesc_f08(a3, C_LOC(a3(1,1,1))) .ne. 1) stop 4 > end program > > The test ISO_Fortran_binding_9.f90 executes without failure for both -m32 and > -m64. > > Dominique > > > Le 10 avr. 2019 à 00:42, Paul Richard Thomas > > a écrit : > > > > Many thanks, sir! I will look into it. > > > > Paul > -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein
Re: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.
With the following code module cdesc interface function cdesc_f08(buf, expected) result (res) BIND(C, name="cdesc_c") USE, INTRINSIC :: ISO_C_BINDING implicit none INTEGER(C_INT) :: res type(*), dimension(..), INTENT(INOUT) :: buf type(c_ptr),INTENT(IN) :: expected end function cdesc_f08 end interface end module program cdesc_test USE, INTRINSIC :: ISO_C_BINDING use cdesc implicit none integer(c_int), target :: a0, a1(10), a2(10,10), a3(10,10,10) if (cdesc_f08(a0, C_LOC(a0)) .ne. 1) stop 1 if (cdesc_f08(a1, C_LOC(a1(1))) .ne. 1) stop 2 if (cdesc_f08(a2, C_LOC(a2(1,1))) .ne. 1) stop 3 if (cdesc_f08(a3, C_LOC(a3(1,1,1))) .ne. 1) stop 4 end program The test ISO_Fortran_binding_9.f90 executes without failure for both -m32 and -m64. Dominique > Le 10 avr. 2019 à 00:42, Paul Richard Thomas > a écrit : > > Many thanks, sir! I will look into it. > > Paul
[PATCH] tilepro.c diagnostics bug fix (PR target/52726)
On Tue, Mar 12, 2019 at 10:58:14AM +0100, Jakub Jelinek wrote: > These are the only remaining cases of gcc-internal-format diagnostics using > HOST_WIDE_INT_PRINT*. That is wrong because the string depends on the exact > host, and xgettext doesn't handle it anyway, so in gcc.pot the messages are > truncated at the spot where HOST_WIDE_INT_PRINT* appears. See also > PR79846 for an earlier change of the same kind. > > Tested on a cross to s390x-linux on the > gcc.target/s390/htm-builtins-compile-2.c > testcase. Ok for trunk? > > 2019-03-12 Jakub Jelinek > > PR target/52726 > * config/s390/s390.md (tabort): Use %wd instead of > HOST_WIDE_INT_PRINT_DEC in error message, reword to avoid two capital > letters and periods. > * config/tilepro/tilepro.c (tilepro_print_operand): Use %wd in > output_operand_lossage instead of HOST_WIDE_INT_PRINT_DEC, replace > 's with %< and %>. Unfortunately, today while building gcc.pot I've noticed: config/tilepro/tilepro.c:4774: warning: Although being used in a format string position, the msgid is not a valid C format string. Reason: In the directive number 2, the token after '<' is not the name of a format specifier macro. The valid macro names are listed in ISO C 99 section 7.8.1. Indeed, output_operand_lossage argument is not gcc-internal-format, so it can't use %wd nor %< nor %>. It is printed using: va_start (ap, cmsgid); pfx_str = this_is_asm_operands ? _("invalid 'asm': ") : "output_operand: "; fmt_string = xasprintf ("%s%s", pfx_str, _(cmsgid)); new_message = xvasprintf (fmt_string, ap); if (this_is_asm_operands) error_for_asm (this_is_asm_operands, "%s", new_message); else internal_error ("%s", new_message); so can use just the fprintf format specifiers. While I perhaps could use %ld instead of %<%wd%> and cast the HOST_WIDE_INT argument to long, it doesn't seem to be worth it IMHO, if the argument is not a CONST_INT, we print also just invalid %%t operand, so I'd suggest doing following. Ok for trunk? 2019-04-11 Jakub Jelinek PR target/52726 * config/tilepro/tilepro.c (tilepro_print_operand): Use just "invalid %%t operand" in output_operand_lossage message. --- gcc/config/tilepro/tilepro.c.jj 2019-03-12 10:46:40.995960027 +0100 +++ gcc/config/tilepro/tilepro.c2019-04-11 14:54:01.708668318 +0200 @@ -4771,7 +4771,7 @@ tilepro_print_operand (FILE *file, rtx x i = exact_log2 (n); if (i < 0) { - output_operand_lossage ("invalid %%t operand %<%wd%>", n); + output_operand_lossage ("invalid %%t operand"); return; } Jakub
[PATCH] Fix #error in mips loongson-mmintrin.h
Hi! While doing make gcc.pot, I've noticed warnings about unterminated string in loongson-mmintrin.h. I don't have any usable mips setup, but just tried: /tmp/1a.c: # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use loongson-mmiintrin.h" /tmp/1b.c: # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\ loongson-mmiintrin.h $ gcc -S -o /tmp/1a.{s,c} /tmp/1a.c:1:9: warning: missing terminating " character # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use ^ /tmp/1a.c:1:3: error: #error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use ^ /tmp/1a.c:2:11: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘-’ token loongson-mmiintrin.h" ^ /tmp/1a.c:2:23: warning: missing terminating " character loongson-mmiintrin.h" ^ /tmp/1a.c:2:23: error: missing terminating " character $ gcc -S -o /tmp/1b.{s,c} /tmp/1b.c:1:3: error: #error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use loongson-mmiintrin.h # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\ ^ and similarly for g++. Ok for trunk? 2019-04-11 Jakub Jelinek * config/mips/loongson-mmiintrin.h: Fix up #error message. --- gcc/config/mips/loongson-mmiintrin.h.jj 2019-01-01 12:37:22.459887959 +0100 +++ gcc/config/mips/loongson-mmiintrin.h2019-04-11 14:41:39.901898228 +0200 @@ -28,8 +28,8 @@ #define _GCC_LOONGSON_MMIINTRIN_H #if !defined(__mips_loongson_mmi) -# error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use -loongson-mmiintrin.h" +# error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\ + loongson-mmiintrin.h #endif #ifdef __cplusplus Jakub
Re: Add parameter to limit LTO streaming parallelism
On Thu, 11 Apr 2019, Jan Hubicka wrote: > > On Thu, 11 Apr 2019, Jan Hubicka wrote: > > > > > Hi, > > > the LTO streaming forks for every partition. With the number of > > > partitions incrased to 128 and relatively large memory usage (around > > > 5GB) needed to WPA firefox this causes kernel to spend a lot of time > > > probably by copying the page tables. > > > > > > This patch makes the streamer to for only lto_parallelism times > > > and strem num_partitions/lto_paralleism in each thread. > > > I have also added parameter because currently -flto=jobserv leads > > > to unlimited parallelism. This should be fixed by conneting to Make's > > > jobsever and build our own mini jobserver to distribute partitions > > > between worker threads, but this seems bit too involved for last minute > > > change in stage4. I plan to work on this and hopefully bacport it to .2 > > > release. > > > > > > I have tested the performance on by 32CPU 64threads box and got best > > > wall time with 32 partitions and therefore I set it by default. I get > > > > > > --param max-lto-streaming-parallelism=1 > > > Time variable usr sys > > > wall GGC > > > phase stream out : 50.65 ( 30%) 20.66 ( 61%) 71.38 > > > ( 35%) 921 kB ( 0%) > > > TOTAL : 170.73 33.69204.64 > > > 7459610 kB > > > > > > --param max-lto-streaming-parallelism=4 > > > phase stream out : 13.79 ( 11%) 6.80 ( 35%) 20.94 > > > ( 14%) 155 kB ( 0%) > > > TOTAL : 130.26 19.68150.46 > > > 7458844 kB > > > > > > --param max-lto-streaming-parallelism=8 > > > phase stream out : 8.94 ( 7%) 5.21 ( 29%) 14.15 > > > ( 10%) 83 kB ( 0%) > > > TOTAL : 125.28 18.09143.54 > > > 7458773 kB > > > > > > --param max-lto-streaming-parallelism=16 > > > phase stream out : 4.56 ( 4%) 4.34 ( 25%) 9.46 > > > ( 7%) 35 kB ( 0%) > > > TOTAL : 122.60 17.21140.56 > > > 7458725 kB > > > > > > --param max-lto-streaming-parallelism=32 > > > phase stream out : 2.34 ( 2%) 5.69 ( 31%) 8.03 > > > ( 6%) 15 kB ( 0%) > > > TOTAL : 118.53 18.36137.08 > > > 7458705 kB > > > > > > --param max-lto-streaming-parallelism=64 > > > phase stream out : 1.63 ( 1%) 15.76 ( 55%) 17.40 > > > ( 12%) 13 kB ( 0%) > > > TOTAL : 122.17 28.66151.00 > > > 7458702 kB > > > > > > --param max-lto-streaming-parallelism=256 > > > phase stream out : 1.28 ( 1%) 9.24 ( 41%) 10.53 > > > ( 8%) 13 kB ( 0%) > > > TOTAL : 116.78 22.56139.53 > > > 7458702 kB > > > > > > Note that it is bit odd that 64 leads to worse results that full > > > parallelism but it seems to reproduce relatively well. Also the usr/sys > > > times for streaming are not representative since they do not account sys > > > time of the forked threads. I am not sure where the fork time is > > > accounted. > > > > > > Generally it seems that the forking performance is not at all that > > > bad and scales reasonably, but I still we should limit the default for > > > something less than 128 we do now. Definitly there are diminishing > > > returns after increasing from 16 or 32 and memory use goes up > > > noticeably. With current trunk memory use also does not seem terribly > > > bad (less global stream streaming makes the workers cheaper) and in all > > > memory traces I collected it is dominated by compilation stage during > > > the full rebuild. > > > > > > I did similar tests for cc1 binary. There the relative time spent in > > > streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32 > > > respectively) > > > > > > Bootstrapped/regtested x86_64-linux, OK? > > > > Please document the new param in invoke.texi. Otherwise looks good > > to me. Btw, do we actually allocate garbage at write-out time? > > Thus, would using threads work as well? > > It is on my TODO to get this working. Last time i checked by adding > abort into ggc_alloc there was some occurences but I think that can be > cleanded up. > > I wonder how much performance hit we would get for enabling pthreads for > lto1 binary and thus building libbackend with it? Is there any performance impact before the first thread creation? (besides eventually a few well-predicted if (threads_are_running) checks?) Richard.
[PATCH] Mark MissingArgError, UnknownError and Warn *.opt arguments as gcc-internal-format (PR translation/90041)
Hi! These 3 *.opt messages are printed using: opts-common.c: error_at (loc, option->missing_argument_error, opt); opts-common.c: error_at (loc, e->unknown_error, arg); opts-common.c:warning_at (loc, 0, decoded->warn_message, opt); and so they do support the various gcc-internal-format format string specifiers like %<, %>, %qs etc. The following patch tweaks exgettext so that it marks them as such and also adjusts c.opt so that it passes contrib/check-internal-format-escaping.py after such a change. Tested on x86_64-linux using make gcc.pot, eyeballing the resulting file and trying ./xgcc -B ./ -S hello.c -fhandle-exceptions Ok for trunk? 2019-04-11 Jakub Jelinek PR translation/90041 * po/exgettext: Print MissingArgError, UnknownError or Warn *.opt argument using error or warning instead of _ to mark it as gcc-internal-format. * c.opt (-fhandle-exceptions): Use %< and %> around option names in the Warn diagnostics. --- gcc/po/exgettext.jj 2019-01-01 12:37:21.728899953 +0100 +++ gcc/po/exgettext2019-04-11 13:39:14.361331358 +0200 @@ -252,7 +252,7 @@ echo "scanning option files..." >&2 } else sub("\\).*", "", line) printf("#line %d \"%s\"\n", lineno, file) - printf("_(\"%s\")\n", line) + printf("error(\"%s\")\n", line) } if ((field == 1) && /UnknownError/) { line = $0 @@ -263,7 +263,7 @@ echo "scanning option files..." >&2 } else sub("\\).*", "", line) printf("#line %d \"%s\"\n", lineno, file) - printf("_(\"%s\")\n", line) + printf("error(\"%s\")\n", line) } if ((field == 1) && /Warn\(/) { line = $0 @@ -274,7 +274,7 @@ echo "scanning option files..." >&2 } else sub("\\).*", "", line) printf("#line %d \"%s\"\n", lineno, file) - printf("_(\"%s\")\n", line) + printf("warning(0, \"%s\")\n", line) } if (field == 2) { line = $0 --- gcc/c-family/c.opt.jj 2019-03-22 15:39:58.149990452 +0100 +++ gcc/c-family/c.opt 2019-04-11 14:06:38.234474269 +0200 @@ -1516,7 +1516,7 @@ fguiding-decls C++ ObjC++ Deprecated fhandle-exceptions -C++ ObjC++ Optimization Alias(fexceptions) Warn({-fhandle-exceptions has been renamed -fexceptions (and is now on by default)}) +C++ ObjC++ Optimization Alias(fexceptions) Warn({%<-fhandle-exceptions%> has been renamed %<-fexceptions%> (and is now on by default)}) fhonor-std C++ ObjC++ Deprecated Jakub
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On 4/11/19 1:15 PM, Jakub Jelinek wrote: > On Thu, Apr 11, 2019 at 01:10:00PM +0200, Richard Biener wrote: >> On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek wrote: >>> >>> On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote: On 4/11/19 11:57 AM, Richard Biener wrote: > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: >> >> Hi. >> >> The patch is catching duplicate 'default' values in target attribute. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >> >> Ready to be installed? > > I wonder if it isn't better to ignore duplicate "default"s (given you > needed to > adjust a testcase even)? Well, possibly yes. But I would prefer to have a more strict checking. >>> >>> I agree, default, default is not really useful and rejecting it is fine. >>> Having a testcase covering that is of course desirable. >> >> There is one in the patch. >> >> That said, I was worried to go down the same route as that volatile asm >> thing and the patch needs to be on branches as well? > > On branches I'd indeed ignore second and following default if that is what > we've done before. > > Jakub > Well, I'm probably no planning to backport that. Martin
Re: Add parameter to limit LTO streaming parallelism
> On Thu, 11 Apr 2019, Jan Hubicka wrote: > > > Hi, > > the LTO streaming forks for every partition. With the number of > > partitions incrased to 128 and relatively large memory usage (around > > 5GB) needed to WPA firefox this causes kernel to spend a lot of time > > probably by copying the page tables. > > > > This patch makes the streamer to for only lto_parallelism times > > and strem num_partitions/lto_paralleism in each thread. > > I have also added parameter because currently -flto=jobserv leads > > to unlimited parallelism. This should be fixed by conneting to Make's > > jobsever and build our own mini jobserver to distribute partitions > > between worker threads, but this seems bit too involved for last minute > > change in stage4. I plan to work on this and hopefully bacport it to .2 > > release. > > > > I have tested the performance on by 32CPU 64threads box and got best > > wall time with 32 partitions and therefore I set it by default. I get > > > > --param max-lto-streaming-parallelism=1 > > Time variable usr sys > > wall GGC > > phase stream out : 50.65 ( 30%) 20.66 ( 61%) 71.38 ( > > 35%) 921 kB ( 0%) > > TOTAL : 170.73 33.69204.64 > > 7459610 kB > > > > --param max-lto-streaming-parallelism=4 > > phase stream out : 13.79 ( 11%) 6.80 ( 35%) 20.94 ( > > 14%) 155 kB ( 0%) > > TOTAL : 130.26 19.68150.46 > > 7458844 kB > > > > --param max-lto-streaming-parallelism=8 > > phase stream out : 8.94 ( 7%) 5.21 ( 29%) 14.15 ( > > 10%) 83 kB ( 0%) > > TOTAL : 125.28 18.09143.54 > > 7458773 kB > > > > --param max-lto-streaming-parallelism=16 > > phase stream out : 4.56 ( 4%) 4.34 ( 25%) 9.46 ( > > 7%) 35 kB ( 0%) > > TOTAL : 122.60 17.21140.56 > > 7458725 kB > > > > --param max-lto-streaming-parallelism=32 > > phase stream out : 2.34 ( 2%) 5.69 ( 31%) 8.03 ( > > 6%) 15 kB ( 0%) > > TOTAL : 118.53 18.36137.08 > > 7458705 kB > > > > --param max-lto-streaming-parallelism=64 > > phase stream out : 1.63 ( 1%) 15.76 ( 55%) 17.40 ( > > 12%) 13 kB ( 0%) > > TOTAL : 122.17 28.66151.00 > > 7458702 kB > > > > --param max-lto-streaming-parallelism=256 > > phase stream out : 1.28 ( 1%) 9.24 ( 41%) 10.53 ( > > 8%) 13 kB ( 0%) > > TOTAL : 116.78 22.56139.53 > > 7458702 kB > > > > Note that it is bit odd that 64 leads to worse results that full > > parallelism but it seems to reproduce relatively well. Also the usr/sys > > times for streaming are not representative since they do not account sys > > time of the forked threads. I am not sure where the fork time is > > accounted. > > > > Generally it seems that the forking performance is not at all that > > bad and scales reasonably, but I still we should limit the default for > > something less than 128 we do now. Definitly there are diminishing > > returns after increasing from 16 or 32 and memory use goes up > > noticeably. With current trunk memory use also does not seem terribly > > bad (less global stream streaming makes the workers cheaper) and in all > > memory traces I collected it is dominated by compilation stage during > > the full rebuild. > > > > I did similar tests for cc1 binary. There the relative time spent in > > streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32 > > respectively) > > > > Bootstrapped/regtested x86_64-linux, OK? > > Please document the new param in invoke.texi. Otherwise looks good > to me. Btw, do we actually allocate garbage at write-out time? > Thus, would using threads work as well? It is on my TODO to get this working. Last time i checked by adding abort into ggc_alloc there was some occurences but I think that can be cleanded up. I wonder how much performance hit we would get for enabling pthreads for lto1 binary and thus building libbackend with it? Honza
Re: Add parameter to limit LTO streaming parallelism
On Thu, 11 Apr 2019, Jan Hubicka wrote: > Hi, > the LTO streaming forks for every partition. With the number of > partitions incrased to 128 and relatively large memory usage (around > 5GB) needed to WPA firefox this causes kernel to spend a lot of time > probably by copying the page tables. > > This patch makes the streamer to for only lto_parallelism times > and strem num_partitions/lto_paralleism in each thread. > I have also added parameter because currently -flto=jobserv leads > to unlimited parallelism. This should be fixed by conneting to Make's > jobsever and build our own mini jobserver to distribute partitions > between worker threads, but this seems bit too involved for last minute > change in stage4. I plan to work on this and hopefully bacport it to .2 > release. > > I have tested the performance on by 32CPU 64threads box and got best > wall time with 32 partitions and therefore I set it by default. I get > > --param max-lto-streaming-parallelism=1 > Time variable usr sys > wall GGC > phase stream out : 50.65 ( 30%) 20.66 ( 61%) 71.38 ( > 35%) 921 kB ( 0%) > TOTAL : 170.73 33.69204.64 > 7459610 kB > > --param max-lto-streaming-parallelism=4 > phase stream out : 13.79 ( 11%) 6.80 ( 35%) 20.94 ( > 14%) 155 kB ( 0%) > TOTAL : 130.26 19.68150.46 > 7458844 kB > > --param max-lto-streaming-parallelism=8 > phase stream out : 8.94 ( 7%) 5.21 ( 29%) 14.15 ( > 10%) 83 kB ( 0%) > TOTAL : 125.28 18.09143.54 > 7458773 kB > > --param max-lto-streaming-parallelism=16 > phase stream out : 4.56 ( 4%) 4.34 ( 25%) 9.46 ( > 7%) 35 kB ( 0%) > TOTAL : 122.60 17.21140.56 > 7458725 kB > > --param max-lto-streaming-parallelism=32 > phase stream out : 2.34 ( 2%) 5.69 ( 31%) 8.03 ( > 6%) 15 kB ( 0%) > TOTAL : 118.53 18.36137.08 > 7458705 kB > > --param max-lto-streaming-parallelism=64 > phase stream out : 1.63 ( 1%) 15.76 ( 55%) 17.40 ( > 12%) 13 kB ( 0%) > TOTAL : 122.17 28.66151.00 > 7458702 kB > > --param max-lto-streaming-parallelism=256 > phase stream out : 1.28 ( 1%) 9.24 ( 41%) 10.53 ( > 8%) 13 kB ( 0%) > TOTAL : 116.78 22.56139.53 > 7458702 kB > > Note that it is bit odd that 64 leads to worse results that full > parallelism but it seems to reproduce relatively well. Also the usr/sys > times for streaming are not representative since they do not account sys > time of the forked threads. I am not sure where the fork time is > accounted. > > Generally it seems that the forking performance is not at all that > bad and scales reasonably, but I still we should limit the default for > something less than 128 we do now. Definitly there are diminishing > returns after increasing from 16 or 32 and memory use goes up > noticeably. With current trunk memory use also does not seem terribly > bad (less global stream streaming makes the workers cheaper) and in all > memory traces I collected it is dominated by compilation stage during > the full rebuild. > > I did similar tests for cc1 binary. There the relative time spent in > streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32 > respectively) > > Bootstrapped/regtested x86_64-linux, OK? Please document the new param in invoke.texi. Otherwise looks good to me. Btw, do we actually allocate garbage at write-out time? Thus, would using threads work as well? Thanks, Richard. > * params.def (PARAM_MAX_LTO_STREAMING_PARALLELISM): New parameter. > * lto.c (do_stream_out): rename to ... > (stream_out): ... this one; move original code to ... > (stream_out_partitions_1, stream_out_partitions): ... these new > functions. > (lto_wpa_write_files): Honnor lto_parallelism > Index: params.def > === > --- params.def(revision 270143) > +++ params.def(working copy) > @@ -1146,6 +1146,11 @@ DEFPARAM (MAX_PARTITION_SIZE, > "Maximal size of a partition for LTO (in estimated instructions).", > 100, 0, INT_MAX) > > +DEFPARAM (PARAM_MAX_LTO_STREAMING_PARALLELISM, > + "max-lto-streaming-parallelism", > + "maximal number of LTO partitions streamed in parallel.", > + 32, 1, 0) > + > /* Diagnostic parameters. */ > > DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP, > Index: lto/lto.c > =
Add parameter to limit LTO streaming parallelism
Hi, the LTO streaming forks for every partition. With the number of partitions incrased to 128 and relatively large memory usage (around 5GB) needed to WPA firefox this causes kernel to spend a lot of time probably by copying the page tables. This patch makes the streamer to for only lto_parallelism times and strem num_partitions/lto_paralleism in each thread. I have also added parameter because currently -flto=jobserv leads to unlimited parallelism. This should be fixed by conneting to Make's jobsever and build our own mini jobserver to distribute partitions between worker threads, but this seems bit too involved for last minute change in stage4. I plan to work on this and hopefully bacport it to .2 release. I have tested the performance on by 32CPU 64threads box and got best wall time with 32 partitions and therefore I set it by default. I get --param max-lto-streaming-parallelism=1 Time variable usr sys wall GGC phase stream out : 50.65 ( 30%) 20.66 ( 61%) 71.38 ( 35%) 921 kB ( 0%) TOTAL : 170.73 33.69204.64 7459610 kB --param max-lto-streaming-parallelism=4 phase stream out : 13.79 ( 11%) 6.80 ( 35%) 20.94 ( 14%) 155 kB ( 0%) TOTAL : 130.26 19.68150.46 7458844 kB --param max-lto-streaming-parallelism=8 phase stream out : 8.94 ( 7%) 5.21 ( 29%) 14.15 ( 10%) 83 kB ( 0%) TOTAL : 125.28 18.09143.54 7458773 kB --param max-lto-streaming-parallelism=16 phase stream out : 4.56 ( 4%) 4.34 ( 25%) 9.46 ( 7%) 35 kB ( 0%) TOTAL : 122.60 17.21140.56 7458725 kB --param max-lto-streaming-parallelism=32 phase stream out : 2.34 ( 2%) 5.69 ( 31%) 8.03 ( 6%) 15 kB ( 0%) TOTAL : 118.53 18.36137.08 7458705 kB --param max-lto-streaming-parallelism=64 phase stream out : 1.63 ( 1%) 15.76 ( 55%) 17.40 ( 12%) 13 kB ( 0%) TOTAL : 122.17 28.66151.00 7458702 kB --param max-lto-streaming-parallelism=256 phase stream out : 1.28 ( 1%) 9.24 ( 41%) 10.53 ( 8%) 13 kB ( 0%) TOTAL : 116.78 22.56139.53 7458702 kB Note that it is bit odd that 64 leads to worse results that full parallelism but it seems to reproduce relatively well. Also the usr/sys times for streaming are not representative since they do not account sys time of the forked threads. I am not sure where the fork time is accounted. Generally it seems that the forking performance is not at all that bad and scales reasonably, but I still we should limit the default for something less than 128 we do now. Definitly there are diminishing returns after increasing from 16 or 32 and memory use goes up noticeably. With current trunk memory use also does not seem terribly bad (less global stream streaming makes the workers cheaper) and in all memory traces I collected it is dominated by compilation stage during the full rebuild. I did similar tests for cc1 binary. There the relative time spent in streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32 respectively) Bootstrapped/regtested x86_64-linux, OK? * params.def (PARAM_MAX_LTO_STREAMING_PARALLELISM): New parameter. * lto.c (do_stream_out): rename to ... (stream_out): ... this one; move original code to ... (stream_out_partitions_1, stream_out_partitions): ... these new functions. (lto_wpa_write_files): Honnor lto_parallelism Index: params.def === --- params.def (revision 270143) +++ params.def (working copy) @@ -1146,6 +1146,11 @@ DEFPARAM (MAX_PARTITION_SIZE, "Maximal size of a partition for LTO (in estimated instructions).", 100, 0, INT_MAX) +DEFPARAM (PARAM_MAX_LTO_STREAMING_PARALLELISM, + "max-lto-streaming-parallelism", + "maximal number of LTO partitions streamed in parallel.", + 32, 1, 0) + /* Diagnostic parameters. */ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP, Index: lto/lto.c === --- lto/lto.c (revision 270143) +++ lto/lto.c (working copy) @@ -2304,7 +2304,7 @@ static lto_file *current_lto_file; /* Actually stream out ENCODER into TEMP_FILENAME. */ static void -do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part) +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part) { lto_file *file = lto_obj_file_open (temp_filename, true); if (!file) @@ -2352,
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On Thu, Apr 11, 2019 at 01:10:00PM +0200, Richard Biener wrote: > On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek wrote: > > > > On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote: > > > On 4/11/19 11:57 AM, Richard Biener wrote: > > > > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: > > > >> > > > >> Hi. > > > >> > > > >> The patch is catching duplicate 'default' values in target attribute. > > > >> > > > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > > >> > > > >> Ready to be installed? > > > > > > > > I wonder if it isn't better to ignore duplicate "default"s (given you > > > > needed to > > > > adjust a testcase even)? > > > > > > Well, possibly yes. But I would prefer to have a more strict checking. > > > > I agree, default, default is not really useful and rejecting it is fine. > > Having a testcase covering that is of course desirable. > > There is one in the patch. > > That said, I was worried to go down the same route as that volatile asm > thing and the patch needs to be on branches as well? On branches I'd indeed ignore second and following default if that is what we've done before. Jakub
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek wrote: > > On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote: > > On 4/11/19 11:57 AM, Richard Biener wrote: > > > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: > > >> > > >> Hi. > > >> > > >> The patch is catching duplicate 'default' values in target attribute. > > >> > > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > >> > > >> Ready to be installed? > > > > > > I wonder if it isn't better to ignore duplicate "default"s (given you > > > needed to > > > adjust a testcase even)? > > > > Well, possibly yes. But I would prefer to have a more strict checking. > > I agree, default, default is not really useful and rejecting it is fine. > Having a testcase covering that is of course desirable. There is one in the patch. That said, I was worried to go down the same route as that volatile asm thing and the patch needs to be on branches as well? Anyhow, patch is OK for trunk. Richard. > Jakub
Re: [aarch64] PR90016 - aarch64: reference to undeclared N in help for command line option
On Wed, 10 Apr 2019 at 11:57, Richard Earnshaw (lists) wrote: > > 'to N' is now redundant and misleading given the earlier change to use > . > > Removed > > * config/aarch64/aarch64.opt (msve-vector-bits): Remove redundant and > obsolete reference to N. > Hi Richard, I've just added the missing final '.', which was causing FAIL: compiler driver --help=target option(s): "^ +-.*[^:.]$" absent from output: " -msve-vector-bits= Set the number of bits in an SVE vector register" Christophe > R.
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote: > On 4/11/19 11:57 AM, Richard Biener wrote: > > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: > >> > >> Hi. > >> > >> The patch is catching duplicate 'default' values in target attribute. > >> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > >> > >> Ready to be installed? > > > > I wonder if it isn't better to ignore duplicate "default"s (given you > > needed to > > adjust a testcase even)? > > Well, possibly yes. But I would prefer to have a more strict checking. I agree, default, default is not really useful and rejecting it is fine. Having a testcase covering that is of course desirable. Jakub
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On 4/11/19 11:57 AM, Richard Biener wrote: > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: >> >> Hi. >> >> The patch is catching duplicate 'default' values in target attribute. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >> >> Ready to be installed? > > I wonder if it isn't better to ignore duplicate "default"s (given you needed > to > adjust a testcase even)? Well, possibly yes. But I would prefer to have a more strict checking. Martin > > Richard. > >> Thanks, >> Martin >> >> gcc/ChangeLog: >> >> 2019-04-10 Martin Liska >> >> PR middle-end/89970 >> * multiple_target.c (create_dispatcher_calls): Wrap ifunc >> in error message. >> (separate_attrs): Handle multiple 'default's. >> (expand_target_clones): Rework error handling code. >> >> gcc/testsuite/ChangeLog: >> >> 2019-04-10 Martin Liska >> >> PR middle-end/89970 >> * gcc.target/i386/mvc15.c: New test. >> * gcc.target/i386/mvc3.c: Quote target in error pattern. >> * gcc.target/i386/mvc4.c: Remove duplicit 'default'. >> --- >> gcc/multiple_target.c | 38 ++- >> gcc/testsuite/gcc.target/i386/mvc15.c | 11 >> gcc/testsuite/gcc.target/i386/mvc3.c | 2 +- >> gcc/testsuite/gcc.target/i386/mvc4.c | 2 +- >> 4 files changed, 38 insertions(+), 15 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c >> >>
Re: [Patch] [testsuite][arm] Update warning prune regex
Hi Matthew, On 4/11/19 10:26 AM, Matthew Malcomson wrote: r269586 changed the format of some warning messages. Each switch in the warning message is now surrounded by single quotes. This commit updates the regex's in arm.exp dejagnu files to match the new format and remove the extra 20+ FAILs on excess errors that are introduced on certain variations (e.g. arm-eabi-aem/-marm/-march=armv7-a/-mfpu=vfpv3-d16/-mfloat-abi=softfp). Regtested arm.exp with cross-compiler arm-none-eabi Ok. Thanks, Kyrill gcc/testsuite/ChangeLog: 2019-04-11 Matthew Malcomson * g++.target/arm/arm.exp: Change format of default prune regex. * gcc.target/arm/arm.exp: Change format of default prune regex. ### Attachment also inlined for ease of reply ### diff --git a/gcc/testsuite/g++.target/arm/arm.exp b/gcc/testsuite/g++.target/arm/arm.exp index 247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368 100644 --- a/gcc/testsuite/g++.target/arm/arm.exp +++ b/gcc/testsuite/g++.target/arm/arm.exp @@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then { global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init diff --git a/gcc/testsuite/gcc.target/arm/arm.exp b/gcc/testsuite/gcc.target/arm/arm.exp index 0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de 100644 --- a/gcc/testsuite/gcc.target/arm/arm.exp +++ b/gcc/testsuite/gcc.target/arm/arm.exp @@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then { # This variable should only apply to tests called in this exp file. global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init
Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
On Thu, Apr 11, 2019 at 10:37 AM Martin Liška wrote: > > Hi. > > The patch is catching duplicate 'default' values in target attribute. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? I wonder if it isn't better to ignore duplicate "default"s (given you needed to adjust a testcase even)? Richard. > Thanks, > Martin > > gcc/ChangeLog: > > 2019-04-10 Martin Liska > > PR middle-end/89970 > * multiple_target.c (create_dispatcher_calls): Wrap ifunc > in error message. > (separate_attrs): Handle multiple 'default's. > (expand_target_clones): Rework error handling code. > > gcc/testsuite/ChangeLog: > > 2019-04-10 Martin Liska > > PR middle-end/89970 > * gcc.target/i386/mvc15.c: New test. > * gcc.target/i386/mvc3.c: Quote target in error pattern. > * gcc.target/i386/mvc4.c: Remove duplicit 'default'. > --- > gcc/multiple_target.c | 38 ++- > gcc/testsuite/gcc.target/i386/mvc15.c | 11 > gcc/testsuite/gcc.target/i386/mvc3.c | 2 +- > gcc/testsuite/gcc.target/i386/mvc4.c | 2 +- > 4 files changed, 38 insertions(+), 15 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c > >
[patch] PR 79842 - i18n: subword translation in "Can't use the same %smodule"
Hi all, I am planning to commit the following patch --- ../_clean/gcc/fortran/module.c 2019-03-21 20:46:46.0 +0100 +++ gcc/fortran/module.c2019-04-11 10:28:37.0 +0200 @@ -7144,8 +7144,12 @@ gfc_use_module (gfc_use_list *module) for (p = gfc_state_stack; p; p = p->previous) if ((p->state == COMP_MODULE || p->state == COMP_SUBMODULE) && strcmp (p->sym->name, module_name) == 0) - gfc_fatal_error ("Cannot USE the same %smodule we're building", - p->state == COMP_SUBMODULE ? "sub" : ""); + { + if (p->state == COMP_SUBMODULE) + gfc_fatal_error ("Cannot USE a submodule that is currently built"); + else + gfc_fatal_error ("Cannot USE a module that is currently built"); + } init_pi_tree (); init_true_name_tree (); While testing it I did not find any coverage of these errors in the test suite. TIA Dominique
Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965, take 2)
On Thu, 11 Apr 2019, Jakub Jelinek wrote: > On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote: > > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > > > > During those 2 bootstraps/regtests, data.load_found has been set just > > > on the new testcase on ia32. > > > > Hmm, I wonder whether we really need to DCE calls after reload? > > That said, I'm not familiar enough with the code to check if the > > patch makes sense (can there ever be uses of the argument slots > > _after_ the call?). > > And here is the patch on top of the refactoring patch. > As for the argument slots after the call, hope Jeff and Vlad won't mind if I > copy'n'paste what they said on IRC yesterday: > law: vmakarov: in PR89965, is it valid RTL to store something in stack > argument slot and read it again before the call that uses that stack slot? > law: vmakarov: if yes, I have a rough idea what to do, but if stack argument > slots may be only stored and can be read only by a callee and not by > something else in the caller, then it is a RA issue > jakub: i think it is not defined (or described). But the old > reload used equiv memory for long time to store value in memory. LRA just > uses the same approach. > i think you're allowed to read it up to the call point, who "owns" the > contents of the slot after the call point is subject to debate :-) > not sure if we defined things once a REG_EQUIV is on the MEM, but that > would tend to imply the memory is unchanging across the call > (though one could ask how in the world the caller would know the callee > didn't clobber the slot) Ok, so that says "well, nothing prevents it from being used" for example for a const/pure call (depending on what alias analysis does here...). Who owns the argument slots should be defined by the ABI I guess. So iff alias-analysis doesn't leave us with the chance to pick up the stack slot contents after the call in any case we're of course fine. And if we do have the chance but should not then the call instruction needs some explicit clobbering of the argument area (or the RTL IL needs to be documented that such clobbering is implicit and the alias machinery then needs to honor that). Still leaving review of the patch to somebody else (it's clearly closing at least part of the hole). Richard. > 2019-04-11 Jakub Jelinek > > PR rtl-optimization/89965 > * dce.c: Include rtl-iter.h. > (struct check_argument_load_data): New type. > (check_argument_load): New function. > (find_call_stack_args): Check for loads from stack slots still tracked > in sp_bytes and punt if any is found. > > * gcc.target/i386/pr89965.c: New test. > > --- gcc/dce.c.jj 2019-04-11 10:30:00.436705065 +0200 > +++ gcc/dce.c 2019-04-11 10:43:00.926828390 +0200 > @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. > #include "valtrack.h" > #include "tree-pass.h" > #include "dbgcnt.h" > +#include "rtl-iter.h" > > > /* - > @@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call >return off; > } > > +/* Data for check_argument_load called via note_uses. */ > +struct check_argument_load_data { > + bitmap sp_bytes; > + HOST_WIDE_INT min_sp_off, max_sp_off; > + rtx_call_insn *call_insn; > + bool fast; > + bool load_found; > +}; > + > +/* Helper function for find_call_stack_args. Check if there are > + any loads from the argument slots in between the const/pure call > + and store to the argument slot, set LOAD_FOUND if any is found. */ > + > +static void > +check_argument_load (rtx *loc, void *data) > +{ > + struct check_argument_load_data *d > += (struct check_argument_load_data *) data; > + subrtx_iterator::array_type array; > + FOR_EACH_SUBRTX (iter, array, *loc, NONCONST) > +{ > + const_rtx mem = *iter; > + HOST_WIDE_INT size; > + if (MEM_P (mem) > + && MEM_SIZE_KNOWN_P (mem) > + && MEM_SIZE (mem).is_constant (&size)) > + { > + HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast); > + if (off != INTTYPE_MINIMUM (HOST_WIDE_INT) > + && off < d->max_sp_off > + && off + size > d->min_sp_off) > + for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off); > + byte < MIN (off + size, d->max_sp_off); byte++) > + if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off)) > + { > + d->load_found = true; > + return; > + } > + } > +} > +} > + > /* Try to find all stack stores of CALL_INSN arguments if > ACCUMULATE_OUTGOING_ARGS. If all stack stores have been found > and it is therefore safe to eliminate the call, return true, > @@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal >/* Walk backwards, looking for argument stores. The search stops > when seeing another call, sp adjustment o
[Patch] [testsuite][arm] Update warning prune regex
r269586 changed the format of some warning messages. Each switch in the warning message is now surrounded by single quotes. This commit updates the regex's in arm.exp dejagnu files to match the new format and remove the extra 20+ FAILs on excess errors that are introduced on certain variations (e.g. arm-eabi-aem/-marm/-march=armv7-a/-mfpu=vfpv3-d16/-mfloat-abi=softfp). Regtested arm.exp with cross-compiler arm-none-eabi gcc/testsuite/ChangeLog: 2019-04-11 Matthew Malcomson * g++.target/arm/arm.exp: Change format of default prune regex. * gcc.target/arm/arm.exp: Change format of default prune regex. ### Attachment also inlined for ease of reply### diff --git a/gcc/testsuite/g++.target/arm/arm.exp b/gcc/testsuite/g++.target/arm/arm.exp index 247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368 100644 --- a/gcc/testsuite/g++.target/arm/arm.exp +++ b/gcc/testsuite/g++.target/arm/arm.exp @@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then { global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init diff --git a/gcc/testsuite/gcc.target/arm/arm.exp b/gcc/testsuite/gcc.target/arm/arm.exp index 0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de 100644 --- a/gcc/testsuite/gcc.target/arm/arm.exp +++ b/gcc/testsuite/gcc.target/arm/arm.exp @@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then { # This variable should only apply to tests called in this exp file. global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init diff --git a/gcc/testsuite/g++.target/arm/arm.exp b/gcc/testsuite/g++.target/arm/arm.exp index 247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368 100644 --- a/gcc/testsuite/g++.target/arm/arm.exp +++ b/gcc/testsuite/g++.target/arm/arm.exp @@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then { global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init diff --git a/gcc/testsuite/gcc.target/arm/arm.exp b/gcc/testsuite/gcc.target/arm/arm.exp index 0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de 100644 --- a/gcc/testsuite/gcc.target/arm/arm.exp +++ b/gcc/testsuite/gcc.target/arm/arm.exp @@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then { # This variable should only apply to tests called in this exp file. global dg_runtest_extra_prunes set dg_runtest_extra_prunes "" -lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts with -m(cpu|arch)=.* switch" +lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts with '-m(cpu|arch)=.*' switch" # Initialize `dg'. dg-init
Re: [PATCH] Add support for missing AVX512* ISAs (PR target/89929).
On Thu, Apr 11, 2019 at 10:38 AM Martin Liška wrote: > > Hi. > > The patch is adding missing AVX512 ISAs for target and target_clone > attributes. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Thanks, > Martin > > gcc/ChangeLog: > > 2019-04-10 Martin Liska > > PR target/89929 > * config/i386/i386.c (get_builtin_code_for_version): Add > support for missing AVX512 ISAs. > > gcc/testsuite/ChangeLog: > > 2019-04-10 Martin Liska > > PR target/89929 > * g++.target/i386/mv28.C: New test. > * gcc.target/i386/mvc14.c: New test. LGTM, but HJ should check the priorities. Uros. > --- > gcc/config/i386/i386.c| 34 ++- > gcc/testsuite/g++.target/i386/mv28.C | 30 +++ > gcc/testsuite/gcc.target/i386/mvc14.c | 16 + > 3 files changed, 79 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/g++.target/i386/mv28.C > create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c > >
[PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965, take 2)
On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote: > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > > > During those 2 bootstraps/regtests, data.load_found has been set just > > on the new testcase on ia32. > > Hmm, I wonder whether we really need to DCE calls after reload? > That said, I'm not familiar enough with the code to check if the > patch makes sense (can there ever be uses of the argument slots > _after_ the call?). And here is the patch on top of the refactoring patch. As for the argument slots after the call, hope Jeff and Vlad won't mind if I copy'n'paste what they said on IRC yesterday: law: vmakarov: in PR89965, is it valid RTL to store something in stack argument slot and read it again before the call that uses that stack slot? law: vmakarov: if yes, I have a rough idea what to do, but if stack argument slots may be only stored and can be read only by a callee and not by something else in the caller, then it is a RA issue jakub: i think it is not defined (or described). But the old reload used equiv memory for long time to store value in memory. LRA just uses the same approach. i think you're allowed to read it up to the call point, who "owns" the contents of the slot after the call point is subject to debate :-) not sure if we defined things once a REG_EQUIV is on the MEM, but that would tend to imply the memory is unchanging across the call (though one could ask how in the world the caller would know the callee didn't clobber the slot) 2019-04-11 Jakub Jelinek PR rtl-optimization/89965 * dce.c: Include rtl-iter.h. (struct check_argument_load_data): New type. (check_argument_load): New function. (find_call_stack_args): Check for loads from stack slots still tracked in sp_bytes and punt if any is found. * gcc.target/i386/pr89965.c: New test. --- gcc/dce.c.jj2019-04-11 10:30:00.436705065 +0200 +++ gcc/dce.c 2019-04-11 10:43:00.926828390 +0200 @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. #include "valtrack.h" #include "tree-pass.h" #include "dbgcnt.h" +#include "rtl-iter.h" /* - @@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call return off; } +/* Data for check_argument_load called via note_uses. */ +struct check_argument_load_data { + bitmap sp_bytes; + HOST_WIDE_INT min_sp_off, max_sp_off; + rtx_call_insn *call_insn; + bool fast; + bool load_found; +}; + +/* Helper function for find_call_stack_args. Check if there are + any loads from the argument slots in between the const/pure call + and store to the argument slot, set LOAD_FOUND if any is found. */ + +static void +check_argument_load (rtx *loc, void *data) +{ + struct check_argument_load_data *d += (struct check_argument_load_data *) data; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, *loc, NONCONST) +{ + const_rtx mem = *iter; + HOST_WIDE_INT size; + if (MEM_P (mem) + && MEM_SIZE_KNOWN_P (mem) + && MEM_SIZE (mem).is_constant (&size)) + { + HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast); + if (off != INTTYPE_MINIMUM (HOST_WIDE_INT) + && off < d->max_sp_off + && off + size > d->min_sp_off) + for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off); +byte < MIN (off + size, d->max_sp_off); byte++) + if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off)) + { + d->load_found = true; + return; + } + } +} +} + /* Try to find all stack stores of CALL_INSN arguments if ACCUMULATE_OUTGOING_ARGS. If all stack stores have been found and it is therefore safe to eliminate the call, return true, @@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal /* Walk backwards, looking for argument stores. The search stops when seeing another call, sp adjustment or memory store other than argument store. */ + struct check_argument_load_data data += { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false }; ret = false; for (insn = PREV_INSN (call_insn); insn; insn = prev_insn) { @@ -414,6 +459,10 @@ find_call_stack_args (rtx_call_insn *cal if (!set || SET_DEST (set) == stack_pointer_rtx) break; + note_uses (&PATTERN (insn), check_argument_load, &data); + if (data.load_found) + break; + if (!MEM_P (SET_DEST (set))) continue; --- gcc/testsuite/gcc.target/i386/pr89965.c.jj 2019-04-11 10:28:56.211764660 +0200 +++ gcc/testsuite/gcc.target/i386/pr89965.c 2019-04-11 10:28:56.211764660 +0200 @@ -0,0 +1,39 @@ +/* PR rtl-optimization/89965 */ +/* { dg-do run } */ +/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations -fno-tree-dce -fno-tree-ter" } */ +/*
Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
On 10/04/2019 21:31, Steve Ellcey wrote: > On Wed, 2019-04-10 at 11:10 +0100, Richard Earnshaw (lists) wrote: >> >> OK with those changes. >> >> R. > > I made the changes you suggested and checked in the patch. Just to be > complete, here is the final version of the patch that I checked in. > > 2018-04-10 Steve Ellcey > > PR rtl-optimization/87763 > * config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p): > New prototype. > * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p): > New function. > * config/aarch64/aarch64.md (*aarch64_bfi5_shift): > New instruction. > (*aarch64_bfi5_shift_alt): Ditto. > (*aarch64_bfi4_noand): Ditto. > (*aarch64_bfi4_noand_alt): Ditto. > (*aarch64_bfi4_noshift): Ditto. > You've removed the ..._noshift_alt variant. That wasn't my intention, so perhaps you misunderstood what I was trying to say. The two versions are both needed, since the register tie is not orthogonal to the constants used in the masks and canonicalization will not generate a consistent ordering of the operands. My point is that you can make the 'alt' version from +(define_insn "*aarch64_bfi4_noshift" + [(set (match_operand:GPI 0 "register_operand" "=r") +(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n"] + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%0, %3, 0, %P4" + [(set_attr "type" "bfm")] +) by simply duplicating it and permuting the numbering of the operands in the pattern part, the remaining parts remain identical, including the final condition and the insn printing string: +(define_insn "*aarch64_bfi4_noshift_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") +(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n"] Operand order permuted + "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" does not change + "bfi\t%0, %3, 0, %P4" does not change + [(set_attr "type" "bfm")] +) R. > 2018-04-10 Steve Ellcey > > PR rtl-optimization/87763 > * gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks > to bfi. > * gcc.target/aarch64/combine_bfi_2.c: New test. > > > combine.patch > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index b035e35..b6c0d0a 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx); > void aarch64_declare_function_name (FILE *, const char*, tree); > bool aarch64_legitimate_pic_operand_p (rtx); > bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx); > +bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned > HOST_WIDE_INT, > + unsigned HOST_WIDE_INT, > + unsigned HOST_WIDE_INT); > bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); > bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); > opt_machine_mode aarch64_sve_pred_mode (unsigned int); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 95e5b03..9be7548 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -9336,6 +9336,35 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode > mode, rtx mask, >& ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0; > } > > +/* Return true if the masks and a shift amount from an RTX of the form > + ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into > + a BFI instruction of mode MODE. See *arch64_bfi patterns. */ > + > +bool > +aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode, > +unsigned HOST_WIDE_INT mask1, > +unsigned HOST_WIDE_INT shft_amnt, > +unsigned HOST_WIDE_INT mask2) > +{ > + unsigned HOST_WIDE_INT t; > + > + /* Verify that there is no overlap in what bits are set in the two masks. > */ > + if (mask1 != ~mask2) > +return false; > + > + /* Verify that mask2 is not all zeros or ones. */ > + if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U) > +return false; > + > + /* The shift amount should always be less than the mode size. */ > + gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode)); > + > + /* Verify that
Re: GCC 7 backport
On 3/28/19 9:52 AM, Martin Liška wrote: > Hi. > > Backporting one documentation fix. > > Martin > One more patch that I'm going to backport. Martin >From a096ce6d9f8e636815af5a237881e1ba443f1b18 Mon Sep 17 00:00:00 2001 From: marxin Date: Fri, 8 Mar 2019 12:55:40 + Subject: [PATCH] Backport r269492 gcc/ChangeLog: 2019-03-08 Martin Liska PR target/86952 * config/i386/i386.c (ix86_option_override_internal): Disable jump tables when retpolines are used. gcc/testsuite/ChangeLog: 2019-03-08 Martin Liska PR target/86952 * gcc.target/i386/pr86952.c: New test. * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match scanned pattern. * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. --- gcc/config/i386/i386.c| 6 + .../gcc.target/i386/indirect-thunk-7.c| 2 +- .../gcc.target/i386/indirect-thunk-extern-7.c | 2 +- .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++ 5 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 18dce38640b..96bdb3a78ba 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -6282,6 +6282,12 @@ ix86_option_override_internal (bool main_args_p, target_option_default_node = target_option_current_node = build_target_option_node (opts); + /* PR86952: jump table usage with retpolines is slow. + The PR provides some numbers about the slowness. */ + if (ix86_indirect_branch != indirect_branch_keep + && !opts_set->x_flag_jump_tables) +opts->x_flag_jump_tables = 0; + return true; } diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c index 3c72036dbaf..53868f46558 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c index 2b9a33e93dc..bc185fe98af 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c index ea009245a58..e6f064959a1 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/pr86952.c b/gcc/testsuite/gcc.target/i386/pr86952.c new file mode 100644 index 000..004e167add0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr86952.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mindirect-branch=thunk" } */ + +int global; + +int +foo (int x) +{ + switch (x & 7) +{ + case 0: ; return 1722; + case 1: global += 1; return 1060; + case 2: ; return 1990; + case 3: ; return 1242; + case 4: ; return 1466; + case 5: ; return 894; + case 6: ; return 570; + case 7: ; return 572; + default: return 0; +} +} + +/* { dg-final { scan-assembler-not "jmp\[ \t\]\\*" } } */ -- 2.21.0
Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)
On Thu, 11 Apr 2019, Jakub Jelinek wrote: > On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote: > > > The following patch just punts if we find loads from stack slots in > > > between > > > where they are pushed and the const/pure call. In addition to that, I've > > > outlined the same largish sequence that had 3 copies in the function > > > already > > > and I'd need to add 4th copy, so in the end the dce.c changes are removing > > > more than adding: > > > 1 file changed, 118 insertions(+), 135 deletions(-) > > > > The refactoring itself is probably OK (and if done separately > > makes reviewing easier). > > Here is just the refactoring, ok for trunk? OK. Thanks, Richard. > 2019-04-11 Jakub Jelinek > > PR rtl-optimization/89965 > * dce.c (sp_based_mem_offset): New function. > (find_call_stack_args): Use sp_based_mem_offset. > > --- gcc/dce.c.jj 2019-04-11 10:26:18.509365653 +0200 > +++ gcc/dce.c 2019-04-11 10:30:00.436705065 +0200 > @@ -272,6 +272,58 @@ check_argument_store (HOST_WIDE_INT size >return true; > } > > +/* If MEM has sp address, return 0, if it has sp + const address, > + return that const, if it has reg address where reg is set to sp + const > + and FAST is false, return const, otherwise return > + INTTYPE_MINUMUM (HOST_WIDE_INT). */ > + > +static HOST_WIDE_INT > +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast) > +{ > + HOST_WIDE_INT off = 0; > + rtx addr = XEXP (mem, 0); > + if (GET_CODE (addr) == PLUS > + && REG_P (XEXP (addr, 0)) > + && CONST_INT_P (XEXP (addr, 1))) > +{ > + off = INTVAL (XEXP (addr, 1)); > + addr = XEXP (addr, 0); > +} > + if (addr == stack_pointer_rtx) > +return off; > + > + if (!REG_P (addr) || fast) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + /* If not fast, use chains to see if addr wasn't set to sp + offset. */ > + df_ref use; > + FOR_EACH_INSN_USE (use, call_insn) > + if (rtx_equal_p (addr, DF_REF_REG (use))) > +break; > + > + if (use == NULL) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + struct df_link *defs; > + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) > +if (! DF_REF_IS_ARTIFICIAL (defs->ref)) > + break; > + > + if (defs == NULL) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + rtx set = single_set (DF_REF_INSN (defs->ref)); > + if (!set) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + if (GET_CODE (SET_SRC (set)) != PLUS > + || XEXP (SET_SRC (set), 0) != stack_pointer_rtx > + || !CONST_INT_P (XEXP (SET_SRC (set), 1))) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + off += INTVAL (XEXP (SET_SRC (set), 1)); > + return off; > +} > > /* Try to find all stack stores of CALL_INSN arguments if > ACCUMULATE_OUTGOING_ARGS. If all stack stores have been found > @@ -309,58 +361,13 @@ find_call_stack_args (rtx_call_insn *cal > if (GET_CODE (XEXP (p, 0)) == USE > && MEM_P (XEXP (XEXP (p, 0), 0))) >{ > - rtx mem = XEXP (XEXP (p, 0), 0), addr; > - HOST_WIDE_INT off = 0, size; > + rtx mem = XEXP (XEXP (p, 0), 0); > + HOST_WIDE_INT size; > if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size)) > return false; > - addr = XEXP (mem, 0); > - if (GET_CODE (addr) == PLUS > - && REG_P (XEXP (addr, 0)) > - && CONST_INT_P (XEXP (addr, 1))) > - { > - off = INTVAL (XEXP (addr, 1)); > - addr = XEXP (addr, 0); > - } > - if (addr != stack_pointer_rtx) > - { > - if (!REG_P (addr)) > - return false; > - /* If not fast, use chains to see if addr wasn't set to > -sp + offset. */ > - if (!fast) > - { > - df_ref use; > - struct df_link *defs; > - rtx set; > - > - FOR_EACH_INSN_USE (use, call_insn) > - if (rtx_equal_p (addr, DF_REF_REG (use))) > - break; > - > - if (use == NULL) > - return false; > - > - for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) > - if (! DF_REF_IS_ARTIFICIAL (defs->ref)) > - break; > - > - if (defs == NULL) > - return false; > - > - set = single_set (DF_REF_INSN (defs->ref)); > - if (!set) > - return false; > - > - if (GET_CODE (SET_SRC (set)) != PLUS > - || XEXP (SET_SRC (set), 0) != stack_pointer_rtx > - || !CONST_INT_P (XEXP (SET_SRC (set), 1))) > - return false; > - > - off += INTVAL (XEXP (SET_SRC (set), 1)); > - } > - else > - return false; > - } > + HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast); > + if (off == INTTYPE_MINIMUM (HOST_WIDE_INT)) > + return false; > min_sp_off = MIN (min_sp_off, off); > m
Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)
On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote: > > The following patch just punts if we find loads from stack slots in between > > where they are pushed and the const/pure call. In addition to that, I've > > outlined the same largish sequence that had 3 copies in the function already > > and I'd need to add 4th copy, so in the end the dce.c changes are removing > > more than adding: > > 1 file changed, 118 insertions(+), 135 deletions(-) > > The refactoring itself is probably OK (and if done separately > makes reviewing easier). Here is just the refactoring, ok for trunk? 2019-04-11 Jakub Jelinek PR rtl-optimization/89965 * dce.c (sp_based_mem_offset): New function. (find_call_stack_args): Use sp_based_mem_offset. --- gcc/dce.c.jj2019-04-11 10:26:18.509365653 +0200 +++ gcc/dce.c 2019-04-11 10:30:00.436705065 +0200 @@ -272,6 +272,58 @@ check_argument_store (HOST_WIDE_INT size return true; } +/* If MEM has sp address, return 0, if it has sp + const address, + return that const, if it has reg address where reg is set to sp + const + and FAST is false, return const, otherwise return + INTTYPE_MINUMUM (HOST_WIDE_INT). */ + +static HOST_WIDE_INT +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast) +{ + HOST_WIDE_INT off = 0; + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1))) +{ + off = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); +} + if (addr == stack_pointer_rtx) +return off; + + if (!REG_P (addr) || fast) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + /* If not fast, use chains to see if addr wasn't set to sp + offset. */ + df_ref use; + FOR_EACH_INSN_USE (use, call_insn) + if (rtx_equal_p (addr, DF_REF_REG (use))) +break; + + if (use == NULL) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + struct df_link *defs; + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) +if (! DF_REF_IS_ARTIFICIAL (defs->ref)) + break; + + if (defs == NULL) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + rtx set = single_set (DF_REF_INSN (defs->ref)); + if (!set) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + if (GET_CODE (SET_SRC (set)) != PLUS + || XEXP (SET_SRC (set), 0) != stack_pointer_rtx + || !CONST_INT_P (XEXP (SET_SRC (set), 1))) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + off += INTVAL (XEXP (SET_SRC (set), 1)); + return off; +} /* Try to find all stack stores of CALL_INSN arguments if ACCUMULATE_OUTGOING_ARGS. If all stack stores have been found @@ -309,58 +361,13 @@ find_call_stack_args (rtx_call_insn *cal if (GET_CODE (XEXP (p, 0)) == USE && MEM_P (XEXP (XEXP (p, 0), 0))) { - rtx mem = XEXP (XEXP (p, 0), 0), addr; - HOST_WIDE_INT off = 0, size; + rtx mem = XEXP (XEXP (p, 0), 0); + HOST_WIDE_INT size; if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size)) return false; - addr = XEXP (mem, 0); - if (GET_CODE (addr) == PLUS - && REG_P (XEXP (addr, 0)) - && CONST_INT_P (XEXP (addr, 1))) - { - off = INTVAL (XEXP (addr, 1)); - addr = XEXP (addr, 0); - } - if (addr != stack_pointer_rtx) - { - if (!REG_P (addr)) - return false; - /* If not fast, use chains to see if addr wasn't set to - sp + offset. */ - if (!fast) - { - df_ref use; - struct df_link *defs; - rtx set; - - FOR_EACH_INSN_USE (use, call_insn) - if (rtx_equal_p (addr, DF_REF_REG (use))) - break; - - if (use == NULL) - return false; - - for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) - if (! DF_REF_IS_ARTIFICIAL (defs->ref)) - break; - - if (defs == NULL) - return false; - - set = single_set (DF_REF_INSN (defs->ref)); - if (!set) - return false; - - if (GET_CODE (SET_SRC (set)) != PLUS - || XEXP (SET_SRC (set), 0) != stack_pointer_rtx - || !CONST_INT_P (XEXP (SET_SRC (set), 1))) - return false; - - off += INTVAL (XEXP (SET_SRC (set), 1)); - } - else - return false; - } + HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast); + if (off == INTTYPE_MINIMUM (HOST_WIDE_INT)) + return false; min_sp_off = MIN (min_sp_off, off); max_sp_off = MAX (max_sp_off, off + size); } @@ -376,51 +383,19 @@ find_call_stack_args (rtx_call_insn *cal if (GET_CODE (XEXP (p, 0)) == USE && MEM_P (XEXP (XEXP (p, 0), 0))) { - rtx mem = XEXP (XEXP (p, 0), 0), a
[PATCH] Add support for missing AVX512* ISAs (PR target/89929).
Hi. The patch is adding missing AVX512 ISAs for target and target_clone attributes. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: 2019-04-10 Martin Liska PR target/89929 * config/i386/i386.c (get_builtin_code_for_version): Add support for missing AVX512 ISAs. gcc/testsuite/ChangeLog: 2019-04-10 Martin Liska PR target/89929 * g++.target/i386/mv28.C: New test. * gcc.target/i386/mvc14.c: New test. --- gcc/config/i386/i386.c| 34 ++- gcc/testsuite/g++.target/i386/mv28.C | 30 +++ gcc/testsuite/gcc.target/i386/mvc14.c | 16 + 3 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/i386/mv28.C create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 57d5a7b8432..2d09a006fb6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -31883,6 +31883,22 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_AVX2, P_PROC_AVX2, P_AVX512F, +P_AVX512VL, +P_AVX512BW, +P_AVX512DQ, +P_AVX512CD, +P_AVX512ER, +P_AVX512PF, +P_AVX512VBMI, +P_AVX512IFMA, +P_AVX5124VNNIW, +P_AVX5124FMAPS, +P_AVX512VPOPCNTDQ, +P_AVX512VBMI2, +P_GFNI, +P_VPCLMULQDQ, +P_AVX512VNNI, +P_AVX512BITALG, P_PROC_AVX512F }; @@ -31916,7 +31932,23 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) {"fma", P_FMA}, {"bmi2", P_BMI2}, {"avx2", P_AVX2}, - {"avx512f", P_AVX512F} + {"avx512f", P_AVX512F}, + {"avx512vl", P_AVX512VL}, + {"avx512bw", P_AVX512BW}, + {"avx512dq", P_AVX512DQ}, + {"avx512cd", P_AVX512CD}, + {"avx512er", P_AVX512ER}, + {"avx512pf", P_AVX512PF}, + {"avx512vbmi", P_AVX512VBMI}, + {"avx512ifma", P_AVX512IFMA}, + {"avx5124vnniw", P_AVX5124VNNIW}, + {"avx5124fmaps", P_AVX5124FMAPS}, + {"avx512vpopcntdq", P_AVX512VPOPCNTDQ}, + {"avx512vbmi2", P_AVX512VBMI2}, + {"gfni", P_GFNI}, + {"vpclmulqdq", P_VPCLMULQDQ}, + {"avx512vnni", P_AVX512VNNI}, + {"avx512bitalg", P_AVX512BITALG} }; diff --git a/gcc/testsuite/g++.target/i386/mv28.C b/gcc/testsuite/g++.target/i386/mv28.C new file mode 100644 index 000..401edc2585f --- /dev/null +++ b/gcc/testsuite/g++.target/i386/mv28.C @@ -0,0 +1,30 @@ +/* { dg-do compile} */ +/* { dg-require-ifunc "" } */ + +#define FN(TARGET) \ +void __attribute__ ((target(TARGET))) foo () {} + +FN("avx512f") +FN("avx512vl") +FN("avx512bw") +FN("avx512dq") +FN("avx512cd") +FN("avx512er") +FN("avx512pf") +FN("avx512vbmi") +FN("avx512ifma") +FN("avx5124vnniw") +FN("avx5124fmaps") +FN("avx512vpopcntdq") +FN("avx512vbmi2") +FN("gfni") +FN("vpclmulqdq") +FN("avx512vnni") +FN("avx512bitalg") +FN("default") + +int main() +{ + foo (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/mvc14.c b/gcc/testsuite/gcc.target/i386/mvc14.c new file mode 100644 index 000..bff39af3998 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mvc14.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ + +__attribute__((target_clones("avx512f", "avx512vl", "avx512bw", "avx512dq", + "avx512cd", "avx512er", "avx512pf", "avx512vbmi", + "avx512ifma", "avx5124vnniw", "avx5124fmaps", + "avx512vpopcntdq", "avx512vbmi2", "gfni", + "vpclmulqdq", "avx512vnni", "avx512bitalg", + "default"))) +int foo (); + +int +bar () +{ + return foo(); +}
[PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).
Hi. The patch is catching duplicate 'default' values in target attribute. Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: 2019-04-10 Martin Liska PR middle-end/89970 * multiple_target.c (create_dispatcher_calls): Wrap ifunc in error message. (separate_attrs): Handle multiple 'default's. (expand_target_clones): Rework error handling code. gcc/testsuite/ChangeLog: 2019-04-10 Martin Liska PR middle-end/89970 * gcc.target/i386/mvc15.c: New test. * gcc.target/i386/mvc3.c: Quote target in error pattern. * gcc.target/i386/mvc4.c: Remove duplicit 'default'. --- gcc/multiple_target.c | 38 ++- gcc/testsuite/gcc.target/i386/mvc15.c | 11 gcc/testsuite/gcc.target/i386/mvc3.c | 2 +- gcc/testsuite/gcc.target/i386/mvc4.c | 2 +- 4 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c index fd983cc4ad9..99dc9da44f5 100644 --- a/gcc/multiple_target.c +++ b/gcc/multiple_target.c @@ -73,7 +73,7 @@ create_dispatcher_calls (struct cgraph_node *node) if (!targetm.has_ifunc_p ()) { error_at (DECL_SOURCE_LOCATION (node->decl), - "the call requires ifunc, which is not" + "the call requires %, which is not" " supported by this target"); return; } @@ -235,8 +235,10 @@ get_attr_str (tree arglist, char *attr_str) } /* Return number of attributes separated by comma and put them into ARGS. - If there is no DEFAULT attribute return -1. If there is an empty - string in attribute return -2. */ + If there is no DEFAULT attribute return -1. + If there is an empty string in attribute return -2. + If there are multiple DEFAULT attributes return -3. + */ static int separate_attrs (char *attr_str, char **attrs, int attrnum) @@ -256,6 +258,8 @@ separate_attrs (char *attr_str, char **attrs, int attrnum) } if (default_count == 0) return -1; + else if (default_count > 1) +return -3; else if (i + default_count < attrnum) return -2; @@ -347,8 +351,7 @@ expand_target_clones (struct cgraph_node *node, bool definition) if (attr_len == -1) { warning_at (DECL_SOURCE_LOCATION (node->decl), - 0, - "single % attribute is ignored"); + 0, "single % attribute is ignored"); return false; } @@ -374,21 +377,30 @@ expand_target_clones (struct cgraph_node *node, bool definition) char **attrs = XNEWVEC (char *, attrnum); attrnum = separate_attrs (attr_str, attrs, attrnum); - if (attrnum == -1) + switch (attrnum) { +case -1: error_at (DECL_SOURCE_LOCATION (node->decl), - "default target was not set"); - XDELETEVEC (attrs); - XDELETEVEC (attr_str); - return false; -} - else if (attrnum == -2) -{ + "% target was not set"); + break; +case -2: error_at (DECL_SOURCE_LOCATION (node->decl), "an empty string cannot be in % attribute"); + break; +case -3: + error_at (DECL_SOURCE_LOCATION (node->decl), + "multiple % targets were set"); + break; +default: + break; +} + + if (attrnum < 0) +{ XDELETEVEC (attrs); XDELETEVEC (attr_str); return false; + } cgraph_function_version_info *decl1_v = NULL; diff --git a/gcc/testsuite/gcc.target/i386/mvc15.c b/gcc/testsuite/gcc.target/i386/mvc15.c new file mode 100644 index 000..955aa1e9fe8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mvc15.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-ifunc "" } */ + +__attribute__((target_clones("default", "default"))) +int foo (); /* { dg-error "multiple 'default' targets were set" } */ + +int +bar () +{ + return foo(); +} diff --git a/gcc/testsuite/gcc.target/i386/mvc3.c b/gcc/testsuite/gcc.target/i386/mvc3.c index 1c7755fabbe..f14fc02694c 100644 --- a/gcc/testsuite/gcc.target/i386/mvc3.c +++ b/gcc/testsuite/gcc.target/i386/mvc3.c @@ -2,7 +2,7 @@ /* { dg-require-ifunc "" } */ __attribute__((target_clones("avx","arch=slm","arch=core-avx2"))) -int foo (); /* { dg-error "default target was not set" } */ +int foo (); /* { dg-error "'default' target was not set" } */ int bar () diff --git a/gcc/testsuite/gcc.target/i386/mvc4.c b/gcc/testsuite/gcc.target/i386/mvc4.c index 91293c34cca..1b9b0b4f174 100644 --- a/gcc/testsuite/gcc.target/i386/mvc4.c +++ b/gcc/testsuite/gcc.target/i386/mvc4.c @@ -1,7 +1,7 @@ /* { dg-do run } */ /* { dg-require-ifunc "" } */ -__attribute__((target_clones("default","avx","default"))) +__attribute__((target_clones("default","avx"))) int foo () {
[PATCH, doc] Note variable shadowing at max macro using statement expression
[ was: Re: [RFC, doc] Note variable shadowing at max macro using statement expression ] On 09-04-19 22:51, Sandra Loosemore wrote: > On 4/8/19 5:38 AM, Tom de Vries wrote: >> Hi, >> >> When suggesting to rewrite the unsafe (with respect to multiple >> evaluation of >> arguments) macro definition: >> ... >> #define max(a,b) ((a) > (b) ? (a) : (b)) >> ... >> into the safe macro definition: >> ... >> #define maxint(a,b) \ >> ({int _a = (a), _b = (b); _a > _b ? _a : _b; }) >> ... >> mention the variable shadowing problem for: >> ... >> #define maxint3(a, b, c) \ >> ({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); }) >> ... >> >> Any comments? > > The content looks reasonable, but I have some copy-editing nits. > Hi Sandra, thanks for the review. I've attached the updated patch, as well as the resulting relevant gcc.info portion. OK for trunk? Thanks, - Tom [doc] Note variable shadowing at max macro using statement expression When suggesting to rewrite the unsafe (with respect to multiple evaluation of arguments) macro definition: ... #define max(a,b) ((a) > (b) ? (a) : (b)) ... into the safe macro definition: ... #define maxint(a,b) \ ({int _a = (a), _b = (b); _a > _b ? _a : _b; }) ... mention the variable shadowing problem for: ... #define maxint3(a, b, c) \ ({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); }) ... 2019-04-08 Tom de Vries * doc/extend.texi (@node Statement Exprs): Note variable shadowing at max macro using statement expression. --- gcc/doc/extend.texi | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 8e0deac26c3..cad7ad49e56 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -142,14 +142,36 @@ follows: @cindex side effects, macro argument But this definition computes either @var{a} or @var{b} twice, with bad results if the operand has side effects. In GNU C, if you know the -type of the operands (here taken as @code{int}), you can define -the macro safely as follows: +type of the operands (here taken as @code{int}), you can avoid this +problem by defining the macro as follows: @smallexample #define maxint(a,b) \ (@{int _a = (a), _b = (b); _a > _b ? _a : _b; @}) @end smallexample +Note that introducing variable declarations (as we do in @code{maxint}) can +cause variable shadowing, so while this example using the @code{max} macro +produces correct results: +@smallexample +int _a = 1, _b = 2, c; +c = max (_a, _b); +@end smallexample +@noindent +this example using maxint will not: +@smallexample +int _a = 1, _b = 2, c; +c = maxint (_a, _b); +@end smallexample + +This problem may for instance occur when we use this pattern recursively, like +so: + +@smallexample +#define maxint3(a, b, c) \ + (@{int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); @}) +@end smallexample + Embedded statements are not allowed in constant expressions, such as the value of an enumeration constant, the width of a bit-field, or the initial value of a static variable. This feature is especially useful in making macro definitions "safe" (so that they evaluate each operand exactly once). For example, the "maximum" function is commonly defined as a macro in standard C as follows: #define max(a,b) ((a) > (b) ? (a) : (b)) But this definition computes either A or B twice, with bad results if the operand has side effects. In GNU C, if you know the type of the operands (here taken as 'int'), you can avoid this problem by defining the macro as follows: #define maxint(a,b) \ ({int _a = (a), _b = (b); _a > _b ? _a : _b; }) Note that introducing variable declarations (as we do in 'maxint') can cause variable shadowing, so while this example using the 'max' macro produces correct results: int _a = 1, _b = 2, c; c = max (_a, _b); this example using maxint will not: int _a = 1, _b = 2, c; c = maxint (_a, _b); This problem may for instance occur when we use this pattern recursively, like so: #define maxint3(a, b, c) \ ({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); })
Re: [PATCH GCC] PR90021 ICE fix
On Wed, Apr 10, 2019 at 7:48 AM bin.cheng wrote: > > Hi, > This patch fixes ICE reported by PR90021. The issue has been there since > loop interchange > and recently exposed by patch for PR89725. As PR comment suggests, we have > equal access > function {{1, +, 1}_6, +, 1}_4 and _6 is of loop_nest's outer loop. This > patch introduces > new parameter loopnum to function evolution_function_is_univariate_p so that > the above > scev won't be considered as multivariate, i.e, {1, +, 1}_6 is considered as a > symbol to the > loop_nest. Note we only change computation of classic dist vector, rather > than DDR itself. > > Bootstrap and test on x86_64. Is it ok? OK. > BTW, I wonder if collecting data reference in one loop but computing DDR in > an inner loop > is useful in the future, besides loop interchange. I guess it's useful for compile-time but eventually we can also refactor the data-ref code to be able to initialize a new data-ref from an already existing one, re-analyzing it for an outer loop. Not sure if that's cheaper in any ways though. Eventually the caching done by SCEV can also be improved. That said, we re-used the data-refs to avoid doing a quadratic amount of analyses during the search of the loop pair to interchange in a deep nest - I think that's very much desirable also for other passes operating on deeper nests of which we have none at the moment - loop distribution being the closest candidate. I think graphite also suffers from this somewhat in its SCOP finding/analysis. Richard. > Thanks, > bin > > 2019-04-10 Bin Cheng > > PR tree-optimization/92001 > * tree-chrec.c (evolution_function_is_univariate_p): New parameter > and check univariate against it. > * tree-chrec.h (evolution_function_is_univariate_p): New parameter. > * tree-data-ref.c (add_other_self_distances): Pass new argument. > > 2018-04-10 Bin Cheng > > PR tree-optimization/92001 > * gcc/testsuite/gfortran.dg/pr90021.f90: New test.
Re: [PATCH][Version 3]Come up with -flive-patching master option.
On Wed, Apr 10, 2019 at 9:49 PM Jonathan Wakely wrote: > > On 10/04/19 13:55 -0500, Qing Zhao wrote: > >Hi, Jonathan, > > > >thanks for your review on the documentation change for -flive-patching > >option. > > > >> > >>> --- a/gcc/doc/invoke.texi > >>> +++ b/gcc/doc/invoke.texi > >>> @@ -416,6 +416,7 @@ Objective-C and Objective-C++ Dialects}. > >>> -fipa-bit-cp -fipa-vrp @gol > >>> -fipa-pta -fipa-profile -fipa-pure-const -fipa-reference > >>> -fipa-reference-addressable @gol > >>> -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol > >>> +-flive-patching=@var{level} @gol > >>> -fira-region=@var{region} -fira-hoist-pressure @gol > >>> -fira-loop-pressure -fno-ira-share-save-slots @gol > >>> -fno-ira-share-spill-slots @gol > >>> @@ -9045,6 +9046,65 @@ equivalences that are found only by GCC and > >>> equivalences found only by Gold. > >>> This flag is enabled by default at @option{-O2} and @option{-Os}. > >>> +@item -flive-patching=@var{level} > >>> +@opindex flive-patching > >>> +Control GCC's optimizations to provide a safe compilation for > >>> live-patching. > >> > >> "provide a safe compilation" isn't very clear to me. I don't know what > >> it means to "provide a compilation", let alone a safe one. > >> > >> Could we say something like "Control GCC’s optimizations to produce > >> output suitable for live-patching.” ? > > > >yes, this is better. > > > >> > >> > >>> +If the compiler's optimization uses a function's body or information > >>> extracted > >>> +from its body to optimize/change another function, the latter is called > >>> an > >>> +impacted function of the former. If a function is patched, its impacted > >>> +functions should be patched too. > >>> + > >>> +The impacted functions are decided by the compiler's interprocedural > >> > >> decided or determined? > >determined is better. > > > >> > >>> +optimizations. For example, inlining a function into its caller, cloning > >>> +a function and changing its caller to call this new clone, or extracting > >>> +a function's pureness/constness information to optimize its direct or > >>> +indirect callers, etc. > >> > >> I don't know what the second sentence is saying. I can read it two > >> different ways: > >> > >> 1) Those are examples of interprocedural optimizations which > >> participate in the decision making, but the actual details of how the > >> decisions are made are not specified here. > >> > >> 2) Performing those optimizations causes a function to be impacted. > >> > >> If 1) is the intended meaning, then I think it should say "For > >> example, when inlining a function into its caller, ..." > >> > >> If 2) is the intended meaning, then I think it should say "For > >> example, a caller is impacted when inlining a function > >> into its caller …". > > > >2) is the intended meaining. > > > >> > >> Does either of those suggestions match the intended meaning? Or do you > >> have a better way to rephrase it? > >> > >>> +Usually, the more IPA optimizations enabled, the larger the number of > >>> +impacted functions for each function. In order to control the number of > >>> +impacted functions and computed the list of impacted function easily, > >> > >> Should be "and more easily compute the list of impacted functions”. > > > >this is good. > >> > >>> +we provide control to partially enable IPA optimizations on two different > >>> +levels. > >> > >> We don't usually say "we provide" like this. I suggest simply "IPA > >> optimizations can be partially enabled at two different levels.” > > > >Okay. > >> > >>> + > >>> +The @var{level} argument should be one of the following: > >>> + > >>> +@table @samp > >>> + > >>> +@item inline-clone > >>> + > >>> +Only enable inlining and cloning optimizations, which includes inlining, > >>> +cloning, interprocedural scalar replacement of aggregates and partial > >>> inlining. > >>> +As a result, when patching a function, all its callers and its clones' > >>> +callers need to be patched as well. > >> > >> Since you've defined the term "impacted" could this just say "all its > >> callers and its clones' callers are impacted.”? > > > >I think that the following might be better: > > > >when patching a function, all its callers and its clones’ callers are > >impacted, therefore need to be patched as well. > > Agreed. > > >> > >>> +@option{-flive-patching=inline-clone} disables the following > >>> optimization flags: > >>> +@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol > >>> +-fipa-icf -fipa-icf-functions -fipa-icf-variables @gol > >>> +-fipa-bit-cp -fipa-vrp -fipa-pure-const -fipa-reference-addressable > >>> @gol > >>> +-fipa-stack-alignment} > >>> + > >>> +@item inline-only-static > >>> + > >>> +Only enable inlining of static functions. > >>> +As a result, when patching a static function, all its callers need to be > >>> +patches as well. > >> > >> "Typo: "patches" should be "patched", but I'd suggest "are impacted" > >> here too. > > > >Okay.
Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)
On Thu, 11 Apr 2019, Jakub Jelinek wrote: > Hi! > > The following testcase is miscompiled, because the result of > a DImode (doubleword) right shift is used in a QImode subreg as well as > later on pushed into a stack slot as an argument to a const function > whose result isn't used. The RA because of the weirdo target tuning > reuses the REG_EQUIV argument slot (as it wants the value in a non-Q_REGS > register), so it first pushes it to the stack slot and then loads from the > stack slot again (according to Vlad, this can happen with both LRA and old > reload). Later on during DCE we determine the call is not needed and try to > find all the argument pushes and don't mark those as needed, so we > effectively DCE the right shift, push to the argument slot as well as > other slots and the call, and end up keeping just the load from the > uninitialized argument slot. > > The following patch just punts if we find loads from stack slots in between > where they are pushed and the const/pure call. In addition to that, I've > outlined the same largish sequence that had 3 copies in the function already > and I'd need to add 4th copy, so in the end the dce.c changes are removing > more than adding: > 1 file changed, 118 insertions(+), 135 deletions(-) The refactoring itself is probably OK (and if done separately makes reviewing easier). > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > During those 2 bootstraps/regtests, data.load_found has been set just > on the new testcase on ia32. Hmm, I wonder whether we really need to DCE calls after reload? That said, I'm not familiar enough with the code to check if the patch makes sense (can there ever be uses of the argument slots _after_ the call?). Richard. > 2019-04-11 Jakub Jelinek > > PR rtl-optimization/89965 > * dce.c: Include rtl-iter.h. > (sp_based_mem_offset): New function. > (struct check_argument_load_data): New type. > (check_argument_load): New function. > (find_call_stack_args): Use sp_based_mem_offset, check for loads > from stack slots still tracked in sp_bytes and punt if any is > found. > > * gcc.target/i386/pr89965.c: New test. > > --- gcc/dce.c.jj 2019-02-15 00:11:13.209111382 +0100 > +++ gcc/dce.c 2019-04-10 14:18:21.111763533 +0200 > @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. > #include "valtrack.h" > #include "tree-pass.h" > #include "dbgcnt.h" > +#include "rtl-iter.h" > > > /* - > @@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size >return true; > } > > +/* If MEM has sp address, return 0, if it has sp + const address, > + return that const, if it has reg address where reg is set to sp + const > + and FAST is false, return const, otherwise return > + INTTYPE_MINUMUM (HOST_WIDE_INT). */ > + > +static HOST_WIDE_INT > +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast) > +{ > + HOST_WIDE_INT off = 0; > + rtx addr = XEXP (mem, 0); > + if (GET_CODE (addr) == PLUS > + && REG_P (XEXP (addr, 0)) > + && CONST_INT_P (XEXP (addr, 1))) > +{ > + off = INTVAL (XEXP (addr, 1)); > + addr = XEXP (addr, 0); > +} > + if (addr == stack_pointer_rtx) > +return off; > + > + if (!REG_P (addr) || fast) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + /* If not fast, use chains to see if addr wasn't set to sp + offset. */ > + df_ref use; > + FOR_EACH_INSN_USE (use, call_insn) > + if (rtx_equal_p (addr, DF_REF_REG (use))) > +break; > + > + if (use == NULL) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + struct df_link *defs; > + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) > +if (! DF_REF_IS_ARTIFICIAL (defs->ref)) > + break; > + > + if (defs == NULL) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + rtx set = single_set (DF_REF_INSN (defs->ref)); > + if (!set) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + if (GET_CODE (SET_SRC (set)) != PLUS > + || XEXP (SET_SRC (set), 0) != stack_pointer_rtx > + || !CONST_INT_P (XEXP (SET_SRC (set), 1))) > +return INTTYPE_MINIMUM (HOST_WIDE_INT); > + > + off += INTVAL (XEXP (SET_SRC (set), 1)); > + return off; > +} > + > +/* Data for check_argument_load called via note_uses. */ > +struct check_argument_load_data { > + bitmap sp_bytes; > + HOST_WIDE_INT min_sp_off, max_sp_off; > + rtx_call_insn *call_insn; > + bool fast; > + bool load_found; > +}; > + > +/* Helper function for find_call_stack_args. Check if there are > + any loads from the argument slots in between the const/pure call > + and store to the argument slot, set LOAD_FOUND if any is found. */ > + > +static void > +check_argument_load (rtx *loc, void *data) > +{ > + struct check_argument_load_data *d > += (struct check_argument_load_data *) data; > + subrtx_iterator::array_type array; > + FOR_EACH_SU
[C++ PATCH] Make some type forbidden diagnostics translatable (PR translation/90035)
Hi! While looking into another PR, I've noticed that in two spots the parser->type_definition_forbidden_message messages are untranslatable, we construct them at runtime from 3 strings using concat. The following patch fixes it by adding a const char * member to the parser structure and passing that as another argument to error, in the more usual case where the argument string only contains %< and %> and not %qs added in this patch it will be just ignored. I believe this patch is more friendly to the translators (as well as less expensive at compile time because it doesn't have to concat/allocate anything). Another possibility would be to just use in cp_parser_has_attribute_expression G_("types may not be defined in %<__builtin_has_attribute%> expressions") and in cp_parser_sizeof_operand use a switch based on the 3 possible keyword values and use G_("types may not be defined in % expressions") G_("types may not be defined in %<__alignof__%> expressions") G_("types may not be defined in % expressions") G_("types may not be defined in % expressions") depending on that (and C++ mode which determines which alignof spelling is in ridpointers). We wouldn't need to add an extra member, on the other side the translators would need to translate 5 messages instead of just one. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-04-11 Jakub Jelinek PR translation/90035 * parser.h (struct cp_parser): Add type_definition_forbidden_message_arg member. * parser.c (cp_debug_parser): Print it. (cp_parser_check_type_definition): Pass parser->type_definition_forbidden_message_arg as second argument to error. (cp_parser_has_attribute_expression, cp_parser_sizeof_operand): Set parser->type_definition_forbidden_message_arg and use G_() with %qs for parser->type_definition_forbidden_message instead of building untranslatable message using concat. --- gcc/cp/parser.h.jj 2019-01-08 22:33:34.255713665 +0100 +++ gcc/cp/parser.h 2019-04-10 15:19:37.672814432 +0200 @@ -350,6 +350,9 @@ struct GTY(()) cp_parser { issued as an error message if a type is defined. */ const char *type_definition_forbidden_message; + /* Argument for type_definition_forbidden_message if needed. */ + const char *type_definition_forbidden_message_arg; + /* A stack used for member functions of local classes. The lists contained in an individual entry can only be processed once the outermost class being defined is complete. */ --- gcc/cp/parser.c.jj 2019-03-27 21:19:39.45746 +0100 +++ gcc/cp/parser.c 2019-04-10 15:34:28.201322120 +0200 @@ -570,8 +570,10 @@ cp_debug_parser (FILE *file, cp_parser * cp_debug_print_flag (file, "Colon doesn't start a class definition", parser->colon_doesnt_start_class_def_p); if (parser->type_definition_forbidden_message) -fprintf (file, "Error message for forbidden type definitions: %s\n", -parser->type_definition_forbidden_message); +fprintf (file, "Error message for forbidden type definitions: %s %s\n", +parser->type_definition_forbidden_message, +parser->type_definition_forbidden_message_arg +? parser->type_definition_forbidden_message_arg : ""); cp_debug_print_unparsed_queues (file, parser->unparsed_queues); fprintf (file, "Number of class definitions in progress: %u\n", parser->num_classes_being_defined); @@ -3054,8 +3056,9 @@ cp_parser_check_type_definition (cp_pars if (parser->type_definition_forbidden_message) { /* Don't use `%s' to print the string, because quotations (`%<', `%>') -in the message need to be interpreted. */ - error (parser->type_definition_forbidden_message); +or %qs in the message need to be interpreted. */ + error (parser->type_definition_forbidden_message, +parser->type_definition_forbidden_message_arg); return false; } return true; @@ -8518,12 +8521,12 @@ cp_parser_has_attribute_expression (cp_p /* Types cannot be defined in a `sizeof' expression. Save away the old message. */ const char *saved_message = parser->type_definition_forbidden_message; - /* And create the new one. */ - const int kwd = RID_BUILTIN_HAS_ATTRIBUTE; - char *tmp = concat ("types may not be defined in %<", - IDENTIFIER_POINTER (ridpointers[kwd]), - "%> expressions", NULL); - parser->type_definition_forbidden_message = tmp; + const char *saved_message_arg += parser->type_definition_forbidden_message_arg; + parser->type_definition_forbidden_message += G_("types may not be defined in %qs expressions"); + parser->type_definition_forbidden_message_arg += IDENTIFIER_POINTER (ridpointers[RID_BUILTIN_HAS_ATTRIBUTE]); /* The restrictions on constant-expressions do not apply inside sizeof expressions. */ @@ -8562,10 +8
[PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)
Hi! The following testcase is miscompiled, because the result of a DImode (doubleword) right shift is used in a QImode subreg as well as later on pushed into a stack slot as an argument to a const function whose result isn't used. The RA because of the weirdo target tuning reuses the REG_EQUIV argument slot (as it wants the value in a non-Q_REGS register), so it first pushes it to the stack slot and then loads from the stack slot again (according to Vlad, this can happen with both LRA and old reload). Later on during DCE we determine the call is not needed and try to find all the argument pushes and don't mark those as needed, so we effectively DCE the right shift, push to the argument slot as well as other slots and the call, and end up keeping just the load from the uninitialized argument slot. The following patch just punts if we find loads from stack slots in between where they are pushed and the const/pure call. In addition to that, I've outlined the same largish sequence that had 3 copies in the function already and I'd need to add 4th copy, so in the end the dce.c changes are removing more than adding: 1 file changed, 118 insertions(+), 135 deletions(-) Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? During those 2 bootstraps/regtests, data.load_found has been set just on the new testcase on ia32. 2019-04-11 Jakub Jelinek PR rtl-optimization/89965 * dce.c: Include rtl-iter.h. (sp_based_mem_offset): New function. (struct check_argument_load_data): New type. (check_argument_load): New function. (find_call_stack_args): Use sp_based_mem_offset, check for loads from stack slots still tracked in sp_bytes and punt if any is found. * gcc.target/i386/pr89965.c: New test. --- gcc/dce.c.jj2019-02-15 00:11:13.209111382 +0100 +++ gcc/dce.c 2019-04-10 14:18:21.111763533 +0200 @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. #include "valtrack.h" #include "tree-pass.h" #include "dbgcnt.h" +#include "rtl-iter.h" /* - @@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size return true; } +/* If MEM has sp address, return 0, if it has sp + const address, + return that const, if it has reg address where reg is set to sp + const + and FAST is false, return const, otherwise return + INTTYPE_MINUMUM (HOST_WIDE_INT). */ + +static HOST_WIDE_INT +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast) +{ + HOST_WIDE_INT off = 0; + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1))) +{ + off = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); +} + if (addr == stack_pointer_rtx) +return off; + + if (!REG_P (addr) || fast) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + /* If not fast, use chains to see if addr wasn't set to sp + offset. */ + df_ref use; + FOR_EACH_INSN_USE (use, call_insn) + if (rtx_equal_p (addr, DF_REF_REG (use))) +break; + + if (use == NULL) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + struct df_link *defs; + for (defs = DF_REF_CHAIN (use); defs; defs = defs->next) +if (! DF_REF_IS_ARTIFICIAL (defs->ref)) + break; + + if (defs == NULL) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + rtx set = single_set (DF_REF_INSN (defs->ref)); + if (!set) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + if (GET_CODE (SET_SRC (set)) != PLUS + || XEXP (SET_SRC (set), 0) != stack_pointer_rtx + || !CONST_INT_P (XEXP (SET_SRC (set), 1))) +return INTTYPE_MINIMUM (HOST_WIDE_INT); + + off += INTVAL (XEXP (SET_SRC (set), 1)); + return off; +} + +/* Data for check_argument_load called via note_uses. */ +struct check_argument_load_data { + bitmap sp_bytes; + HOST_WIDE_INT min_sp_off, max_sp_off; + rtx_call_insn *call_insn; + bool fast; + bool load_found; +}; + +/* Helper function for find_call_stack_args. Check if there are + any loads from the argument slots in between the const/pure call + and store to the argument slot, set LOAD_FOUND if any is found. */ + +static void +check_argument_load (rtx *loc, void *data) +{ + struct check_argument_load_data *d += (struct check_argument_load_data *) data; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, *loc, NONCONST) +{ + const_rtx mem = *iter; + HOST_WIDE_INT size; + if (MEM_P (mem) + && MEM_SIZE_KNOWN_P (mem) + && MEM_SIZE (mem).is_constant (&size)) + { + HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast); + if (off != INTTYPE_MINIMUM (HOST_WIDE_INT) + && off < d->max_sp_off + && off + size > d->min_sp_off) + for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off); +byte < MIN (off + size, d->max_sp_off); byte++) + if (bi