Re: [PATCH, PR d/89255][1/3] Add -fbuilding-libphobos-tests

2019-04-11 Thread Iain Buclaw
Patch updated and committed to trunk as r270301.

-- 
Iain
---
2019-04-12  Iain Buclaw  

* d-tree.h (DECL_IN_UNITTEST_CONDITION_P): Define.
* decl.cc (DeclVisitor): Add in_version_unittest_ field.
(DeclVisitor::visit(ConditionalDeclaration)): New override.
(DeclVisitor::visit(FuncDeclaration)): Set
DECL_IN_UNITTEST_CONDITION_P.
* lang.opt (-fbuilding-libphobos-tests): Add option.
* modules.cc (current_testing_module): New static variable.
(build_module_tree): Generate second moduleinfo symbol to hold
reference to unittests if flag_building_libphobos_tests.
(register_module_decl): Check DECL_IN_UNITTEST_CONDITION_P to decide
which moduleinfo the decl should be registered against.

---
diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h
index 0b3c5eddedd..cff832cc645 100644
--- a/gcc/d/d-tree.h
+++ b/gcc/d/d-tree.h
@@ -59,7 +59,8 @@ typedef Array Expressions;
 
Usage of DECL_LANG_FLAG_?:
0: LABEL_VARIABLE_CASE (in LABEL_DECL).
-  DECL_BUILT_IN_CTFE (in FUNCTION_DECL).  */
+  DECL_BUILT_IN_CTFE (in FUNCTION_DECL).
+   1: DECL_IN_UNITTEST_CONDITION_P (in FUNCTION_DECL).  */
 
 /* The kinds of scopes we recognize.  */
 
@@ -380,6 +381,10 @@ lang_tree_node
 #define DECL_BUILT_IN_CTFE(NODE) \
   (DECL_LANG_FLAG_0 (FUNCTION_DECL_CHECK (NODE)))
 
+/* True if the decl is only compiled in when unittests are turned on.  */
+#define DECL_IN_UNITTEST_CONDITION_P(NODE) \
+  (DECL_LANG_FLAG_1 (FUNCTION_DECL_CHECK (NODE)))
+
 enum d_tree_index
 {
   DTI_VTABLE_ENTRY_TYPE,
diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index fffed97727f..f6c863988e3 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "dmd/aggregate.h"
 #include "dmd/attrib.h"
+#include "dmd/cond.h"
 #include "dmd/ctfe.h"
 #include "dmd/declaration.h"
 #include "dmd/enum.h"
@@ -121,9 +122,13 @@ class DeclVisitor : public Visitor
 {
   using Visitor::visit;
 
+  /* If we're lowering the body of a version(unittest) condition.  */
+  bool in_version_unittest_;
+
 public:
   DeclVisitor (void)
   {
+this->in_version_unittest_ = false;
   }
 
   /* This should be overridden by each declaration class.  */
@@ -241,6 +246,25 @@ public:
 visit ((AttribDeclaration *) d);
   }
 
+  /* Conditional compilation is the process of selecting which code to compile
+ and which code to not compile.  Look for version conditions that may  */
+
+  void visit (ConditionalDeclaration *d)
+  {
+bool old_condition = this->in_version_unittest_;
+
+if (global.params.useUnitTests)
+  {
+	VersionCondition *vc = d->condition->isVersionCondition ();
+	if (vc && vc->ident == Identifier::idPool ("unittest"))
+	  this->in_version_unittest_ = true;
+  }
+
+visit ((AttribDeclaration *) d);
+
+this->in_version_unittest_ = old_condition;
+  }
+
   /* Walk over all members in the namespace scope.  */
 
   void visit (Nspace *d)
@@ -868,6 +892,7 @@ public:
   }
 
 DECL_ARGUMENTS (fndecl) = param_list;
+DECL_IN_UNITTEST_CONDITION_P (fndecl) = this->in_version_unittest_;
 rest_of_decl_compilation (fndecl, 1, 0);
 
 /* If this is a member function that nested (possibly indirectly) in another
diff --git a/gcc/d/lang.opt b/gcc/d/lang.opt
index 523f73c90de..f65be444d45 100644
--- a/gcc/d/lang.opt
+++ b/gcc/d/lang.opt
@@ -197,6 +197,10 @@ Enum(bounds_check) String(safeonly) Value(1)
 EnumValue
 Enum(bounds_check) String(on) Value(2)
 
+; Generates a secondary ModuleInfo symbol for linking in unittests
+fbuilding-libphobos-tests
+D Undocumented Var(flag_building_libphobos_tests)
+
 fbuiltin
 D Var(flag_no_builtin, 0)
 ; Documented in C
diff --git a/gcc/d/modules.cc b/gcc/d/modules.cc
index e9bd44115f9..315f5d82356 100644
--- a/gcc/d/modules.cc
+++ b/gcc/d/modules.cc
@@ -110,6 +110,11 @@ enum module_info_flags
 
 static module_info *current_moduleinfo;
 
+/* When compiling with -fbuilding-libphobos-tests, this contains information
+   about the module that gets compiled in only when unittests are enabled.  */
+
+static module_info *current_testing_module;
+
 /* The declaration of the current module being compiled.  */
 
 static Module *current_module_decl;
@@ -706,8 +711,10 @@ build_module_tree (Module *decl)
   assert (!current_moduleinfo && !current_module_decl);
 
   module_info mi = module_info ();
+  module_info mitest = module_info ();
 
   current_moduleinfo = &mi;
+  current_testing_module = &mitest;
   current_module_decl = decl;
 
   /* Layout module members.  */
@@ -720,6 +727,53 @@ build_module_tree (Module *decl)
 	}
 }
 
+  /* For libphobos-internal use only.  Generate a separate module info symbol
+ that references all compiled in unittests, this allows compiling library
+ modules and linking to libphobos without having run-time conflicts because
+ of two ModuleInfo records with the same name being present in two DSOs.  */
+  if (flag_building_libphobos_te

[PATCH, d] Committed merge with upstream dmd

2019-04-11 Thread Iain Buclaw
Hi,

This patch merges the D front-end implementation with dmd upstream c185f9df1.

Adds new virtual isVersionCondition, this is so that in the code
generation pass, a ConditionDeclaration's condition can be identified
without requiring a Visitor function.

Bootstrapped and regression tested on x86_64-linux-gnu.

Committed to trunk as r270300.

-- 
Iain
---
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 800be95e4e6..be0c5a50da2 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-d7ed327edb0b01ad56e7e73e77b3401cd565675e
+c185f9df1789456c7d88d047f2df23dd784f1182
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/cond.h b/gcc/d/dmd/cond.h
index 891969be48d..8e33b16a9da 100644
--- a/gcc/d/dmd/cond.h
+++ b/gcc/d/dmd/cond.h
@@ -39,6 +39,7 @@ public:
 virtual Condition *syntaxCopy() = 0;
 virtual int include(Scope *sc, ScopeDsymbol *sds) = 0;
 virtual DebugCondition *isDebugCondition() { return NULL; }
+virtual VersionCondition *isVersionCondition() { return NULL; }
 virtual void accept(Visitor *v) { v->visit(this); }
 };
 
@@ -91,6 +92,7 @@ public:
 VersionCondition(Module *mod, unsigned level, Identifier *ident);
 
 int include(Scope *sc, ScopeDsymbol *sds);
+VersionCondition *isVersionCondition() { return this; }
 void accept(Visitor *v) { v->visit(this); }
 };
 


Re: [PR89528] reset debug uses of return value when dropping dead RTL call

2019-04-11 Thread Richard Biener
On April 12, 2019 3:28:20 AM GMT+02:00, Alexandre Oliva  
wrote:
>On Apr  5, 2019, Richard Biener  wrote:
>
>> Looks OK but can you adjust the testcase to actually test
>> something?
>
>Doh, I *knew* I'd failed to do something I should have ;-)
>
>
>I resorted to the trick you used in pr89892.c, of using a more complex
>expression to get UNSUPPORTED results instead of FAILs when the call
>and
>thus the variable are optimized out.  How's this?

Looks good! 

Thanks, 
Richard. 

>
>[PR89528] reset debug uses of return value when dropping dead RTL call
>
>When we remove an RTL call, we wouldn't clean up references to the
>return value of the call in debug insns.  Make it so that we do.
>
>
>for  gcc/ChangeLog
>
>   PR debug/89528
>   * valtrack.c (dead_debug_insert_temp): Reset debug references
>   to the return value of a call being removed.
>
>for  gcc/testsuite/ChangeLog
>
>   PR debug/89528
>   * gcc.dg/guality/pr89528.c: New.
>---
>gcc/testsuite/gcc.dg/guality/pr89528.c |   25 +
> gcc/valtrack.c |   22 ++
> 2 files changed, 31 insertions(+), 16 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/guality/pr89528.c
>
>diff --git a/gcc/testsuite/gcc.dg/guality/pr89528.c
>b/gcc/testsuite/gcc.dg/guality/pr89528.c
>new file mode 100644
>index ..04a7e84d8755
>--- /dev/null
>+++ b/gcc/testsuite/gcc.dg/guality/pr89528.c
>@@ -0,0 +1,25 @@
>+/* PR debug/89528 */
>+/* { dg-do run } */
>+/* { dg-options "-g" } */
>+
>+#include 
>+
>+char b;
>+int d, e;
>+static int i = 1;
>+void a(int l) { printf("", l); }
>+char c(char l) { return l || b && l == 1 ? b : b % l; }
>+short f(int l, int m) { return l * m; }
>+short g(short l, short m) { return m || l == 767 && m == 1; }
>+int h(int l, int m) { return (l ^ m & l ^ (m & 647) - m ^ m) < m; }
>+static int j(int l) { return d == 0 || l == 647 && d == 1 ? l : l % d;
>}
>+short k(int l) { return l >= 2 >> l; }
>+void optimize_me_not() { asm(""); }
>+static short n(void) {
>+  int l_1127 = ~j(9 || 0) ^ 65535;
>+  optimize_me_not(); /* { dg-final { gdb-test . "l_1127+1" "-65534" }
>} */
>+  f(l_1127, i && e ^ 4) && g(0, 0);
>+  e = 0;
>+  return 5;
>+}
>+int main() { n(); }
>diff --git a/gcc/valtrack.c b/gcc/valtrack.c
>index 9b2bb333c0a3..1f67378a867c 100644
>--- a/gcc/valtrack.c
>+++ b/gcc/valtrack.c
>@@ -657,22 +657,12 @@ dead_debug_insert_temp (struct dead_debug_local
>*debug, unsigned int uregno,
>   {
> dest = SET_DEST (set);
> src = SET_SRC (set);
>-/* Lose if the REG-setting insn is a CALL.  */
>-if (GET_CODE (src) == CALL)
>-  {
>-while (uses)
>-  {
>-cur = uses->next;
>-XDELETE (uses);
>-uses = cur;
>-  }
>-return 0;
>-  }
>-/* Asm in DEBUG_INSN is never useful, we can't emit debug info for
>-   that.  And for volatile_insn_p, it is actually harmful
>-   - DEBUG_INSNs shouldn't have any side-effects.  */
>-else if (GET_CODE (src) == ASM_OPERANDS
>- || volatile_insn_p (src))
>+/* Reset uses if the REG-setting insn is a CALL.  Asm in
>+   DEBUG_INSN is never useful, we can't emit debug info for
>+   that.  And for volatile_insn_p, it is actually harmful -
>+   DEBUG_INSNs shouldn't have any side-effects.  */
>+if (GET_CODE (src) == CALL || GET_CODE (src) == ASM_OPERANDS
>+|| volatile_insn_p (src))
>   set = NULL_RTX;
>   }
> 



[PATCH] Uglify identifiers missed in previous commit(s)

2019-04-11 Thread Thomas Rodgers

   * include/pstl/algorithm_impl.h: Uglify identfiers.
   * include/pstl/numeric_impl.h:  Uglify identfiers.
   * include/pstl/parallel_backend_tbb.h: Uglify identfiers.
   
>From b75813e885c50e667a1474c1d0e1fc47ee893d6e Mon Sep 17 00:00:00 2001
From: Thomas Rodgers 
Date: Thu, 11 Apr 2019 19:42:50 -0700
Subject: [PATCH] Uglify identifiers missed in previous commit(s)

   * include/pstl/algorithm_impl.h: Uglify identfiers.
   * include/pstl/numeric_impl.h:  Uglify identfiers.
   * include/pstl/parallel_backend_tbb.h: Uglify identfiers.
---
 libstdc++-v3/include/pstl/algorithm_impl.h| 22 +--
 libstdc++-v3/include/pstl/numeric_impl.h  |  2 +-
 .../include/pstl/parallel_backend_tbb.h   |  5 +++--
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/libstdc++-v3/include/pstl/algorithm_impl.h b/libstdc++-v3/include/pstl/algorithm_impl.h
index b0d60baae14..d39e99add05 100644
--- a/libstdc++-v3/include/pstl/algorithm_impl.h
+++ b/libstdc++-v3/include/pstl/algorithm_impl.h
@@ -283,20 +283,20 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI
 template 
 _ForwardIterator2
-__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size n, _ForwardIterator2 __first2, _Function f,
+__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f,
   _IsVector is_vector, /*parallel=*/std::false_type) noexcept
 {
-return __internal::__brick_walk2_n(__first1, n, __first2, f, is_vector);
+return __internal::__brick_walk2_n(__first1, __n, __first2, __f, is_vector);
 }
 
 template 
 _RandomAccessIterator2
-__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size n, _RandomAccessIterator2 __first2,
-  _Function f, _IsVector is_vector, /*parallel=*/std::true_type)
+__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2,
+  _Function __f, _IsVector __is_vector, /*parallel=*/std::true_type)
 {
-return __internal::__pattern_walk2(std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + n, __first2, f, is_vector,
-   std::true_type());
+return __internal::__pattern_walk2(std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f,
+   __is_vector, std::true_type());
 }
 
 template 
@@ -1033,7 +1033,7 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran
 return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() {
 bool* __mask = __mask_buf.get();
 _DifferenceType __m{};
-__par_backend::parallel_strict_scan(
+__par_backend::__parallel_strict_scan(
 std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
 [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
 return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), __mask + __i,
@@ -1182,7 +1182,7 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI
 __mask += __min;
 _DifferenceType __m{};
 // 2. Elements that doesn't satisfy pred are moved to result
-__par_backend::parallel_strict_scan(
+__par_backend::__parallel_strict_scan(
 std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
 [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) {
 return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, __is_vector);
@@ -1309,7 +1309,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __first,
 return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() {
 bool* __mask = __mask_buf.get();
 _DifferenceType __m{};
-__par_backend::parallel_strict_scan(
+__par_backend::__parallel_strict_scan(
 std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
 [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce
 _DifferenceType __extra = 0;
@@ -2033,7 +2033,7 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs
 return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, &__mask_buf]() {
 bool* __mask = __mask_buf.get();
 _ReturnType __m{};
-__par_backend::parallel_strict_scan(
+__par_backend::__parallel_strict_scan(
 std::forward<_ExecutionPolicy>(__exec), __n, std::make_pair(_DifferenceType(0), _Differenc

[PR86438] avoid too-long shift in test

2019-04-11 Thread Alexandre Oliva
The test fell back to long long and long when __int128 is not
available, but it assumed sizeof(long) < sizeof(long long) because of
a shift count that would be out of range for a long long if their
widths are the same.  Fixed by splitting it up into two shifts.

Tested on x86_64-linux-gnu, -m64 and -m32.  Hopefully Andrew and/or John
David will let me know if it fails to fix the problem on the platforms
in which they've observed it.  Thanks for the report, sorry it took me
so long to get to it.  I'm going to install this as obvious, unless
there are objections in the next few days.


for  gcc/testsuite/ChangeLog

PR rtl-optimization/86438
* gcc.dg/torture/pr86438.c: Split up too-wide shift.
---
 gcc/testsuite/gcc.dg/torture/pr86438.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/torture/pr86438.c 
b/gcc/testsuite/gcc.dg/torture/pr86438.c
index 3e95515ae6a6..5f8b463f7572 100644
--- a/gcc/testsuite/gcc.dg/torture/pr86438.c
+++ b/gcc/testsuite/gcc.dg/torture/pr86438.c
@@ -24,6 +24,6 @@ main (void)
   u64 d = (g ? 5 : 4);
   u32 f = __builtin_sub_overflow_p (d, (u128) d, (u64) 0);
   u128 x = g + f + d;
-  check (x >> (sizeof (u64) * __CHAR_BIT__), x);
+  check ((x >> 1) >> (sizeof (u64) * __CHAR_BIT__ - 1), x);
   return 0;
 }


-- 
Alexandre Oliva, freedom fighter   https://FSFLA.org/blogs/lxo
Be the change, be Free! FSF Latin America board member
GNU Toolchain EngineerFree Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe


Re: [PR89528] reset debug uses of return value when dropping dead RTL call

2019-04-11 Thread Alexandre Oliva
On Apr  5, 2019, Richard Biener  wrote:

> Looks OK but can you adjust the testcase to actually test
> something?

Doh, I *knew* I'd failed to do something I should have ;-)


I resorted to the trick you used in pr89892.c, of using a more complex
expression to get UNSUPPORTED results instead of FAILs when the call and
thus the variable are optimized out.  How's this?


[PR89528] reset debug uses of return value when dropping dead RTL call

When we remove an RTL call, we wouldn't clean up references to the
return value of the call in debug insns.  Make it so that we do.


for  gcc/ChangeLog

PR debug/89528
* valtrack.c (dead_debug_insert_temp): Reset debug references
to the return value of a call being removed.

for  gcc/testsuite/ChangeLog

PR debug/89528
* gcc.dg/guality/pr89528.c: New.
---
 gcc/testsuite/gcc.dg/guality/pr89528.c |   25 +
 gcc/valtrack.c |   22 ++
 2 files changed, 31 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/guality/pr89528.c

diff --git a/gcc/testsuite/gcc.dg/guality/pr89528.c 
b/gcc/testsuite/gcc.dg/guality/pr89528.c
new file mode 100644
index ..04a7e84d8755
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/guality/pr89528.c
@@ -0,0 +1,25 @@
+/* PR debug/89528 */
+/* { dg-do run } */
+/* { dg-options "-g" } */
+
+#include 
+
+char b;
+int d, e;
+static int i = 1;
+void a(int l) { printf("", l); }
+char c(char l) { return l || b && l == 1 ? b : b % l; }
+short f(int l, int m) { return l * m; }
+short g(short l, short m) { return m || l == 767 && m == 1; }
+int h(int l, int m) { return (l ^ m & l ^ (m & 647) - m ^ m) < m; }
+static int j(int l) { return d == 0 || l == 647 && d == 1 ? l : l % d; }
+short k(int l) { return l >= 2 >> l; }
+void optimize_me_not() { asm(""); }
+static short n(void) {
+  int l_1127 = ~j(9 || 0) ^ 65535;
+  optimize_me_not(); /* { dg-final { gdb-test . "l_1127+1" "-65534" } } */
+  f(l_1127, i && e ^ 4) && g(0, 0);
+  e = 0;
+  return 5;
+}
+int main() { n(); }
diff --git a/gcc/valtrack.c b/gcc/valtrack.c
index 9b2bb333c0a3..1f67378a867c 100644
--- a/gcc/valtrack.c
+++ b/gcc/valtrack.c
@@ -657,22 +657,12 @@ dead_debug_insert_temp (struct dead_debug_local *debug, 
unsigned int uregno,
{
  dest = SET_DEST (set);
  src = SET_SRC (set);
- /* Lose if the REG-setting insn is a CALL.  */
- if (GET_CODE (src) == CALL)
-   {
- while (uses)
-   {
- cur = uses->next;
- XDELETE (uses);
- uses = cur;
-   }
- return 0;
-   }
- /* Asm in DEBUG_INSN is never useful, we can't emit debug info for
-that.  And for volatile_insn_p, it is actually harmful
-- DEBUG_INSNs shouldn't have any side-effects.  */
- else if (GET_CODE (src) == ASM_OPERANDS
-  || volatile_insn_p (src))
+ /* Reset uses if the REG-setting insn is a CALL.  Asm in
+DEBUG_INSN is never useful, we can't emit debug info for
+that.  And for volatile_insn_p, it is actually harmful -
+DEBUG_INSNs shouldn't have any side-effects.  */
+ if (GET_CODE (src) == CALL || GET_CODE (src) == ASM_OPERANDS
+ || volatile_insn_p (src))
set = NULL_RTX;
}
 


-- 
Alexandre Oliva, freedom fighter   https://FSFLA.org/blogs/lxo
Be the change, be Free! FSF Latin America board member
GNU Toolchain EngineerFree Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás-GNUChe


Re: AW: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.

2019-04-11 Thread Gilles Gouaillardet

Reinhold,


Thanks for the insights !


That means there is currently an other issue since copy-in is performed 
even if the argument is declared as ASYNCHRONOUS.



I gave the copy-in mechanism some more thoughts, and as a library 
developers, I can clearly see a need *not* to do that


on a case-by-case basis, mainly for performance reasons, but also to be 
friendly with legacy apps that are not strictly standard compliant.


At this stage, I think the best way to move forward is to add an other 
directive in the interface definition.



for example, we could declare


module foo

interface

subroutine bar_f08(buf) BIND(C, name="bar_c")

implicit none

!GCC$ ATTRIBUTES NO_COPY_IN :: buf

TYPE(*), DIMENSION(..), INTENT(IN) :: buf

end subroutine

end interface

end module


Does this make sense ?


Gilles

On 4/10/2019 4:22 PM, Bader, Reinhold wrote:

Hi Gilles,


I also found an other potential issue with copy-in.

If in Fortran, we

call foo(buf(0,0))

then the C subroutine can only access buf(0,0), and other elements such
as buf(1025,1025) cannot be accessed.

Such elements are valid in buf, but out of bounds in the copy (that
contains a single element).

Strictly speaking, I cannot say whether this is a violation of the
standard or not, but I can see how this will

break a lot of existing apps (once again, those apps might be incorrect
in the first place, but most of us got used to them working).


The above call will only be conforming if the dummy argument is declared
assumed or explicit size.
Otherwise, the compiler should reject it due to rank mismatch. For assumed
rank, the call would be
legitimate, but the rank of the dummy argument is then zero. Even if no
copy-in is performed,
accessing data beyond the address range of that scalar is not strictly
allowed.

Of more interest is the situation where the dummy argument in Fortran is
declared, e.g.,

TYPE(*), ASYNCHRONOUS, INTENT(IN) :: BUF(..)

The standard's semantics *forbids* performing copy-in/out in this case, IIRC.
Otherwise
ASYNCHRONOUS semantics would not work, and non-blocking MPI calls would fail
due
to buffers vanishing into thin air.

Regards
Reinhold


To me, this is a second reason why copy-in is not desirable (at least as
a default option).



Cheers,


Gilles

On 4/9/2019 7:18 PM, Paul Richard Thomas wrote:

The most part of this patch is concerned with implementing calls from
C of of fortran bind c procedures with assumed shape or assumed rank
dummies to completely fix PR89843. The conversion of the descriptors
from CFI to gfc occur on entry to and reversed on exit from the
procedure.

This patch is safe for trunk, even at this late stage, because its
effects are barricaded behind the tests for CFI descriptors. I believe
that it appropriately rewards the bug reporters to have this feature
work as well as possible at release.

Between comments and the ChangeLogs, this patch is self explanatory.

Bootstrapped and regtested on FC29/x86_64 - OK for trunk?

Paul

2019-04-09  Paul Thomas  

  PR fortran/89843
  * trans-decl.c (gfc_get_symbol_decl): Assumed shape and assumed
  rank dummies of bind C procs require deferred initialization.
  (convert_CFI_desc): New procedure to convert incoming CFI
  descriptors to gfc types and back again.
  (gfc_trans_deferred_vars): Call it.
  * trans-expr.c (gfc_conv_gfc_desc_to_cfi_desc): Null the CFI
  descriptor pointer. Free the descriptor in all cases.

  PR fortran/90022
  * trans-decl.c (gfc_get_symbol_decl): Make sure that the se
  expression is a pointer type before converting it to the symbol
  backend_decl type.

2019-04-09  Paul Thomas  

  PR fortran/89843
  * gfortran.dg/ISO_Fortran_binding_4.f90: Modify the value of x
  in ctg. Test the conversion of the descriptor types in the main
  program.
  * gfortran.dg/ISO_Fortran_binding_10.f90: New test.
  * gfortran.dg/ISO_Fortran_binding_10.c: Called by it.

  PR fortran/90022
  * gfortran.dg/ISO_Fortran_binding_1.c: Correct the indexing for
  the computation of 'ans'. Also, change the expected results for
  CFI_is_contiguous to comply with standard.
  * gfortran.dg/ISO_Fortran_binding_1.f90: Correct the expected
  results for CFI_is_contiguous to comply with standard.
  * gfortran.dg/ISO_Fortran_binding_9.f90: New test.
  * gfortran.dg/ISO_Fortran_binding_9.c: Called by it.

2019-04-09  Paul Thomas  

  PR fortran/89843
  * runtime/ISO_Fortran_binding.c (cfi_desc_to_gfc_desc): Only
  return immediately if the source pointer is null. Bring
  forward the extraction of the gfc type. Extract the kind so
  that the element size can be correctly computed for sections
  and components of derived type arrays. Remove the free of the
  CFI descriptor since this is now done in trans-expr.c.
  (gfc_desc_to_cfi_desc): Only allocate the CFI descriptor if it
  is not null.
  (CFI_section): Normalise the difference b

Re: C++ PATCH for c++/87603 - constexpr functions are no longer noexcept

2019-04-11 Thread Jonathan Wakely

On 11/04/19 17:36 -0400, Marek Polacek wrote:

This patch deals with constexpr functions and whether or not they should be
noexcept.

CWG 1129 specified that constexpr functions are noexcept: it was a special case
in [expr.unary.noexcept].  This was accidentally removed in 
but the CWG conclusion was to keep it as-is.

Clearly we need to change this for C++17.  The question is whether we should
treat it as a DR and apply it retroactively (which is what clang did).  I took


Well Clang never implemented CWG 1129 in the first place, so they
applied P0003 retroactively by just doing nothing :-)



C++ PATCH for c++/87603 - constexpr functions are no longer noexcept

2019-04-11 Thread Marek Polacek
This patch deals with constexpr functions and whether or not they should be
noexcept.

CWG 1129 specified that constexpr functions are noexcept: it was a special case
in [expr.unary.noexcept].  This was accidentally removed in 
but the CWG conclusion was to keep it as-is.

Clearly we need to change this for C++17.  The question is whether we should
treat it as a DR and apply it retroactively (which is what clang did).  I took
the same approach and my reasoning is in the comment in check_noexcept_r.

Arguably it might be too late to put this in now, maybe we should defer to GCC 
10.
But at least there's a patch.

Bootstrapped/regtested on x86_64-linux.

2019-04-11  Marek Polacek  

PR c++/87603 - constexpr functions are no longer noexcept.
* constexpr.c (is_sub_constant_expr): Remove unused function.
* cp-tree.h (is_sub_constant_expr): Remove declaration.
* except.c (check_noexcept_r): Don't consider a call to a constexpr
function noexcept.

* g++.dg/cpp0x/constexpr-noexcept.C: Adjust the expected result.
* g++.dg/cpp0x/constexpr-noexcept3.C: Likewise.
* g++.dg/cpp0x/constexpr-noexcept4.C: Likewise.
* g++.dg/cpp0x/constexpr-noexcept8.C: New test.
* g++.dg/cpp0x/inh-ctor32.C: Remove dg-message.
* g++.dg/cpp1y/constexpr-noexcept1.C: New test.

diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
index 0ce5618a2fc..9c13f0d5f32 100644
--- gcc/cp/constexpr.c
+++ gcc/cp/constexpr.c
@@ -5423,27 +5423,6 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
   return r;
 }
 
-/* Returns true if T is a valid subexpression of a constant expression,
-   even if it isn't itself a constant expression.  */
-
-bool
-is_sub_constant_expr (tree t)
-{
-  bool non_constant_p = false;
-  bool overflow_p = false;
-  hash_map  map;
-  HOST_WIDE_INT constexpr_ops_count = 0;
-
-  constexpr_ctx ctx
-= { NULL, &map, NULL, NULL, NULL, NULL, &constexpr_ops_count,
-   true, true, false };
-
-  instantiate_constexpr_fns (t);
-  cxx_eval_constant_expression (&ctx, t, false, &non_constant_p,
-   &overflow_p);
-  return !non_constant_p && !overflow_p;
-}
-
 /* If T represents a constant expression returns its reduced value.
Otherwise return error_mark_node.  If T is dependent, then
return NULL.  */
diff --git gcc/cp/cp-tree.h gcc/cp/cp-tree.h
index b87b968fa4e..ff4ce068a83 100644
--- gcc/cp/cp-tree.h
+++ gcc/cp/cp-tree.h
@@ -7720,7 +7720,6 @@ extern tree fold_non_dependent_init   (tree,
 tsubst_flags_t = 
tf_warning_or_error,
 bool = false);
 extern tree fold_simple(tree);
-extern bool is_sub_constant_expr(tree);
 extern bool reduced_constant_expression_p   (tree);
 extern bool is_instantiation_of_constexpr   (tree);
 extern bool var_in_constexpr_fn (tree);
diff --git gcc/cp/except.c gcc/cp/except.c
index 40e973fad66..7df17c52eb4 100644
--- gcc/cp/except.c
+++ gcc/cp/except.c
@@ -1128,11 +1128,14 @@ check_noexcept_r (tree *tp, int * /*walk_subtrees*/, 
void * /*data*/)
  && (DECL_ARTIFICIAL (fn)
  || nothrow_libfn_p (fn)))
return TREE_NOTHROW (fn) ? NULL_TREE : fn;
- /* A call to a constexpr function is noexcept if the call
-is a constant expression.  */
- if (DECL_DECLARED_CONSTEXPR_P (fn)
- && is_sub_constant_expr (t))
-   return NULL_TREE;
+ /* We used to treat a call to a constexpr function as noexcept if
+the call was a constant expression (CWG 1129).  This has changed
+in P0003 whereby noexcept has no special rule for constant
+expressions anymore.  Since the current behavior is important for
+certain library functionality, we treat this as a DR, therefore
+adjusting the behavior for C++11 and C++14.  Previously, we had
+to evaluate the noexcept-specifier's operand here, but that could
+cause instantiations that would fail.  */
}
   if (!TYPE_NOTHROW_P (type))
return fn;
diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C 
gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C
index dbadaa8e3d2..035afd13d39 100644
--- gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C
+++ gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept.C
@@ -10,4 +10,7 @@ constexpr T value(T t) noexcept(is_funny::value) { return 
t; } // Line 7
 
 constexpr bool ok = noexcept(value(42));
 
-static_assert(ok, "Assertion failure");
+// We used to treat a call to a constexpr function as noexcept if
+// the call was a constant expression.  We no longer do since
+// c++/87603.
+static_assert(!ok, "Assertion failure");
diff --git gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept3.C 
gcc/testsuite/g++.dg/cpp0x/constexpr-noexcept3.C
index 9541bc0781d..5a43899b5fa 100644

[PATCH, libphobos] Committed merge with upstream phobos

2019-04-11 Thread Iain Buclaw
Hi,

This patch merges the libphobos library with upstream phobos cf95639ff.

Backports port fixes from upstream that have been committed since last sync.

Bootstrapped and regression tested on x86_64-linux-gnu.

Committed to trunk as r270296.

-- 
Iain
diff --git a/libphobos/src/MERGE b/libphobos/src/MERGE
index 63bd46834c5..b4d44b55624 100644
--- a/libphobos/src/MERGE
+++ b/libphobos/src/MERGE
@@ -1,4 +1,4 @@
-ef07932811de50a1d5810ea23462d127a60574a6
+cf95639ffd9ed6f3b9d10d98461b2fbd31615757
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/phobos repository.
diff --git a/libphobos/src/std/datetime/systime.d b/libphobos/src/std/datetime/systime.d
index e65d296e427..326b5441724 100644
--- a/libphobos/src/std/datetime/systime.d
+++ b/libphobos/src/std/datetime/systime.d
@@ -214,6 +214,22 @@ public:
hnsecsToUnixEpoch;
 }
 }
+else version (DragonFlyBSD)
+{
+import core.sys.dragonflybsd.time : clock_gettime, CLOCK_REALTIME,
+CLOCK_REALTIME_FAST, CLOCK_REALTIME_PRECISE, CLOCK_SECOND;
+static if (clockType == ClockType.coarse)   alias clockArg = CLOCK_REALTIME_FAST;
+else static if (clockType == ClockType.normal)  alias clockArg = CLOCK_REALTIME;
+else static if (clockType == ClockType.precise) alias clockArg = CLOCK_REALTIME_PRECISE;
+else static if (clockType == ClockType.second)  alias clockArg = CLOCK_SECOND;
+else static assert(0, "Previous static if is wrong.");
+timespec ts;
+if (clock_gettime(clockArg, &ts) != 0)
+throw new TimeException("Call to clock_gettime() failed");
+return convert!("seconds", "hnsecs")(ts.tv_sec) +
+   ts.tv_nsec / 100 +
+   hnsecsToUnixEpoch;
+}
 else version (Solaris)
 {
 static if (clockType == ClockType.second)
diff --git a/libphobos/src/std/datetime/timezone.d b/libphobos/src/std/datetime/timezone.d
index e923a34a98d..7ae19020243 100644
--- a/libphobos/src/std/datetime/timezone.d
+++ b/libphobos/src/std/datetime/timezone.d
@@ -292,10 +292,12 @@ public:
 
 version (Posix)
 {
-version (FreeBSD)  enum utcZone = "Etc/UTC";
-else version (NetBSD)  enum utcZone = "UTC";
-else version (linux)   enum utcZone = "UTC";
-else version (OSX) enum utcZone = "UTC";
+version (FreeBSD)enum utcZone = "Etc/UTC";
+else version (NetBSD)enum utcZone = "UTC";
+else version (DragonFlyBSD)  enum utcZone = "UTC";
+else version (linux) enum utcZone = "UTC";
+else version (OSX)   enum utcZone = "UTC";
+else version (Solaris)   enum utcZone = "UTC";
 else static assert(0, "The location of the UTC timezone file on this Posix platform must be set.");
 
 auto tzs = [testTZ("America/Los_Angeles", "PST", "PDT", dur!"hours"(-8), dur!"hours"(1)),
@@ -1891,6 +1893,14 @@ public:
 // Android concatenates all time zone data into a single file and stores it here.
 enum defaultTZDatabaseDir = "/system/usr/share/zoneinfo/";
 }
+else version (Solaris)
+{
+/++
+The default directory where the TZ Database files are. It's empty
+for Windows, since Windows doesn't have them.
+  +/
+enum defaultTZDatabaseDir = "/usr/share/lib/zoneinfo/";
+}
 else version (Posix)
 {
 /++
diff --git a/libphobos/src/std/experimental/allocator/building_blocks/region.d b/libphobos/src/std/experimental/allocator/building_blocks/region.d
index dfcecce72bd..3d8431c23ca 100644
--- a/libphobos/src/std/experimental/allocator/building_blocks/region.d
+++ b/libphobos/src/std/experimental/allocator/building_blocks/region.d
@@ -392,6 +392,8 @@ struct InSituRegion(size_t size, size_t minAlign = platformAlignment)
 else version (PPC64) enum growDownwards = Yes.growDownwards;
 else version (MIPS32) enum growDownwards = Yes.growDownwards;
 else version (MIPS64) enum growDownwards = Yes.growDownwards;
+else version (RISCV32) enum growDownwards = Yes.growDownwards;
+else version (RISCV64) enum growDownwards = Yes.growDownwards;
 else version (SPARC) enum growDownwards = Yes.growDownwards;
 else version (SystemZ) enum growDownwards = Yes.growDownwards;
 else static assert(0, "Dunno how the stack grows on this architecture.");
diff --git a/libphobos/src/std/file.d b/libphobos/src/std/file.d
index 17b7ca82654..9ba992944eb 100644
--- a/libphobos/src/std/file.d
+++ b/libphobos/src/std/file.d
@@ -1488,6 +1488,7 @@ if (isInputRange!R && !isInfinite!R && isSomeChar!(ElementEncodingType!R))
 // - OS X, where the native filesystem (HFS+) 

[PATCH, libphobos] Committed merge with upstream druntime

2019-04-11 Thread Iain Buclaw
Hi,

This patch merges the libdruntime sub-library with upstream druntime 175bf5fc.

Backports extern(C) bindings from upstream druntime that have been
committed since the last sync. Notably, the Solaris port has been
undergoing a some testing to get it all passing.

Bootstrapped and regression tested on x86_64-linux-gnu.

Committed to trunk as r270295.

-- 
Iain
---
diff --git a/libphobos/libdruntime/MERGE b/libphobos/libdruntime/MERGE
index 15a55ab612a..a7bbd3da964 100644
--- a/libphobos/libdruntime/MERGE
+++ b/libphobos/libdruntime/MERGE
@@ -1,4 +1,4 @@
-d57fa1ffaecc858229ed7a730e8486b59197dee5
+175bf5fc69d26fec60d533ff77f7e915fd5bb468
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/druntime repository.
diff --git a/libphobos/libdruntime/core/stdc/stdio.d b/libphobos/libdruntime/core/stdc/stdio.d
index e708a51763c..6ce3f9d13e5 100644
--- a/libphobos/libdruntime/core/stdc/stdio.d
+++ b/libphobos/libdruntime/core/stdc/stdio.d
@@ -1643,9 +1643,6 @@ else version (CRuntime_Bionic)
 }
 else version (CRuntime_Musl)
 {
-import core.sys.posix.sys.types : off_t;
-///
-int fseeko(FILE *, off_t, int);
 @trusted
 {
 ///
diff --git a/libphobos/libdruntime/core/stdc/time.d b/libphobos/libdruntime/core/stdc/time.d
index 648f782601b..4a571e153bf 100644
--- a/libphobos/libdruntime/core/stdc/time.d
+++ b/libphobos/libdruntime/core/stdc/time.d
@@ -148,21 +148,22 @@ else
 }
 
 ///
-double  difftime(time_t time1, time_t time0);
+pure double  difftime(time_t time1, time_t time0); // MT-Safe
 ///
-time_t  mktime(tm* timeptr);
+@system time_t  mktime(scope tm* timeptr); // @system: MT-Safe env locale
 ///
-time_t  time(time_t* timer);
+time_t  time(scope time_t* timer);
+
 ///
-char*   asctime(in tm* timeptr);
+@system char*   asctime(const scope tm* timeptr); // @system: MT-Unsafe race:asctime locale
 ///
-char*   ctime(in time_t* timer);
+@system char*   ctime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf race:asctime env locale
 ///
-tm* gmtime(in time_t* timer);
+@system tm* gmtime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf env locale
 ///
-tm* localtime(in time_t* timer);
+@system tm* localtime(const scope time_t* timer); // @system: MT-Unsafe race:tmbuf env locale
 ///
-@system size_t  strftime(char* s, size_t maxsize, in char* format, in tm* timeptr);
+@system size_t  strftime(scope char* s, size_t maxsize, const scope char* format, const scope tm* timeptr); // @system: MT-Safe env locale
 
 version (Windows)
 {
@@ -171,9 +172,9 @@ version (Windows)
 ///
 void  _tzset();  // non-standard
 ///
-@system char* _strdate(char* s); // non-standard
+@system char* _strdate(return scope char* s); // non-standard
 ///
-@system char* _strtime(char* s); // non-standard
+@system char* _strtime(return scope char* s); // non-standard
 
 ///
 extern __gshared const(char)*[2] tzname; // non-standard
diff --git a/libphobos/libdruntime/core/sync/condition.d b/libphobos/libdruntime/core/sync/condition.d
index b6755f2d998..8afa8f7cc38 100644
--- a/libphobos/libdruntime/core/sync/condition.d
+++ b/libphobos/libdruntime/core/sync/condition.d
@@ -23,7 +23,12 @@ public import core.time;
 version (Windows)
 {
 private import core.sync.semaphore;
-private import core.sys.windows.windows;
+private import core.sys.windows.basetsd /+: HANDLE+/;
+private import core.sys.windows.winbase /+: CloseHandle, CreateSemaphoreA, CRITICAL_SECTION,
+DeleteCriticalSection, EnterCriticalSection, INFINITE, InitializeCriticalSection,
+LeaveCriticalSection, ReleaseSemaphore, WAIT_OBJECT_0, WaitForSingleObject+/;
+private import core.sys.windows.windef /+: BOOL, DWORD+/;
+private import core.sys.windows.winerror /+: WAIT_TIMEOUT+/;
 }
 else version (Posix)
 {
diff --git a/libphobos/libdruntime/core/sync/mutex.d b/libphobos/libdruntime/core/sync/mutex.d
index 798f8412a02..024009f48aa 100644
--- a/libphobos/libdruntime/core/sync/mutex.d
+++ b/libphobos/libdruntime/core/sync/mutex.d
@@ -20,7 +20,9 @@ public import core.sync.exception;
 
 version (Windows)
 {
-private import core.sys.windows.windows;
+private import core.sys.windows.winbase /+: CRITICAL_SECTION, DeleteCriticalSection,
+EnterCriticalSection, InitializeCriticalSection, LeaveCriticalSection,
+TryEnterCriticalSection+/;
 }
 else version (Posix)
 {
@@ -144,7 +146,7 @@ class Mutex :
 {
 import core.internal.abort : abort;
 !pthread_mutex_destroy(&m_hndl) ||
-abort("Error: pthread_mutex_init failed.");
+abort("Error: pthread_mutex_destroy failed.");
 }
 this.__monitor = null;
 }
@@ -318,7 +320,7 @@ unittest
 void useResource() shared @safe nothrow @nogc
 {
 mtx.lock_nothrow();
- 

[PATCH, d] Committed merge with upstream dmd

2019-04-11 Thread Iain Buclaw
Hi,

This patch merges the D front-end implementation with dmd upstream d7ed327ed.

Backports fix for an ICE that occurred when accessing empty array in CTFE.

Bootstrapped and regression tested on x86_64-linux-gnu.

Committed to trunk as r270294.

-- 
Iain
---
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 31ea106965b..800be95e4e6 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-5dd3eccc3b0758346f77bee3cdc3f6bd15de339b
+d7ed327edb0b01ad56e7e73e77b3401cd565675e
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/dinterpret.c b/gcc/d/dmd/dinterpret.c
index 777f89cf186..40f3e77cbc5 100644
--- a/gcc/d/dmd/dinterpret.c
+++ b/gcc/d/dmd/dinterpret.c
@@ -6272,11 +6272,14 @@ Expression *scrubReturnValue(Loc loc, Expression *e)
 /* Returns: true if e is void,
  * or is an array literal or struct literal of void elements.
  */
-static bool isVoid(Expression *e)
+static bool isVoid(Expression *e, bool checkArray = false)
 {
 if (e->op == TOKvoid)
 return true;
 
+if (checkArray && e->type->ty != Tsarray)
+return false;
+
 if (e->op == TOKarrayliteral)
 return isEntirelyVoid(((ArrayLiteralExp *)e)->elements);
 
@@ -6314,7 +6317,7 @@ Expression *scrubArray(Loc loc, Expressions *elems, bool structlit)
 
 // A struct .init may contain void members.
 // Static array members are a weird special case (bug 10994).
-if (structlit && isVoid(e))
+if (structlit && isVoid(e, true))
 {
 e = NULL;
 }
diff --git a/gcc/testsuite/gdc.test/compilable/test19778.d b/gcc/testsuite/gdc.test/compilable/test19778.d
new file mode 100644
index 000..87905fae6a0
--- /dev/null
+++ b/gcc/testsuite/gdc.test/compilable/test19778.d
@@ -0,0 +1,6 @@
+struct S
+{
+int[] data;
+}
+immutable X = S([]);
+enum len = X.data.length;


Re: [PATCH] Replace direct PSTL uses of assert() with a macro

2019-04-11 Thread Thomas Rodgers
 removed (patch version applied attached).

Tested x86_64-linux, committed to trunk.

Jonathan Wakely writes:

> On 10/04/19 23:59 +0100, Jonathan Wakely wrote:
>>On 10/04/19 15:57 -0700, Thomas Rodgers wrote:
>>>Ok, lets try this again.
>>>
On 09/04/19 15:23 -0700, Thomas Rodgers wrote:
>This also replaces calls to __TBB_ASSERT so that there are two macro
>definitions provided by c++config -
>   __PSTL_ASSERT(_Condition)
>   __PSTL_ASSERT_MSG(_Condition, _Message)
>
>   * include/bits/c++config:
>   Add definition for __PSTL_ASSERT.
>   Add definition for __PSTL_ASSERT_MSG.
>   * include/pstl/algorithm_impl.h: Replace use of assert().
>   * include/pstl/numeric_impl.h: Replace use of assert().
>   * include/pstl/parallel_backend_tbb.h:
>   Replace use of assert().
>   Replace use of __TBB_ASSERT().
>
>   * include/pstl/parallel_backend_utils.h: Replace use of assert().
>>>
>>>Jonathan Wakely writes:
... you fix now ...
>>
>>Looks good, OK for trunk, thanks.
>
> Looks like parallel_backend_tbb.h still includes  after this
> patch. Assuming tests still pass with that removed, that tweak to the
> patch is pre-approved.

>From 87483056b4fecb08455443b34b9658f4d7289a74 Mon Sep 17 00:00:00 2001
From: Thomas Rodgers 
Date: Fri, 5 Apr 2019 15:27:35 -0700
Subject: [PATCH] Replace direct PSTL uses of assert() with a macro

This also replaces calls to __TBB_ASSERT so that there are two macro
definitions provided by c++config -
	__PSTL_ASSERT(_Condition)
	__PSTL_ASSERT_MSG(_Condition, _Message)

	* include/bits/c++config:
	Add definition for __PSTL_ASSERT.
	Add definition for __PSTL_ASSERT_MSG.
	* include/pstl/algorithm_impl.h: Replace use of assert().
	* include/pstl/numeric_impl.h: Replace use of assert().
	* include/pstl/parallel_backend_tbb.h:
	Replace use of assert().
	Replace use of __TBB_ASSERT().

	* include/pstl/parallel_backend_utils.h: Replace use of assert().
---
 libstdc++-v3/include/bits/c++config   |  4 
 libstdc++-v3/include/pstl/algorithm_impl.h| 10 +-
 libstdc++-v3/include/pstl/numeric_impl.h  |  4 ++--
 libstdc++-v3/include/pstl/parallel_backend_tbb.h  |  9 -
 .../include/pstl/parallel_backend_utils.h | 15 +++
 5 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index 66420a9a3f2..ef8ba96737b 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -690,6 +690,10 @@ namespace std
 #  undef __PSTL_PAR_BACKEND_TBB
 # endif
 
+# define __PSTL_ASSERT(_Condition) __glibcxx_assert(_Condition)
+# define __PSTL_ASSERT_MSG(_Condition, _Message) __glibcxx_assert(_Condition)
+
+
 # define __PSTL_PRAGMA(x) _Pragma (#x)
 
 # define __PSTL_STRING_AUX(x) #x
diff --git a/libstdc++-v3/include/pstl/algorithm_impl.h b/libstdc++-v3/include/pstl/algorithm_impl.h
index e06bf60151e..b0d60baae14 100644
--- a/libstdc++-v3/include/pstl/algorithm_impl.h
+++ b/libstdc++-v3/include/pstl/algorithm_impl.h
@@ -2731,8 +2731,8 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa
  return !__internal::__parallel_or(
 std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
 [__first1, __last1, __first2, __last2, &__comp](_ForwardIterator2 __i, _ForwardIterator2 __j) {
-assert(__j > __i);
-//assert(__j - __i > 1);
+__PSTL_ASSERT(__j > __i);
+//__PSTL_ASSERT(__j - __i > 1);
 
 //1. moving boundaries to "consume" subsequence of equal elements
 auto __is_equal = [&__comp](_ForwardIterator2 __a, _ForwardIterator2 __b) -> bool {
@@ -2756,8 +2756,8 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa
 //2. testing is __a subsequence of the second range included into the first range
 auto __b = std::lower_bound(__first1, __last1, *__i, __comp);
 
-assert(!__comp(*(__last1 - 1), *__b));
-assert(!__comp(*(__j - 1), *__i));
+__PSTL_ASSERT(!__comp(*(__last1 - 1), *__b));
+__PSTL_ASSERT(!__comp(*(__j - 1), *__i));
 return !std::includes(__b, __last1, __i, __j, __comp);
 });
 });
@@ -2948,7 +2948,7 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _
 }
 
 const auto __m2 = __left_bound_seq_2 - __first2;
-assert(__m1 == 0 || __m2 == 0);
+__PSTL_ASSERT(__m1 == 0 || __m2 == 0);
 if (__m2 > __set_algo_cut_off)
 {
 auto __res_or = __result;
diff --git a/libstdc++-v3/include/pstl/numeric_impl.h b/libstdc++-v3/include/pstl/numeric_impl.h
index 49a4abf5a95..738a61d92f6 100644
--- a/libstdc++-v3/include/pstl/numeric_impl.h
+++ b/libstdc++

[patch, fortran, committed] Fix error messages for translation

2019-04-11 Thread Thomas Koenig

Hello world,

I have just committed the attached patch as obvious and simple.  It
fixes an error message so that it can be extracted for translation.
No test case because it does not change anything (which was verified
by regression-testing).

Regards

Thomas

2019-04-11  Thomas Koenig  

PR translation/89939
* frontend-passes.c (B_ERROR): Delete macro.
(C_ERROR): Delete macro.
(B_ERROR_1): New macro.
(C_ERROR_1): New macro.
(C_ERROR_2): New macro.
(inline_matmul_assign): Use new macros.
(call_external_blas): Likewise.
Index: frontend-passes.c
===
--- frontend-passes.c	(Revision 270182)
+++ frontend-passes.c	(Arbeitskopie)
@@ -3743,13 +3743,16 @@ check_conjg_transpose_variable (gfc_expr *e, bool
 
 /* Macros for unified error messages.  */
 
-#define B_ERROR(n) _("Incorrect extent in argument B in MATMUL intrinsic in " \
-		 "dimension " #n ": is %ld, should be %ld")
+#define B_ERROR_1 _("Incorrect extent in argument B in MATMUL intrinsic in " \
+		 "dimension 1: is %ld, should be %ld")
 
-#define C_ERROR(n) _("Array bound mismatch for dimension " #n " of array " \
-		 "(%ld/%ld)")
+#define C_ERROR_1 _("Array bound mismatch for dimension 1 of array " \
+		"(%ld/%ld)")
 
+#define C_ERROR_2 _("Array bound mismatch for dimension 2 of array " \
+		"(%ld/%ld)")
 
+
 /* Inline assignments of the form c = matmul(a,b).
Handle only the cases currently where b and c are rank-two arrays.
 
@@ -3976,7 +3979,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 
 	  b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
 	  a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
-	  test = runtime_error_ne (b1, a2, B_ERROR(1));
+	  test = runtime_error_ne (b1, a2, B_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
@@ -3984,7 +3987,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 	{
 	  c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
-	  test = runtime_error_ne (c1, a1, C_ERROR(1));
+	  test = runtime_error_ne (c1, a1, C_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 	}
@@ -3994,7 +3997,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 
 	  b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
-	  test = runtime_error_ne (b1, a1, B_ERROR(1));
+	  test = runtime_error_ne (b1, a1, B_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
@@ -4002,7 +4005,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 	{
 	  c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
 	  b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
-	  test = runtime_error_ne (c1, b2, C_ERROR(1));
+	  test = runtime_error_ne (c1, b2, C_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 	}
@@ -4012,7 +4015,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 
 	  b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
 	  a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
-	  test = runtime_error_ne (b1, a2, B_ERROR(1));
+	  test = runtime_error_ne (b1, a2, B_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
@@ -4020,13 +4023,13 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 	{
 	  c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
-	  test = runtime_error_ne (c1, a1, C_ERROR(1));
+	  test = runtime_error_ne (c1, a1, C_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
 	  c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
 	  b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
-	  test = runtime_error_ne (c2, b2, C_ERROR(2));
+	  test = runtime_error_ne (c2, b2, C_ERROR_2);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 	}
@@ -4037,7 +4040,7 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 	  b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
 	  a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
 	  /* matrix_b is transposed, hence dimension 1 for the error message.  */
-	  test = runtime_error_ne (b2, a2, B_ERROR(1));
+	  test = runtime_error_ne (b2, a2, B_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
@@ -4045,13 +4048,13 @@ inline_matmul_assign (gfc_code **c, int *walk_subt
 	{
 	  c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
 	  a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
-	  test = runtime_error_ne (c1, a1, C_ERROR(1));
+	  test = runtime_error_ne (c1, a1, C_ERROR_1);
 	  *next_code_point = test;
 	  next_code_point = &test->next;
 
 	  c2

C++ PATCH to use build_converted_constant_bool_expr for noexcept-specifiers

2019-04-11 Thread Marek Polacek
[except.spec] says that "In a noexcept-specifier, the constant-expression,
if supplied, shall be a contextually converted constant expression of type
bool".  We now have a dedicated function for creating such an expression,
so build_noexcept_spec should make use of it.

As a consequence, we now reject pr86397-2.C but that's correct: converting
a pointer to bool is a "boolean conversion", which is not allowed under the
rules for a converted constant expression ([expr.const]p7).

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-04-11  Marek Polacek  

* except.c (build_noexcept_spec): Use build_converted_constant_bool_expr
instead of perform_implicit_conversion_flags.

* g++.dg/cpp0x/noexcept30.C: Tweak dg-error.
* g++.dg/cpp0x/pr86397-1.C: Likewise.
* g++.dg/cpp0x/pr86397-2.C: Likewise.

diff --git gcc/cp/except.c gcc/cp/except.c
index 40e973fad66..25ab8699589 100644
--- gcc/cp/except.c
+++ gcc/cp/except.c
@@ -1285,9 +1285,7 @@ build_noexcept_spec (tree expr, tsubst_flags_t complain)
   if (TREE_CODE (expr) != DEFERRED_NOEXCEPT
   && !value_dependent_expression_p (expr))
 {
-  expr = perform_implicit_conversion_flags (boolean_type_node, expr,
-   complain,
-   LOOKUP_NORMAL);
+  expr = build_converted_constant_bool_expr (expr, complain);
   expr = instantiate_non_dependent_expr (expr);
   expr = cxx_constant_value (expr);
 }
diff --git gcc/testsuite/g++.dg/cpp0x/noexcept30.C 
gcc/testsuite/g++.dg/cpp0x/noexcept30.C
index 8c7ff2aad45..6a9f7821092 100644
--- gcc/testsuite/g++.dg/cpp0x/noexcept30.C
+++ gcc/testsuite/g++.dg/cpp0x/noexcept30.C
@@ -5,7 +5,7 @@
 template
 struct F {
   template
-  void f() noexcept(&F::template f) {} // { dg-error "exception 
specification" }
+  void f() noexcept(&F::template f) {} // { dg-error "exception 
specification|convert" }
 };
 
 int main () {
diff --git gcc/testsuite/g++.dg/cpp0x/pr86397-1.C 
gcc/testsuite/g++.dg/cpp0x/pr86397-1.C
index a0123cba0da..c6cfc1b5561 100644
--- gcc/testsuite/g++.dg/cpp0x/pr86397-1.C
+++ gcc/testsuite/g++.dg/cpp0x/pr86397-1.C
@@ -1,5 +1,5 @@
 // { dg-do compile { target c++11 } }
 // { dg-options "-fdelete-null-pointer-checks" }
 void e();
-template  void f(int() noexcept(e)) {}
-template void f(int()); // { dg-error "does not match" "" { target 
c++17 } }
+template  void f(int() noexcept(e)) {} // { dg-error "convert" }
+template void f(int());
diff --git gcc/testsuite/g++.dg/cpp0x/pr86397-2.C 
gcc/testsuite/g++.dg/cpp0x/pr86397-2.C
index 8e4956bcf6c..54aefdb0916 100644
--- gcc/testsuite/g++.dg/cpp0x/pr86397-2.C
+++ gcc/testsuite/g++.dg/cpp0x/pr86397-2.C
@@ -1,5 +1,5 @@
 // { dg-do compile { target c++11 } }
 // { dg-options "-fdelete-null-pointer-checks" }
 void e();
-template  void f(int() noexcept(e)) {}
-template void f(int() noexcept);
+template  void f(int() noexcept(e)) {} // { dg-error "convert" }
+template void f(int() noexcept); // { dg-error "does not match any 
template declaration" "" { target c++17 } }


[PATCH] PR libstdc++/90046 fix build failure on epiphany-elf

2019-04-11 Thread Jonathan Wakely

The epiphany-elf target aligns structs to 8 bytes, which causes the
static_assert(alignof(_Chunk) == 1) to fail.

Instead of requiring _Chunks to be positionable at any alignment, ensure
new buffers are aligned to alignof(_Chunk). Because the buffer size is a
power of two, we know that both the buffer size and sizeof(_Chunk) are
multiples of alignof(_Chunk). So is p is aligned to alignof(_Chunk) then
so is (p + size - sizeof(_Chunk)). So just ensure the new buffer is
aligned to at least alignof(_Chunk), which should already be true
because the caller requests at least alignof(max_align_t).

PR libstdc++/90046
* src/c++17/memory_resource.cc
(monotonic_buffer_resource::_Chunk::allocate): Increase alignment if
needed to allow placing a _Chunk at the end of the buffer.
(monotonic_buffer_resource::_M_new_buffer): Remove static_assert.

Tested x86_64-linux, bootstrapped epiphany-elf, committed to trunk./

commit 8af70056b079035bfb5a6a3a9df4b10194b025cf
Author: Jonathan Wakely 
Date:   Thu Apr 11 20:32:40 2019 +0100

PR libstdc++/90046 fix build failure on epiphany-elf

The epiphany-elf target aligns structs to 8 bytes, which causes the
static_assert(alignof(_Chunk) == 1) to fail.

Instead of requiring _Chunks to be positionable at any alignment, ensure
new buffers are aligned to alignof(_Chunk). Because the buffer size is a
power of two, we know that both the buffer size and sizeof(_Chunk) are
multiples of alignof(_Chunk). So is p is aligned to alignof(_Chunk) then
so is (p + size - sizeof(_Chunk)). So just ensure the new buffer is
aligned to at least alignof(_Chunk), which should already be true
because the caller requests at least alignof(max_align_t).

PR libstdc++/90046
* src/c++17/memory_resource.cc
(monotonic_buffer_resource::_Chunk::allocate): Increase alignment if
needed to allow placing a _Chunk at the end of the buffer.
(monotonic_buffer_resource::_M_new_buffer): Remove static_assert.

diff --git a/libstdc++-v3/src/c++17/memory_resource.cc 
b/libstdc++-v3/src/c++17/memory_resource.cc
index cd11bf5875c..b6698011f5c 100644
--- a/libstdc++-v3/src/c++17/memory_resource.cc
+++ b/libstdc++-v3/src/c++17/memory_resource.cc
@@ -182,7 +182,21 @@ namespace pmr
 _Chunk*& __head)
 {
   __size = std::__ceil2(__size + sizeof(_Chunk));
+
+  if constexpr (alignof(_Chunk) > 1)
+   {
+ // PR libstdc++/90046
+ // For targets like epiphany-elf where alignof(_Chunk) != 1
+ // ensure that the last sizeof(_Chunk) bytes in the buffer
+ // are suitably-aligned for a _Chunk.
+ // This should be unnecessary, because the caller already
+ // passes in max(__align, alignof(max_align_t)).
+ if (__align < alignof(_Chunk))
+   __align = alignof(_Chunk);
+   }
+
   void* __p = __r->allocate(__size, __align);
+
   // Add a chunk defined by (__p, __size, __align) to linked list __head.
   void* const __back = (char*)__p + __size - sizeof(_Chunk);
   __head = ::new(__back) _Chunk(__size, __align, __head);
@@ -231,9 +245,6 @@ namespace pmr
   void
   monotonic_buffer_resource::_M_new_buffer(size_t bytes, size_t alignment)
   {
-// Need to check this somewhere, so put it here:
-static_assert(alignof(monotonic_buffer_resource::_Chunk) == 1);
-
 const size_t n = std::max(bytes, _M_next_bufsiz);
 const size_t m = std::max(alignment, alignof(std::max_align_t));
 auto [p, size] = _Chunk::allocate(_M_upstream, n, m, _M_head);


Re: [RFC] D support for S/390

2019-04-11 Thread Rainer Orth
Hi Robin,

>> This will occur on any 32-bit target.  The following patch (using
>> ssize_t instead) allowed the code to compile:
>
> thanks, included your fix and attempted a more generic version of the
> 186 test.
>
> I also continued debugging some fails further:
>
> - Most of the MurmurHash fails are simply due to the wrong byte order
> being asserted but I did not yet check whether multi-chunk hashes are
> more complicated to get right - I suppose not, though.
>
> - The regex searches are even documented to not work properly on
> big-endian platforms. I still guess they could be fixed without too much
> effort.
>
> - Math unit tests fail due to lower precision than on other machines.
> Maybe this is because I only tested using -O0.

good.  I'm seeing those two, but at the moment concentrate on a couple
of Solaris/SPARC specific issues (mcontext_t was wrong, causing memory
corruption; need a different variant of makecontext; stack needs
doubleword alignment; some minor stuff).  64-bit results are not too bad ATM:

=== gdc Summary for unix/-m64 ===

# of expected passes29159
# of unexpected failures199
# of unsupported tests  20

=== libphobos Summary for unix/-m64 ===

# of expected passes398
# of unexpected failures22

and some of the failures also occur on Solaris/x86, but 32-bit execution
tests are terrible right now, mostly due to the same issue, I believe:

FAIL: libphobos.aa/test_aa.d execution test

Thread 2 received signal SIGSEGV, Segmentation fault.
[Switching to Thread 1 (LWP 1)]
0x0009c9b0 in rt.aaA.Impl.length() const (this=...)
at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:87
87  assert(used >= deleted);
(gdb) where
#0  0x0009c9b0 in rt.aaA.Impl.length() const (this=...)
at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:87
#1  0x0009c960 in rt.aaA.AA.empty() const (this=...)
at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:44
#2  0x0009e83c in _aaValues (aa=..., keysz=4, valsz=1, 
tiValueArray=0x718fc ) at 
/vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/rt/aaA.d:513
#3  0x00082688 in object.values!(test_aa.testKeysValues1().T[int], 
test_aa.testKeysValues1().T, int).values(test_aa.testKeysValues1().T[int]) 
(aa=...)
at /vol/gcc/src/hg/trunk/solaris/libphobos/libdruntime/object.d:2171
#4  0x00077a9c in test_aa.testKeysValues1() ()
at 
/vol/gcc/src/hg/trunk/solaris/libphobos/testsuite/libphobos.aa/test_aa.d:56
#5  0x00077604 in D main ()
at 
/vol/gcc/src/hg/trunk/solaris/libphobos/testsuite/libphobos.aa/test_aa.d:3

1: x/i $pc
=> 0x9c9b0 <_D2rt3aaA4Impl6lengthMxFNaNbNdNiZk+32>: ld  [ %g1 + 8 ], %g2
(gdb) p/x $g1
$1 = 0x8

Investigating this is my next task.

> @Iain: With the patch as it is - hoping no additional tab/space damage
> :) - is there any chance of getting it upstream anytime soon?

I noticed you missed one piece of Iain's typeinfo.cc patch, btw.:

diff --git a/gcc/d/typeinfo.cc b/gcc/d/typeinfo.cc
--- a/gcc/d/typeinfo.cc
+++ b/gcc/d/typeinfo.cc
@@ -886,7 +886,7 @@ public:
if (cd->isCOMinterface ())
  flags |= ClassFlags::isCOMclass;
 
-   this->layout_field (build_integer_cst (flags));
+   this->layout_field (build_integer_cst (flags, d_uint_type));
 
/* void *deallocator;
   OffsetTypeInfo[] m_offTi;  (not implemented)

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-04-11 Thread Steve Ellcey
On Thu, 2019-04-11 at 17:00 +0100, Richard Earnshaw (lists) wrote:
> 
> 
> Please add _alt at the end, to distinguish from the insn above.
> 
> Otherwise OK.

I added _alt, I also had to move the "0" contraint and the "r"
constraint.  I had "0" with operand 3 instead of operand 1 and
that caused a couple of test failures.  I fixed it and the regressions
went away.  I also had to tweak gcc.target/aarch64/combine_bfxil.c,
some of the bfxil instructions became bfi instructions so I updated
the scan checks.

Steve Ellcey


[PATCH v2] Fix __patchable_function_entries section flags

2019-04-11 Thread Joao Moreira
When -fpatchable-relocation-entry is used, gcc places nops on the
prologue of each compiled function and creates a section named
__patchable_function_entries which holds relocation entries for the
positions in which the nops were placed. As is, gcc creates this
section without the proper section flags, causing crashes in the
compiled program during its load.

Given the above, fix the problem by creating the section with the
SECTION_WRITE and SECTION_RELRO flags.

The problem was noticed while compiling glibc with
-fpatchable-function-entry compiler flag. After applying the patch,
this issue was solved.

This was also tested on x86-64 arch without visible problems under
the gcc standard tests.

2019-04-10  Joao Moreira  

* targhooks.c (default_print_patchable_function_entry): Emit
__patchable_function_entries section with writable flags to allow
relocation resolution.

Signed-off-by: Joao Moreira 
---
 gcc/targhooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 318f7e9784a..e6f54ddf41b 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1814,7 +1814,7 @@ default_print_patchable_function_entry (FILE *file,
   ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number);
 
   switch_to_section (get_section ("__patchable_function_entries",
- 0, NULL));
+ SECTION_WRITE | SECTION_RELRO, NULL));
   fputs (asm_op, file);
   assemble_name_raw (file, buf);
   fputc ('\n', file);
-- 
2.16.4



Packaging boxes

2019-04-11 Thread Slite Cao
Hello,
 
Attached you some of our factory made custom box with custom logo you
may interest as following.
Best regards
Slite
Best regards,
Slite
-
SLC-PACKAGE CO.LTD
www.slcpackage.com  
Always Serve you by Heart
Address: No.10 Hualai Packaging Industry Park,Yuwu
Jinhua,Zhejiang China
Contact Person: Slite Cao
Tel:0086 15314679598  Fax: 0086 571 85135628
What's App/Wechat: 008618159815925
Skype:slite...@outlook.com
If you don't want to received such emails,please click UNSUBSCRIBE
/http://tracking.slcpack.info/tracking/unsubscribe?d=j1jX4QlVQ6RF_ssSz-6BnzAZEDm5gj-tZu2ODvQ50wx8jQs-ZUMYF-9SdKkXv51xBzsPXJX9LFRG5jR7PWq9KktbjS61gcR5JcCd0Iy46139PgyLFrNQNKsw25_fL1XMlTfPVFdRTEo8eGPYPXE3e5oxQExloiS2uaydIM8wxKv64FKSZRUPgOogxjpBvBfV-egtcflisjbuP_fcolhu82_XUFDv2lsDJ_ixZXKT4JEJlpbZexa1IFuVxM0IHLA0SmsENt8WrCO4i4Jnc19ac-y4gv032ZWrUrzjHE_vNdQg0cc1P7_fGwrDp-Py1VtxBo9jm0Y0Q8NqrKGIMmZogy4j_6HSVXSQicuwQT-KCQIkpQ9B9nRdLuDYlBg_WyGjILz5EJsZVrSX36VO-ttK74TnxN_HFNea0oP7DaHJb1lQMwTCUzZMkPiZdWcb50mQqYMHa3gbe4F2p9BgSMXZ4M62V4hHfR6F61CVqfimE9I60.
/Slite Cao, China, Longyan, Fujian, 364200, China

Re: [PATCH] Add support for missing AVX512* ISAs (PR target/89929).

2019-04-11 Thread H.J. Lu
On Thu, Apr 11, 2019 at 1:38 AM Martin Liška  wrote:
>
> Hi.
>
> The patch is adding missing AVX512 ISAs for target and target_clone
> attributes.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR target/89929
> * config/i386/i386.c (get_builtin_code_for_version): Add
> support for missing AVX512 ISAs.
>
> gcc/testsuite/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR target/89929
> * g++.target/i386/mv28.C: New test.
> * gcc.target/i386/mvc14.c: New test.
> ---
>  gcc/config/i386/i386.c| 34 ++-
>  gcc/testsuite/g++.target/i386/mv28.C  | 30 +++
>  gcc/testsuite/gcc.target/i386/mvc14.c | 16 +
>  3 files changed, 79 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/mv28.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c
>
>

Since any ISAs beyond AVX512F may be enabled individually, we
can't simply assign priorities to them.   For GFNI, we can have

1. GFNI
2.  GFNI + AVX
3.  GFNI + AVX512F
4. GFNI + AVX512F + AVX512VL

For this code,  GFNI + AVX512BW is ignored:

[hjl@gnu-cfl-1 pr89929]$ cat z.ii
__attribute__((target("gfni")))
int foo(int i) {
return 1;
}
__attribute__((target("gfni,avx512bw")))
int foo(int i) {
return 4;
}
__attribute__((target("default")))
int foo(int i) {
return 3;
}
int bar ()
{
return foo(2);
}
[hjl@gnu-cfl-1 pr89929]$
/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-wip-debug/build-x86_64-linux/gcc/
-S z.ii -O2
[hjl@gnu-cfl-1 pr89929]$ cat z.s
.file "z.ii"
.text
.p2align 4
.globl _Z3fooi.gfni
.type _Z3fooi.gfni, @function
_Z3fooi.gfni:
.LFB0:
.cfi_startproc
movl $1, %eax
ret
.cfi_endproc
.LFE0:
.size _Z3fooi.gfni, .-_Z3fooi.gfni
.p2align 4
.globl _Z3fooi
.type _Z3fooi, @function
_Z3fooi:
.LFB2:
.cfi_startproc
movl $3, %eax
ret
.cfi_endproc
.LFE2:
.size _Z3fooi, .-_Z3fooi
.p2align 4
.globl _Z3fooi.avx512bw_gfni
.type _Z3fooi.avx512bw_gfni, @function
_Z3fooi.avx512bw_gfni:
.LFB1:
.cfi_startproc
movl $4, %eax
ret
.cfi_endproc
.LFE1:
.size _Z3fooi.avx512bw_gfni, .-_Z3fooi.avx512bw_gfni
.section .text._Z3fooi.resolver,"axG",@progbits,_Z3fooi.resolver,comdat
.p2align 4
.weak _Z3fooi.resolver
.type _Z3fooi.resolver, @function
_Z3fooi.resolver:
.LFB5:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
call __cpu_indicator_init
movl $_Z3fooi, %eax
movl $_Z3fooi.gfni, %edx
testb $1, __cpu_features2(%rip)
cmovne %rdx, %rax
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE5:
.size _Z3fooi.resolver, .-_Z3fooi.resolver
.globl _Z7_Z3fooii
.type _Z7_Z3fooii, @gnu_indirect_function
.set _Z7_Z3fooii,_Z3fooi.resolver
.text
.p2align 4
.globl _Z3barv
.type _Z3barv, @function
_Z3barv:
.LFB3:
.cfi_startproc
movl $2, %edi
jmp _Z7_Z3fooii
.cfi_endproc
.LFE3:
.size _Z3barv, .-_Z3barv
.ident "GCC: (GNU) 9.0.1 20190411 (experimental)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 pr89929]$

For AVX512 ISAs, we need a different scheme for priorities.


-- 
H.J.


Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-04-11 Thread Richard Earnshaw (lists)
On 11/04/2019 16:21, Steve Ellcey wrote:
> On Thu, 2019-04-11 at 14:58 +, Steve Ellcey wrote:
>>
>>> You've removed the ..._noshift_alt variant.  That wasn't my
>>> intention,
>>> so perhaps you misunderstood what I was trying to say.
>>>
>>> The two versions are both needed, since the register tie is not
>>> orthogonal to the constants used in the masks and canonicalization
>>> will
>>> not generate a consistent ordering of the operands.
>>
>> I started doing this and then convinced myself (perhaps incorrectly)
>> that I didn't need the alt version.  Operands 1 and 3 are registers
>> and Operands 2 and 4 are constants, so the only difference is in the 
>> call to aarch64_masks_and_shift_for_bfi_p.  Given that argument 2 to
>> this call is 0 this call should be the equivelent of ((x & MASK1) |
>> (y
>> & MASK2)) and that should mean that:
>>
>> aarch64_masks_and_shift_for_bfi_p(X,0,Y) ==
>> aarch64_masks_and_shift_for_bfi_p(Y,0,X)
>>
>>
>> Maybe I am wrong about that?  I will do some expirements.  My testing
>> did not find any cases in the testsuite where not having the _alt
>> version resulted in a bfi not being generated.
> 
> OK, I think I see where my idea that I didn't need both versions is
> wrong.  There is an extra check on the final argument that is not done
> on the initial argument.  Here is the patch that I am testing, I think
> it is what you have in mind.  It looks wierd having arguments 3 and 4
> before 1 and 2, I think that is why I had it differently in my original
> patch but if you prefer this version, that is fine with me.
> 
> OK to check in once my build/test is finished?
> 
> 
> 
> 2018-04-11  Steve Ellcey  
> 
>   PR rtl-optimization/87763
>   * config/aarch64/aarch64.md (*aarch64_bfi4_noshift):
>   New Instruction.
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.md
> b/gcc/config/aarch64/aarch64.md
> index e0df975..eac688a 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -5565,7 +5565,8 @@
>  )
>  
>  ;; Like *aarch64_bfi5_shift but with no shifting, we are just
> -;; copying the least significant bits of OP3 to OP0.
> +;; copying the least significant bits of OP3 to OP0.  We need two versions
> +;; of the instruction to handle different checks on the constant values.
>  
>  (define_insn "*aarch64_bfi4_noshift"
>[(set (match_operand:GPI 0 "register_operand" "=r")
> @@ -5579,6 +5580,18 @@
>[(set_attr "type" "bfm")]
>  )
>  
> +(define_insn "*aarch64_bfi4_noshift"

Please add _alt at the end, to distinguish from the insn above.

Otherwise OK.

R.

> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "0")
> +  (match_operand:GPI 4 "const_int_operand" "n"))
> + (and:GPI (match_operand:GPI 1 "register_operand" "r")
> +  (match_operand:GPI 2 "const_int_operand" "n"]
> +  "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0,
> +   UINTVAL (operands[4]))"
> +  "bfi\t%0, %3, 0, %P4"
> +  [(set_attr "type" "bfm")]
> +)
> +
>  (define_insn "*extr_insv_lower_reg"
>[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
> (match_operand 1 "const_int_operand" "n")
> 



Re: [RFC] D support for S/390

2019-04-11 Thread Rainer Orth
Hi Iain,

> @Rainer Orth  any last requests before I commit the fix for PR
> d/89255?  That will make testing individual library modules easier I
> guess.

no, the patch is perfectly fine as is.  Sorry for not following up
earlier; I've now tested it successfully on Solaris 11.[345]/x86 and am
also using it on Solaris 11.5/SPARC (still WIP, but getting closer).

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Add target-zlib to top-level configure, use zlib from libphobos

2019-04-11 Thread Iain Buclaw
On Sat, 6 Apr 2019 at 19:05, Iain Buclaw  wrote:
>
> On Sat, 6 Apr 2019 at 17:27, Matthias Klose  wrote:
> >
> > On 29.03.19 23:23, Iain Buclaw wrote:
> > > On Mon, 18 Feb 2019 at 14:26, Matthias Klose  wrote:
> > >>
> > >>
> > >> sorry, I didn't mean to propose to rename the option, so
> > >> --with-target-system-zlib=auto sounds fine.
> > >
> > > OK, a bit belated, but here it is --with-target-system-zlib=auto.
> >
> > yes, this does the job.
> >
>
> Good.  I added documentation to install.texi.
>

Is this OK for trunk?  It's the only prerequisite for applying
subdir-objects to libphobos.

-- 
Iain


Re: [RFC] D support for S/390

2019-04-11 Thread Iain Buclaw
On Thu, 11 Apr 2019 at 17:43, Robin Dapp  wrote:
>
> Hi Rainer,
>
> > This will occur on any 32-bit target.  The following patch (using
> > ssize_t instead) allowed the code to compile:
>
> thanks, included your fix and attempted a more generic version of the
> 186 test.
>
> I also continued debugging some fails further:
>
> - Most of the MurmurHash fails are simply due to the wrong byte order
> being asserted but I did not yet check whether multi-chunk hashes are
> more complicated to get right - I suppose not, though.
>
> - The regex searches are even documented to not work properly on
> big-endian platforms. I still guess they could be fixed without too much
> effort.
>
> - Math unit tests fail due to lower precision than on other machines.
> Maybe this is because I only tested using -O0.
>
> @Iain: With the patch as it is - hoping no additional tab/space damage
> :) - is there any chance of getting it upstream anytime soon?
>

I'll push for it tonight, thanks.

@Rainer Orth  any last requests before I commit the fix for PR
d/89255?  That will make testing individual library modules easier I
guess.

-- 
Iain


Re: [RFC] D support for S/390

2019-04-11 Thread Robin Dapp
Hi Rainer,

> This will occur on any 32-bit target.  The following patch (using
> ssize_t instead) allowed the code to compile:

thanks, included your fix and attempted a more generic version of the
186 test.

I also continued debugging some fails further:

- Most of the MurmurHash fails are simply due to the wrong byte order
being asserted but I did not yet check whether multi-chunk hashes are
more complicated to get right - I suppose not, though.

- The regex searches are even documented to not work properly on
big-endian platforms. I still guess they could be fixed without too much
effort.

- Math unit tests fail due to lower precision than on other machines.
Maybe this is because I only tested using -O0.

@Iain: With the patch as it is - hoping no additional tab/space damage
:) - is there any chance of getting it upstream anytime soon?

Regards
 Robin
diff --git a/gcc/d/dmd/constfold.c b/gcc/d/dmd/constfold.c
index ddd356bb966..cb58d4b4f5b 100644
--- a/gcc/d/dmd/constfold.c
+++ b/gcc/d/dmd/constfold.c
@@ -1752,14 +1752,17 @@ UnionExp Cat(Type *type, Expression *e1, Expression *e2)
 }
 else if (e1->op == TOKint64 && e2->op == TOKstring)
 {
-// Concatenate the strings
+// [w|d]?char ~ string --> string
+// We assume that we only ever prepend one char of the same type
+// (wchar,dchar) as the string's characters.
+
 StringExp *es2 = (StringExp *)e2;
 size_t len = 1 + es2->len;
 unsigned char sz = es2->sz;
 dinteger_t v = e1->toInteger();
 
 void *s = mem.xmalloc((len + 1) * sz);
-memcpy((char *)s, &v, sz);
+Port::valcpy((char *)s, v, sz);
 memcpy((char *)s + sz, es2->string, es2->len * sz);
 
 // Add terminating 0
diff --git a/gcc/d/typeinfo.cc b/gcc/d/typeinfo.cc
index dac66acdcd4..21bc4d62a68 100644
--- a/gcc/d/typeinfo.cc
+++ b/gcc/d/typeinfo.cc
@@ -830,7 +830,7 @@ public:
 	flags |= ClassFlags::noPointers;
 
 Lhaspointers:
-	this->layout_field (size_int (flags));
+	this->layout_field (build_integer_cst (flags, d_uint_type));
 
 	/* void *deallocator;  */
 	tree ddtor = (cd->aggDelete)
@@ -886,7 +886,7 @@ public:
 	if (cd->isCOMinterface ())
 	  flags |= ClassFlags::isCOMclass;
 
-	this->layout_field (size_int (flags));
+	this->layout_field (build_integer_cst (flags));
 
 	/* void *deallocator;
 	   OffsetTypeInfo[] m_offTi;  (not implemented)
@@ -1019,7 +1019,7 @@ public:
 StructFlags::Type m_flags = 0;
 if (ti->hasPointers ())
   m_flags |= StructFlags::hasPointers;
-this->layout_field (size_int (m_flags));
+this->layout_field (build_integer_cst (m_flags, d_uint_type));
 
 /* void function(void*) xdtor;  */
 tree dtor = (sd->dtor) ? build_address (get_symbol_decl (sd->dtor))
@@ -1033,7 +1033,7 @@ public:
   this->layout_field (null_pointer_node);
 
 /* uint m_align;  */
-this->layout_field (size_int (ti->alignsize ()));
+this->layout_field (build_integer_cst (ti->alignsize (), d_uint_type));
 
 if (global.params.is64bit)
   {
@@ -1489,8 +1489,8 @@ create_typeinfo (Type *type, Module *mod)
   array_type_node, array_type_node,
   ptr_type_node, ptr_type_node,
   ptr_type_node, ptr_type_node,
-  size_type_node, ptr_type_node,
-  ptr_type_node, size_type_node,
+  d_uint_type, ptr_type_node,
+  ptr_type_node, d_uint_type,
   ptr_type_node, argtype, argtype, NULL);
 	}
 	  t->vtinfo = TypeInfoStructDeclaration::create (t);
diff --git a/gcc/testsuite/gdc.dg/runnable.d b/gcc/testsuite/gdc.dg/runnable.d
index e36a2585027..8d9a5868831 100644
--- a/gcc/testsuite/gdc.dg/runnable.d
+++ b/gcc/testsuite/gdc.dg/runnable.d
@@ -890,12 +890,17 @@ struct S186
 }
 }
 
+static if (size_t.sizeof == 8)
+ size_t checkval = 0x0202;
+static if (size_t.sizeof == 4)
+ size_t checkval = 0x0202;
+
 void check186(in S186 obj, byte fieldB)
 {
 assert(obj.fieldA == 2);
 assert(obj.fieldB == 0);
 assert(obj.fieldC == 0);
-assert(obj._complete == 2);
+assert(obj._complete == checkval);
 assert(fieldB == 0);
 }
 
@@ -907,7 +912,7 @@ void test186a(size_t val)
 assert(obj.fieldA == 2);
 assert(obj.fieldB == 0);
 assert(obj.fieldC == 0);
-assert(obj._complete == 2);
+assert(obj._complete == checkval);
 
 obj = S186(val);
 check186(obj, obj.fieldB);
@@ -915,12 +920,12 @@ void test186a(size_t val)
 assert(obj.fieldA == 2);
 assert(obj.fieldB == 0);
 assert(obj.fieldC == 0);
-assert(obj._complete == 2);
+assert(obj._complete == checkval);
 }
 
 void test186()
 {
-test186a(2);
+test186a(checkval);
 }
 
 /**/
diff --git a/gcc/testsuite/gdc.dg/simd.d b/gcc/testsuite/gdc.dg/simd.d
index 812b36649aa..7d0aa0168c0 100644
--- a/gcc/testsuite/gdc.dg/simd.d
+++ b/gcc/testsuite/gdc.dg/simd.d
@@ -1576,7 +1576,10 @@ ubyte[16] foounsto()
 void testOPvecunsto()
 {
 auto a 

Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-04-11 Thread Steve Ellcey
On Thu, 2019-04-11 at 14:58 +, Steve Ellcey wrote:
> 
> > You've removed the ..._noshift_alt variant.  That wasn't my
> > intention,
> > so perhaps you misunderstood what I was trying to say.
> > 
> > The two versions are both needed, since the register tie is not
> > orthogonal to the constants used in the masks and canonicalization
> > will
> > not generate a consistent ordering of the operands.
> 
> I started doing this and then convinced myself (perhaps incorrectly)
> that I didn't need the alt version.  Operands 1 and 3 are registers
> and Operands 2 and 4 are constants, so the only difference is in the 
> call to aarch64_masks_and_shift_for_bfi_p.  Given that argument 2 to
> this call is 0 this call should be the equivelent of ((x & MASK1) |
> (y
> & MASK2)) and that should mean that:
> 
> aarch64_masks_and_shift_for_bfi_p(X,0,Y) ==
> aarch64_masks_and_shift_for_bfi_p(Y,0,X)
> 
> 
> Maybe I am wrong about that?  I will do some expirements.  My testing
> did not find any cases in the testsuite where not having the _alt
> version resulted in a bfi not being generated.

OK, I think I see where my idea that I didn't need both versions is
wrong.  There is an extra check on the final argument that is not done
on the initial argument.  Here is the patch that I am testing, I think
it is what you have in mind.  It looks wierd having arguments 3 and 4
before 1 and 2, I think that is why I had it differently in my original
patch but if you prefer this version, that is fine with me.

OK to check in once my build/test is finished?



2018-04-11  Steve Ellcey  

PR rtl-optimization/87763
* config/aarch64/aarch64.md (*aarch64_bfi4_noshift):
New Instruction.


diff --git a/gcc/config/aarch64/aarch64.md
b/gcc/config/aarch64/aarch64.md
index e0df975..eac688a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5565,7 +5565,8 @@
 )
 
 ;; Like *aarch64_bfi5_shift but with no shifting, we are just
-;; copying the least significant bits of OP3 to OP0.
+;; copying the least significant bits of OP3 to OP0.  We need two versions
+;; of the instruction to handle different checks on the constant values.
 
 (define_insn "*aarch64_bfi4_noshift"
   [(set (match_operand:GPI 0 "register_operand" "=r")
@@ -5579,6 +5580,18 @@
   [(set_attr "type" "bfm")]
 )
 
+(define_insn "*aarch64_bfi4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "0")
+  (match_operand:GPI 4 "const_int_operand" "n"))
+ (and:GPI (match_operand:GPI 1 "register_operand" "r")
+  (match_operand:GPI 2 "const_int_operand" "n"]
+  "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0,
+ UINTVAL (operands[4]))"
+  "bfi\t%0, %3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)
+
 (define_insn "*extr_insv_lower_reg"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
  (match_operand 1 "const_int_operand" "n")



[C++ Patch/RFC] PR 89900 ("[9 Regression] ICE: Segmentation fault (in check_instantiated_arg)")

2019-04-11 Thread Paolo Carlini

Hi,

over the last few days I spent some time on this regression, which at 
first seemed just a minor error-recovery issue, but then I noticed that 
very slightly tweeking the original testcase uncovered a pretty serious 
ICE on valid:


template void
fk (XE..., int/*SW*/);

void
w9 (void)
{
  fk (0);
}

The regression has to do with the changes committed by Jason for 
c++/86932, in particular with the condition in coerce_template_parms:


   if (template_parameter_pack_p (TREE_VALUE (parm))
      && (arg || !(complain & tf_partial))
      && !(arg && ARGUMENT_PACK_P (arg)))

which has the additional (arg || !complain & tf_partial)) false for the 
present testcase, thus the null arg is not changed into an empty pack, 
thus later  instantiate_template calls check_instantiated_args which 
finds it still null and crashes. Now, likely some additional analysis is 
in order, but for sure there is an important difference between the 
testcase which came with c++/86932 and the above: non-type vs type 
template parameter pack. It seems to me that the kind of problem fixed 
in c++/86932 cannot occur with type packs, because it boils down to a 
reference to a previous parm (full disclosure: the comments and logic in 
fixed_parameter_pack_p helped me a lot here). Thus I had the idea of 
simply restricting the scope of the new condition above by adding an || 
TREE_CODE (TREE_VALUE (parm)) == TYPE_DECL, which definitely leads to a 
clean testsuite and a proper behavior on the new testcases, AFAICS. I'm 
attaching what I tested on x86_64-linux.


Thanks, Paolo.

//

Index: cp/pt.c
===
--- cp/pt.c (revision 270279)
+++ cp/pt.c (working copy)
@@ -8475,7 +8475,8 @@ coerce_template_parms (tree parms,
arg = NULL_TREE;
 
   if (template_parameter_pack_p (TREE_VALUE (parm))
- && (arg || !(complain & tf_partial))
+ && (arg || !(complain & tf_partial)
+ || TREE_CODE (TREE_VALUE (parm)) == TYPE_DECL)
  && !(arg && ARGUMENT_PACK_P (arg)))
 {
  /* Some arguments will be placed in the
Index: testsuite/g++.dg/cpp0x/pr89900-1.C
===
--- testsuite/g++.dg/cpp0x/pr89900-1.C  (nonexistent)
+++ testsuite/g++.dg/cpp0x/pr89900-1.C  (working copy)
@@ -0,0 +1,10 @@
+// { dg-do compile { target c++11 } }
+
+template void
+fk (XE..., SW);  // { dg-error "12:.SW. has not been declared" }
+
+void
+w9 (void)
+{
+  fk (0);
+}
Index: testsuite/g++.dg/cpp0x/pr89900-2.C
===
--- testsuite/g++.dg/cpp0x/pr89900-2.C  (nonexistent)
+++ testsuite/g++.dg/cpp0x/pr89900-2.C  (working copy)
@@ -0,0 +1,10 @@
+// { dg-do compile { target c++11 } }
+
+template void
+fk (XE..., int);
+
+void
+w9 (void)
+{
+  fk (0);
+}


Re: [C++ PATCH] Make some type forbidden diagnostics translatable (PR translation/90035)

2019-04-11 Thread Jason Merrill

On 4/11/19 3:50 AM, Jakub Jelinek wrote:

Hi!

While looking into another PR, I've noticed that in two spots the
parser->type_definition_forbidden_message messages are untranslatable,
we construct them at runtime from 3 strings using concat.

The following patch fixes it by adding a const char * member
to the parser structure and passing that as another argument to error,
in the more usual case where the argument string only contains %< and %>
and not %qs added in this patch it will be just ignored.

I believe this patch is more friendly to the translators (as well as
less expensive at compile time because it doesn't have to concat/allocate
anything).

Another possibility would be to just use in cp_parser_has_attribute_expression
G_("types may not be defined in %<__builtin_has_attribute%> expressions")
and in cp_parser_sizeof_operand use a switch based on the 3 possible keyword
values and use
G_("types may not be defined in % expressions")
G_("types may not be defined in %<__alignof__%> expressions")
G_("types may not be defined in % expressions")
G_("types may not be defined in % expressions")
depending on that (and C++ mode which determines which alignof spelling is
in ridpointers).  We wouldn't need to add an extra member, on the other side
the translators would need to translate 5 messages instead of just one.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.

Jason



[PR 89693] Reorganize cgraph_node::clone_of_p

2019-04-11 Thread Martin Jambor
Hi,

this reorganization of cgraph_node:clone_of_p() prevents verifier
falsely ICEing because it cannot recognize that a former thunk (expanded
even before reaching pass pipeline) was cloned by IPA-CP.

It basically traverses the clone_of chain at each step of thunk chain
traversal, rather than just after it.  This is only done when checking
is on, so the extra little overhead should be of little concern.

Bootstrapped, LTO-bootstrapped and tested on x86_64, OK for trunk?  If
so, I will check if we need it in GCC 8 and if so, backport it there
too.

Thanks,

Martin


2019-04-11  Martin Jambor  

PR ipa/89693
* cgraph.c (clone_of_p): Loop over clone chain for each step in
the thunk chain.

testsuite/
* g++.dg/ipa/pr89693.C: New test.
---
 gcc/cgraph.c   | 30 ++---
 gcc/testsuite/g++.dg/ipa/pr89693.C | 52 ++
 2 files changed, 70 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr89693.C

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 49d80ad1e28..b1b0b4c42d5 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2977,17 +2977,25 @@ cgraph_node::collect_callers (void)
 static bool
 clone_of_p (cgraph_node *node, cgraph_node *node2)
 {
-  bool skipped_thunk = false;
   node = node->ultimate_alias_target ();
   node2 = node2->ultimate_alias_target ();
 
+  if (node2->clone_of == node
+  || node2->former_clone_of == node->decl)
+return true;
+
+  if (!node->thunk.thunk_p && !node->former_thunk_p ())
+{
+  while (node2 && node->decl != node2->decl)
+   node2 = node2->clone_of;
+  return node2 != NULL;
+}
+
   /* There are no virtual clones of thunks so check former_clone_of or if we
  might have skipped thunks because this adjustments are no longer
  necessary.  */
   while (node->thunk.thunk_p || node->former_thunk_p ())
 {
-  if (node2->former_clone_of == node->decl)
-   return true;
   if (!node->thunk.this_adjusting)
return false;
   /* In case of instrumented expanded thunks, which can have multiple calls
@@ -2996,23 +3004,21 @@ clone_of_p (cgraph_node *node, cgraph_node *node2)
   if (node->callees->next_callee)
return true;
   node = node->callees->callee->ultimate_alias_target ();
-  skipped_thunk = true;
-}
 
-  if (skipped_thunk)
-{
   if (!node2->clone.args_to_skip
  || !bitmap_bit_p (node2->clone.args_to_skip, 0))
return false;
   if (node2->former_clone_of == node->decl)
return true;
-  else if (!node2->clone_of)
-   return false;
+
+  cgraph_node *n2 = node2;
+  while (n2 && node->decl != n2->decl)
+   n2 = n2->clone_of;
+  if (n2)
+   return true;
 }
 
-  while (node2 && node->decl != node2->decl)
-node2 = node2->clone_of;
-  return node2 != NULL;
+  return false;
 }
 
 /* Verify edge count and frequency.  */
diff --git a/gcc/testsuite/g++.dg/ipa/pr89693.C 
b/gcc/testsuite/g++.dg/ipa/pr89693.C
new file mode 100644
index 000..4ac83eeeb3a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr89693.C
@@ -0,0 +1,52 @@
+// Copyright (C) 2005 Free Software Foundation, Inc.
+// Contributed by Nathan Sidwell 4 Apr 2005 
+// Re-purposed to check for re-rurgesnce of PR 89693 in 2019.
+
+// { dg-do compile }
+// { dg-options "-O3 -fno-ipa-icf-functions" }
+
+// Origin: yan...@ca.ibm.com
+// nat...@codesourcery.com
+
+struct A {
+  virtual void One ();
+};
+struct B  {
+  virtual B *Two ();
+  virtual B &Three ();
+};
+
+struct C : A, B
+{
+  virtual C *Two ();
+  virtual C &Three ();
+};
+void A::One () {}
+B *B::Two(){return this;}
+B &B::Three(){return *this;}
+C *C::Two ()   {return 0;}
+C &C::Three ()   {return *(C *)0;}
+
+B *Foo (B *b)
+{
+  return b->Two ();
+}
+
+B &Bar (B *b)
+{
+  return b->Three ();
+}
+
+int main ()
+{
+  C c;
+
+  /* We should not adjust a null pointer.  */
+  if (Foo (&c))
+return 1;
+  /* But we should adjust a (bogus) null reference.  */
+  if (!&Bar (&c))
+return 2;
+
+  return 0;
+}
-- 
2.21.0



Re: [EXT] Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-04-11 Thread Steve Ellcey
On Thu, 2019-04-11 at 09:59 +0100, Richard Earnshaw (lists) wrote:
> 
> > 
> > 2018-04-10  Steve Ellcey  
> > 
> > PR rtl-optimization/87763
> > * config/aarch64/aarch64-protos.h
> > (aarch64_masks_and_shift_for_bfi_p):
> > New prototype.
> > * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
> > New function.
> > * config/aarch64/aarch64.md (*aarch64_bfi5_shift):
> > New instruction.
> > (*aarch64_bfi5_shift_alt): Ditto.
> > (*aarch64_bfi4_noand): Ditto.
> > (*aarch64_bfi4_noand_alt): Ditto.
> > (*aarch64_bfi4_noshift): Ditto.
> > 
> 
> You've removed the ..._noshift_alt variant.  That wasn't my intention,
> so perhaps you misunderstood what I was trying to say.
>
> The two versions are both needed, since the register tie is not
> orthogonal to the constants used in the masks and canonicalization will
> not generate a consistent ordering of the operands.

I started doing this and then convinced myself (perhaps incorrectly)
that I didn't need the alt version.  Operands 1 and 3 are registers
and Operands 2 and 4 are constants, so the only difference is in the 
call to aarch64_masks_and_shift_for_bfi_p.  Given that argument 2 to
this call is 0 this call should be the equivelent of ((x & MASK1) | (y
& MASK2)) and that should mean that:

aarch64_masks_and_shift_for_bfi_p(X,0,Y) ==
aarch64_masks_and_shift_for_bfi_p(Y,0,X)


Maybe I am wrong about that?  I will do some expirements.  My testing
did not find any cases in the testsuite where not having the _alt
version resulted in a bfi not being generated.

Steve Ellcey
sell...@marvell.com
> > 


Re: [PATCH, doc] Note variable shadowing at max macro using statement expression

2019-04-11 Thread Sandra Loosemore

On 4/11/19 2:31 AM, Tom de Vries wrote:

Hi Sandra,

thanks for the review.

I've attached the updated patch, as well as the resulting relevant
gcc.info portion.

OK for trunk?


This version looks OK.  Thanks for helping to improve the docs!

-Sandra


Re: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.

2019-04-11 Thread Paul Richard Thomas
Hi Dominique,

Yes indeed - I used int(kind(loc(res))) to achieve the same effect.

I am looking for but failing to find a similar problem for PR89846.
Tomorrow I turn my attention to an incorrect cast in the compiler.

Regards

Paul

On Thu, 11 Apr 2019 at 14:54, Dominique d'Humières  wrote:
>
>
> With the following code
>
> module cdesc
>   interface
>   function cdesc_f08(buf, expected) result (res) BIND(C, name="cdesc_c")
>   USE, INTRINSIC :: ISO_C_BINDING
>   implicit none
>   INTEGER(C_INT) :: res
>   type(*), dimension(..), INTENT(INOUT) :: buf
>   type(c_ptr),INTENT(IN) :: expected
> end function cdesc_f08
>   end interface
> end module
>
> program cdesc_test
>   USE, INTRINSIC :: ISO_C_BINDING
>   use cdesc
>   implicit none
>   integer(c_int), target :: a0, a1(10), a2(10,10), a3(10,10,10)
>   if (cdesc_f08(a0, C_LOC(a0)) .ne. 1) stop 1
>   if (cdesc_f08(a1, C_LOC(a1(1))) .ne. 1) stop 2
>   if (cdesc_f08(a2, C_LOC(a2(1,1))) .ne. 1) stop 3
>   if (cdesc_f08(a3, C_LOC(a3(1,1,1))) .ne. 1) stop 4
> end program
>
> The test ISO_Fortran_binding_9.f90 executes without failure for both -m32 and 
> -m64.
>
> Dominique
>
> > Le 10 avr. 2019 à 00:42, Paul Richard Thomas 
> >  a écrit :
> >
> > Many thanks, sir! I will look into it.
> >
> > Paul
>


-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [Patch, fortran] PRs 89843 and 90022 - C Fortran Interop fixes.

2019-04-11 Thread Dominique d'Humières


With the following code

module cdesc
  interface
  function cdesc_f08(buf, expected) result (res) BIND(C, name="cdesc_c")
  USE, INTRINSIC :: ISO_C_BINDING
  implicit none
  INTEGER(C_INT) :: res
  type(*), dimension(..), INTENT(INOUT) :: buf
  type(c_ptr),INTENT(IN) :: expected
end function cdesc_f08
  end interface
end module

program cdesc_test
  USE, INTRINSIC :: ISO_C_BINDING
  use cdesc
  implicit none
  integer(c_int), target :: a0, a1(10), a2(10,10), a3(10,10,10)
  if (cdesc_f08(a0, C_LOC(a0)) .ne. 1) stop 1
  if (cdesc_f08(a1, C_LOC(a1(1))) .ne. 1) stop 2
  if (cdesc_f08(a2, C_LOC(a2(1,1))) .ne. 1) stop 3
  if (cdesc_f08(a3, C_LOC(a3(1,1,1))) .ne. 1) stop 4
end program

The test ISO_Fortran_binding_9.f90 executes without failure for both -m32 and 
-m64.

Dominique

> Le 10 avr. 2019 à 00:42, Paul Richard Thomas  
> a écrit :
> 
> Many thanks, sir! I will look into it.
> 
> Paul



[PATCH] tilepro.c diagnostics bug fix (PR target/52726)

2019-04-11 Thread Jakub Jelinek
On Tue, Mar 12, 2019 at 10:58:14AM +0100, Jakub Jelinek wrote:
> These are the only remaining cases of gcc-internal-format diagnostics using
> HOST_WIDE_INT_PRINT*.  That is wrong because the string depends on the exact
> host, and xgettext doesn't handle it anyway, so in gcc.pot the messages are
> truncated at the spot where HOST_WIDE_INT_PRINT* appears.  See also
> PR79846 for an earlier change of the same kind.
> 
> Tested on a cross to s390x-linux on the 
> gcc.target/s390/htm-builtins-compile-2.c
> testcase.  Ok for trunk?
> 
> 2019-03-12  Jakub Jelinek  
> 
>   PR target/52726
>   * config/s390/s390.md (tabort): Use %wd instead of
>   HOST_WIDE_INT_PRINT_DEC in error message, reword to avoid two capital
>   letters and periods.
>   * config/tilepro/tilepro.c (tilepro_print_operand): Use %wd in
>   output_operand_lossage instead of HOST_WIDE_INT_PRINT_DEC, replace
>   's with %< and %>.

Unfortunately, today while building gcc.pot I've noticed:
config/tilepro/tilepro.c:4774: warning: Although being used in a format string 
position, the msgid is not a valid C format string. Reason: In the directive 
number 2, the token after '<' is not the name of a format specifier macro. The 
valid macro names are listed in ISO C 99 section 7.8.1.
Indeed, output_operand_lossage argument is not gcc-internal-format, so
it can't use %wd nor %< nor %>.  It is printed using:
  va_start (ap, cmsgid);

  pfx_str = this_is_asm_operands ? _("invalid 'asm': ") : "output_operand: ";
  fmt_string = xasprintf ("%s%s", pfx_str, _(cmsgid));
  new_message = xvasprintf (fmt_string, ap);

  if (this_is_asm_operands)
error_for_asm (this_is_asm_operands, "%s", new_message);
  else
internal_error ("%s", new_message);
so can use just the fprintf format specifiers.  While I perhaps could use
%ld instead of %<%wd%> and cast the HOST_WIDE_INT argument to long, it
doesn't seem to be worth it IMHO, if the argument is not a CONST_INT, we
print also just invalid %%t operand, so I'd suggest doing following.

Ok for trunk?

2019-04-11  Jakub Jelinek  

PR target/52726
* config/tilepro/tilepro.c (tilepro_print_operand): Use just
"invalid %%t operand" in output_operand_lossage message.

--- gcc/config/tilepro/tilepro.c.jj 2019-03-12 10:46:40.995960027 +0100
+++ gcc/config/tilepro/tilepro.c2019-04-11 14:54:01.708668318 +0200
@@ -4771,7 +4771,7 @@ tilepro_print_operand (FILE *file, rtx x
i = exact_log2 (n);
if (i < 0)
  {
-   output_operand_lossage ("invalid %%t operand %<%wd%>", n);
+   output_operand_lossage ("invalid %%t operand");
return;
  }
 


Jakub


[PATCH] Fix #error in mips loongson-mmintrin.h

2019-04-11 Thread Jakub Jelinek
Hi!

While doing make gcc.pot, I've noticed warnings about unterminated string
in loongson-mmintrin.h.
I don't have any usable mips setup, but just tried:
/tmp/1a.c:
# error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use
 loongson-mmiintrin.h"
/tmp/1b.c:
# error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
 loongson-mmiintrin.h
$ gcc -S -o /tmp/1a.{s,c}
/tmp/1a.c:1:9: warning: missing terminating " character
 # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use
 ^
/tmp/1a.c:1:3: error: #error "You must select -mloongson-mmi or 
-march=loongson2e/2f/3a to use
 # error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use
   ^
/tmp/1a.c:2:11: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before 
‘-’ token
   loongson-mmiintrin.h"
   ^
/tmp/1a.c:2:23: warning: missing terminating " character
   loongson-mmiintrin.h"
   ^
/tmp/1a.c:2:23: error: missing terminating " character
$ gcc -S -o /tmp/1b.{s,c}
/tmp/1b.c:1:3: error: #error You must select -mloongson-mmi or 
-march=loongson2e/2f/3a to use loongson-mmiintrin.h
 # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
   ^
and similarly for g++.  Ok for trunk?

2019-04-11  Jakub Jelinek  

* config/mips/loongson-mmiintrin.h: Fix up #error message.

--- gcc/config/mips/loongson-mmiintrin.h.jj 2019-01-01 12:37:22.459887959 
+0100
+++ gcc/config/mips/loongson-mmiintrin.h2019-04-11 14:41:39.901898228 
+0200
@@ -28,8 +28,8 @@
 #define _GCC_LOONGSON_MMIINTRIN_H
 
 #if !defined(__mips_loongson_mmi)
-# error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use
-loongson-mmiintrin.h"
+# error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
+ loongson-mmiintrin.h
 #endif
 
 #ifdef __cplusplus

Jakub


Re: Add parameter to limit LTO streaming parallelism

2019-04-11 Thread Richard Biener
On Thu, 11 Apr 2019, Jan Hubicka wrote:

> > On Thu, 11 Apr 2019, Jan Hubicka wrote:
> > 
> > > Hi,
> > > the LTO streaming forks for every partition. With the number of
> > > partitions incrased to 128 and relatively large memory usage (around
> > > 5GB) needed to WPA firefox this causes kernel to spend a lot of time
> > > probably by copying the page tables.
> > > 
> > > This patch makes the streamer to for only lto_parallelism times
> > > and strem num_partitions/lto_paralleism in each thread.
> > > I have also added parameter because currently -flto=jobserv leads
> > > to unlimited parallelism.  This should be fixed by conneting to Make's
> > > jobsever and build our own mini jobserver to distribute partitions
> > > between worker threads, but this seems bit too involved for last minute
> > > change in stage4.  I plan to work on this and hopefully bacport it to .2
> > > release.
> > > 
> > > I have tested the performance on by 32CPU 64threads box and got best
> > > wall time with 32 partitions and therefore I set it by default.  I get
> > > 
> > > --param max-lto-streaming-parallelism=1
> > > Time variable   usr   sys 
> > >  wall   GGC
> > >  phase stream out   :  50.65 ( 30%)  20.66 ( 61%)  71.38 
> > > ( 35%) 921 kB (  0%)
> > >  TOTAL  : 170.73 33.69204.64  
> > >   7459610 kB
> > > 
> > > --param max-lto-streaming-parallelism=4
> > >  phase stream out   :  13.79 ( 11%)   6.80 ( 35%)  20.94 
> > > ( 14%) 155 kB (  0%)
> > >  TOTAL  : 130.26 19.68150.46  
> > >   7458844 kB
> > > 
> > > --param max-lto-streaming-parallelism=8
> > >  phase stream out   :   8.94 (  7%)   5.21 ( 29%)  14.15 
> > > ( 10%)  83 kB (  0%)
> > >  TOTAL  : 125.28 18.09143.54  
> > >   7458773 kB
> > > 
> > > --param max-lto-streaming-parallelism=16
> > >  phase stream out   :   4.56 (  4%)   4.34 ( 25%)   9.46 
> > > (  7%)  35 kB (  0%)
> > >  TOTAL  : 122.60 17.21140.56  
> > >   7458725 kB
> > > 
> > > --param max-lto-streaming-parallelism=32
> > >  phase stream out   :   2.34 (  2%)   5.69 ( 31%)   8.03 
> > > (  6%)  15 kB (  0%)
> > >  TOTAL  : 118.53 18.36137.08  
> > >   7458705 kB
> > > 
> > > --param max-lto-streaming-parallelism=64
> > >  phase stream out   :   1.63 (  1%)  15.76 ( 55%)  17.40 
> > > ( 12%)  13 kB (  0%)
> > >  TOTAL  : 122.17 28.66151.00  
> > >   7458702 kB
> > > 
> > > --param max-lto-streaming-parallelism=256
> > >  phase stream out   :   1.28 (  1%)   9.24 ( 41%)  10.53 
> > > (  8%)  13 kB (  0%)
> > >  TOTAL  : 116.78 22.56139.53  
> > >   7458702 kB
> > > 
> > > Note that it is bit odd that 64 leads to worse results that full
> > > parallelism but it seems to reproduce relatively well. Also the usr/sys
> > > times for streaming are not representative since they do not account sys
> > > time of the forked threads. I am not sure where the fork time is
> > > accounted.
> > > 
> > > Generally it seems that the forking performance is not at all that
> > > bad and scales reasonably, but I still we should limit the default for
> > > something less than 128 we do now. Definitly there are diminishing
> > > returns after increasing from 16 or 32 and memory use goes up
> > > noticeably. With current trunk memory use also does not seem terribly
> > > bad (less global stream streaming makes the workers cheaper) and in all
> > > memory traces I collected it is dominated by compilation stage during
> > > the full rebuild.
> > > 
> > > I did similar tests for cc1 binary. There the relative time spent in
> > > streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32
> > > respectively)
> > > 
> > > Bootstrapped/regtested x86_64-linux, OK?
> > 
> > Please document the new param in invoke.texi.  Otherwise looks good
> > to me.  Btw, do we actually allocate garbage at write-out time?
> > Thus, would using threads work as well?
> 
> It is on my TODO to get this working.  Last time i checked by adding
> abort into ggc_alloc there was some occurences but I think that can be
> cleanded up.
> 
> I wonder how much performance hit we would get for enabling pthreads for
> lto1 binary and thus building libbackend with it?

Is there any performance impact before the first thread creation?
(besides eventually a few well-predicted if (threads_are_running) checks?)

Richard.


[PATCH] Mark MissingArgError, UnknownError and Warn *.opt arguments as gcc-internal-format (PR translation/90041)

2019-04-11 Thread Jakub Jelinek
Hi!

These 3 *.opt messages are printed using:
opts-common.c:  error_at (loc, option->missing_argument_error, opt);
opts-common.c:  error_at (loc, e->unknown_error, arg);
opts-common.c:warning_at (loc, 0, decoded->warn_message, opt);
and so they do support the various gcc-internal-format format string
specifiers like %<, %>, %qs etc.

The following patch tweaks exgettext so that it marks them as such
and also adjusts c.opt so that it passes
contrib/check-internal-format-escaping.py after such a change.

Tested on x86_64-linux using make gcc.pot, eyeballing the resulting file
and trying ./xgcc -B ./ -S hello.c -fhandle-exceptions

Ok for trunk?

2019-04-11  Jakub Jelinek  

PR translation/90041
* po/exgettext: Print MissingArgError, UnknownError or Warn
*.opt argument using error or warning instead of _ to mark it
as gcc-internal-format.

* c.opt (-fhandle-exceptions): Use %< and %> around option names
in the Warn diagnostics.

--- gcc/po/exgettext.jj 2019-01-01 12:37:21.728899953 +0100
+++ gcc/po/exgettext2019-04-11 13:39:14.361331358 +0200
@@ -252,7 +252,7 @@ echo "scanning option files..." >&2
} else
sub("\\).*", "", line)
printf("#line %d \"%s\"\n", lineno, file)
-   printf("_(\"%s\")\n", line)
+   printf("error(\"%s\")\n", line)
}
if ((field == 1) && /UnknownError/) {
line = $0
@@ -263,7 +263,7 @@ echo "scanning option files..." >&2
} else
sub("\\).*", "", line)
printf("#line %d \"%s\"\n", lineno, file)
-   printf("_(\"%s\")\n", line)
+   printf("error(\"%s\")\n", line)
}
if ((field == 1) && /Warn\(/) {
line = $0
@@ -274,7 +274,7 @@ echo "scanning option files..." >&2
} else
sub("\\).*", "", line)
printf("#line %d \"%s\"\n", lineno, file)
-   printf("_(\"%s\")\n", line)
+   printf("warning(0, \"%s\")\n", line)
}
if (field == 2) {
line = $0
--- gcc/c-family/c.opt.jj   2019-03-22 15:39:58.149990452 +0100
+++ gcc/c-family/c.opt  2019-04-11 14:06:38.234474269 +0200
@@ -1516,7 +1516,7 @@ fguiding-decls
 C++ ObjC++ Deprecated
 
 fhandle-exceptions
-C++ ObjC++ Optimization Alias(fexceptions) Warn({-fhandle-exceptions has been 
renamed -fexceptions (and is now on by default)})
+C++ ObjC++ Optimization Alias(fexceptions) Warn({%<-fhandle-exceptions%> has 
been renamed %<-fexceptions%> (and is now on by default)})
 
 fhonor-std
 C++ ObjC++ Deprecated

Jakub


Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Martin Liška
On 4/11/19 1:15 PM, Jakub Jelinek wrote:
> On Thu, Apr 11, 2019 at 01:10:00PM +0200, Richard Biener wrote:
>> On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek  wrote:
>>>
>>> On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote:
 On 4/11/19 11:57 AM, Richard Biener wrote:
> On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
>>
>> Hi.
>>
>> The patch is catching duplicate 'default' values in target attribute.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
>
> I wonder if it isn't better to ignore duplicate "default"s (given you 
> needed to
> adjust a testcase even)?

 Well, possibly yes. But I would prefer to have a more strict checking.
>>>
>>> I agree, default, default is not really useful and rejecting it is fine.
>>> Having a testcase covering that is of course desirable.
>>
>> There is one in the patch.
>>
>> That said, I was worried to go down the same route as that volatile asm
>> thing and the patch needs to be on branches as well?
> 
> On branches I'd indeed ignore second and following default if that is what
> we've done before.
> 
>   Jakub
> 

Well, I'm probably no planning to backport that.

Martin


Re: Add parameter to limit LTO streaming parallelism

2019-04-11 Thread Jan Hubicka
> On Thu, 11 Apr 2019, Jan Hubicka wrote:
> 
> > Hi,
> > the LTO streaming forks for every partition. With the number of
> > partitions incrased to 128 and relatively large memory usage (around
> > 5GB) needed to WPA firefox this causes kernel to spend a lot of time
> > probably by copying the page tables.
> > 
> > This patch makes the streamer to for only lto_parallelism times
> > and strem num_partitions/lto_paralleism in each thread.
> > I have also added parameter because currently -flto=jobserv leads
> > to unlimited parallelism.  This should be fixed by conneting to Make's
> > jobsever and build our own mini jobserver to distribute partitions
> > between worker threads, but this seems bit too involved for last minute
> > change in stage4.  I plan to work on this and hopefully bacport it to .2
> > release.
> > 
> > I have tested the performance on by 32CPU 64threads box and got best
> > wall time with 32 partitions and therefore I set it by default.  I get
> > 
> > --param max-lto-streaming-parallelism=1
> > Time variable   usr   sys  
> > wall   GGC
> >  phase stream out   :  50.65 ( 30%)  20.66 ( 61%)  71.38 ( 
> > 35%) 921 kB (  0%)
> >  TOTAL  : 170.73 33.69204.64
> > 7459610 kB
> > 
> > --param max-lto-streaming-parallelism=4
> >  phase stream out   :  13.79 ( 11%)   6.80 ( 35%)  20.94 ( 
> > 14%) 155 kB (  0%)
> >  TOTAL  : 130.26 19.68150.46
> > 7458844 kB
> > 
> > --param max-lto-streaming-parallelism=8
> >  phase stream out   :   8.94 (  7%)   5.21 ( 29%)  14.15 ( 
> > 10%)  83 kB (  0%)
> >  TOTAL  : 125.28 18.09143.54
> > 7458773 kB
> > 
> > --param max-lto-streaming-parallelism=16
> >  phase stream out   :   4.56 (  4%)   4.34 ( 25%)   9.46 (  
> > 7%)  35 kB (  0%)
> >  TOTAL  : 122.60 17.21140.56
> > 7458725 kB
> > 
> > --param max-lto-streaming-parallelism=32
> >  phase stream out   :   2.34 (  2%)   5.69 ( 31%)   8.03 (  
> > 6%)  15 kB (  0%)
> >  TOTAL  : 118.53 18.36137.08
> > 7458705 kB
> > 
> > --param max-lto-streaming-parallelism=64
> >  phase stream out   :   1.63 (  1%)  15.76 ( 55%)  17.40 ( 
> > 12%)  13 kB (  0%)
> >  TOTAL  : 122.17 28.66151.00
> > 7458702 kB
> > 
> > --param max-lto-streaming-parallelism=256
> >  phase stream out   :   1.28 (  1%)   9.24 ( 41%)  10.53 (  
> > 8%)  13 kB (  0%)
> >  TOTAL  : 116.78 22.56139.53
> > 7458702 kB
> > 
> > Note that it is bit odd that 64 leads to worse results that full
> > parallelism but it seems to reproduce relatively well. Also the usr/sys
> > times for streaming are not representative since they do not account sys
> > time of the forked threads. I am not sure where the fork time is
> > accounted.
> > 
> > Generally it seems that the forking performance is not at all that
> > bad and scales reasonably, but I still we should limit the default for
> > something less than 128 we do now. Definitly there are diminishing
> > returns after increasing from 16 or 32 and memory use goes up
> > noticeably. With current trunk memory use also does not seem terribly
> > bad (less global stream streaming makes the workers cheaper) and in all
> > memory traces I collected it is dominated by compilation stage during
> > the full rebuild.
> > 
> > I did similar tests for cc1 binary. There the relative time spent in
> > streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32
> > respectively)
> > 
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> Please document the new param in invoke.texi.  Otherwise looks good
> to me.  Btw, do we actually allocate garbage at write-out time?
> Thus, would using threads work as well?

It is on my TODO to get this working.  Last time i checked by adding
abort into ggc_alloc there was some occurences but I think that can be
cleanded up.

I wonder how much performance hit we would get for enabling pthreads for
lto1 binary and thus building libbackend with it?

Honza


Re: Add parameter to limit LTO streaming parallelism

2019-04-11 Thread Richard Biener
On Thu, 11 Apr 2019, Jan Hubicka wrote:

> Hi,
> the LTO streaming forks for every partition. With the number of
> partitions incrased to 128 and relatively large memory usage (around
> 5GB) needed to WPA firefox this causes kernel to spend a lot of time
> probably by copying the page tables.
> 
> This patch makes the streamer to for only lto_parallelism times
> and strem num_partitions/lto_paralleism in each thread.
> I have also added parameter because currently -flto=jobserv leads
> to unlimited parallelism.  This should be fixed by conneting to Make's
> jobsever and build our own mini jobserver to distribute partitions
> between worker threads, but this seems bit too involved for last minute
> change in stage4.  I plan to work on this and hopefully bacport it to .2
> release.
> 
> I have tested the performance on by 32CPU 64threads box and got best
> wall time with 32 partitions and therefore I set it by default.  I get
> 
> --param max-lto-streaming-parallelism=1
> Time variable   usr   sys  
> wall   GGC
>  phase stream out   :  50.65 ( 30%)  20.66 ( 61%)  71.38 ( 
> 35%) 921 kB (  0%)
>  TOTAL  : 170.73 33.69204.64  
>   7459610 kB
> 
> --param max-lto-streaming-parallelism=4
>  phase stream out   :  13.79 ( 11%)   6.80 ( 35%)  20.94 ( 
> 14%) 155 kB (  0%)
>  TOTAL  : 130.26 19.68150.46  
>   7458844 kB
> 
> --param max-lto-streaming-parallelism=8
>  phase stream out   :   8.94 (  7%)   5.21 ( 29%)  14.15 ( 
> 10%)  83 kB (  0%)
>  TOTAL  : 125.28 18.09143.54  
>   7458773 kB
> 
> --param max-lto-streaming-parallelism=16
>  phase stream out   :   4.56 (  4%)   4.34 ( 25%)   9.46 (  
> 7%)  35 kB (  0%)
>  TOTAL  : 122.60 17.21140.56  
>   7458725 kB
> 
> --param max-lto-streaming-parallelism=32
>  phase stream out   :   2.34 (  2%)   5.69 ( 31%)   8.03 (  
> 6%)  15 kB (  0%)
>  TOTAL  : 118.53 18.36137.08  
>   7458705 kB
> 
> --param max-lto-streaming-parallelism=64
>  phase stream out   :   1.63 (  1%)  15.76 ( 55%)  17.40 ( 
> 12%)  13 kB (  0%)
>  TOTAL  : 122.17 28.66151.00  
>   7458702 kB
> 
> --param max-lto-streaming-parallelism=256
>  phase stream out   :   1.28 (  1%)   9.24 ( 41%)  10.53 (  
> 8%)  13 kB (  0%)
>  TOTAL  : 116.78 22.56139.53  
>   7458702 kB
> 
> Note that it is bit odd that 64 leads to worse results that full
> parallelism but it seems to reproduce relatively well. Also the usr/sys
> times for streaming are not representative since they do not account sys
> time of the forked threads. I am not sure where the fork time is
> accounted.
> 
> Generally it seems that the forking performance is not at all that
> bad and scales reasonably, but I still we should limit the default for
> something less than 128 we do now. Definitly there are diminishing
> returns after increasing from 16 or 32 and memory use goes up
> noticeably. With current trunk memory use also does not seem terribly
> bad (less global stream streaming makes the workers cheaper) and in all
> memory traces I collected it is dominated by compilation stage during
> the full rebuild.
> 
> I did similar tests for cc1 binary. There the relative time spent in
> streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32
> respectively)
> 
> Bootstrapped/regtested x86_64-linux, OK?

Please document the new param in invoke.texi.  Otherwise looks good
to me.  Btw, do we actually allocate garbage at write-out time?
Thus, would using threads work as well?

Thanks,
Richard.

>   * params.def (PARAM_MAX_LTO_STREAMING_PARALLELISM): New parameter.
>   * lto.c (do_stream_out): rename to ...
>   (stream_out): ... this one; move original code to ...
>   (stream_out_partitions_1, stream_out_partitions): ... these new
>   functions.
>   (lto_wpa_write_files): Honnor lto_parallelism
> Index: params.def
> ===
> --- params.def(revision 270143)
> +++ params.def(working copy)
> @@ -1146,6 +1146,11 @@ DEFPARAM (MAX_PARTITION_SIZE,
> "Maximal size of a partition for LTO (in estimated instructions).",
> 100, 0, INT_MAX)
>  
> +DEFPARAM (PARAM_MAX_LTO_STREAMING_PARALLELISM,
> +   "max-lto-streaming-parallelism",
> +   "maximal number of LTO partitions streamed in parallel.",
> +   32, 1, 0)
> +
>  /* Diagnostic parameters.  */
>  
>  DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
> Index: lto/lto.c
> =

Add parameter to limit LTO streaming parallelism

2019-04-11 Thread Jan Hubicka
Hi,
the LTO streaming forks for every partition. With the number of
partitions incrased to 128 and relatively large memory usage (around
5GB) needed to WPA firefox this causes kernel to spend a lot of time
probably by copying the page tables.

This patch makes the streamer to for only lto_parallelism times
and strem num_partitions/lto_paralleism in each thread.
I have also added parameter because currently -flto=jobserv leads
to unlimited parallelism.  This should be fixed by conneting to Make's
jobsever and build our own mini jobserver to distribute partitions
between worker threads, but this seems bit too involved for last minute
change in stage4.  I plan to work on this and hopefully bacport it to .2
release.

I have tested the performance on by 32CPU 64threads box and got best
wall time with 32 partitions and therefore I set it by default.  I get

--param max-lto-streaming-parallelism=1
Time variable   usr   sys  wall 
  GGC
 phase stream out   :  50.65 ( 30%)  20.66 ( 61%)  71.38 ( 35%) 
921 kB (  0%)
 TOTAL  : 170.73 33.69204.64
7459610 kB

--param max-lto-streaming-parallelism=4
 phase stream out   :  13.79 ( 11%)   6.80 ( 35%)  20.94 ( 14%) 
155 kB (  0%)
 TOTAL  : 130.26 19.68150.46
7458844 kB

--param max-lto-streaming-parallelism=8
 phase stream out   :   8.94 (  7%)   5.21 ( 29%)  14.15 ( 10%) 
 83 kB (  0%)
 TOTAL  : 125.28 18.09143.54
7458773 kB

--param max-lto-streaming-parallelism=16
 phase stream out   :   4.56 (  4%)   4.34 ( 25%)   9.46 (  7%) 
 35 kB (  0%)
 TOTAL  : 122.60 17.21140.56
7458725 kB

--param max-lto-streaming-parallelism=32
 phase stream out   :   2.34 (  2%)   5.69 ( 31%)   8.03 (  6%) 
 15 kB (  0%)
 TOTAL  : 118.53 18.36137.08
7458705 kB

--param max-lto-streaming-parallelism=64
 phase stream out   :   1.63 (  1%)  15.76 ( 55%)  17.40 ( 12%) 
 13 kB (  0%)
 TOTAL  : 122.17 28.66151.00
7458702 kB

--param max-lto-streaming-parallelism=256
 phase stream out   :   1.28 (  1%)   9.24 ( 41%)  10.53 (  8%) 
 13 kB (  0%)
 TOTAL  : 116.78 22.56139.53
7458702 kB

Note that it is bit odd that 64 leads to worse results that full
parallelism but it seems to reproduce relatively well. Also the usr/sys
times for streaming are not representative since they do not account sys
time of the forked threads. I am not sure where the fork time is
accounted.

Generally it seems that the forking performance is not at all that
bad and scales reasonably, but I still we should limit the default for
something less than 128 we do now. Definitly there are diminishing
returns after increasing from 16 or 32 and memory use goes up
noticeably. With current trunk memory use also does not seem terribly
bad (less global stream streaming makes the workers cheaper) and in all
memory traces I collected it is dominated by compilation stage during
the full rebuild.

I did similar tests for cc1 binary. There the relative time spent in
streaming is lower so it goes from 17% to 1% (for parallelism 1 and 32
respectively)

Bootstrapped/regtested x86_64-linux, OK?

* params.def (PARAM_MAX_LTO_STREAMING_PARALLELISM): New parameter.
* lto.c (do_stream_out): rename to ...
(stream_out): ... this one; move original code to ...
(stream_out_partitions_1, stream_out_partitions): ... these new
functions.
(lto_wpa_write_files): Honnor lto_parallelism
Index: params.def
===
--- params.def  (revision 270143)
+++ params.def  (working copy)
@@ -1146,6 +1146,11 @@ DEFPARAM (MAX_PARTITION_SIZE,
  "Maximal size of a partition for LTO (in estimated instructions).",
  100, 0, INT_MAX)
 
+DEFPARAM (PARAM_MAX_LTO_STREAMING_PARALLELISM,
+ "max-lto-streaming-parallelism",
+ "maximal number of LTO partitions streamed in parallel.",
+ 32, 1, 0)
+
 /* Diagnostic parameters.  */
 
 DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
Index: lto/lto.c
===
--- lto/lto.c   (revision 270143)
+++ lto/lto.c   (working copy)
@@ -2304,7 +2304,7 @@ static lto_file *current_lto_file;
 /* Actually stream out ENCODER into TEMP_FILENAME.  */
 
 static void
-do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
+stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
 {
   lto_file *file = lto_obj_file_open (temp_filename, true);
   if (!file)
@@ -2352,

Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Jakub Jelinek
On Thu, Apr 11, 2019 at 01:10:00PM +0200, Richard Biener wrote:
> On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek  wrote:
> >
> > On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote:
> > > On 4/11/19 11:57 AM, Richard Biener wrote:
> > > > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
> > > >>
> > > >> Hi.
> > > >>
> > > >> The patch is catching duplicate 'default' values in target attribute.
> > > >>
> > > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> > > >>
> > > >> Ready to be installed?
> > > >
> > > > I wonder if it isn't better to ignore duplicate "default"s (given you 
> > > > needed to
> > > > adjust a testcase even)?
> > >
> > > Well, possibly yes. But I would prefer to have a more strict checking.
> >
> > I agree, default, default is not really useful and rejecting it is fine.
> > Having a testcase covering that is of course desirable.
> 
> There is one in the patch.
> 
> That said, I was worried to go down the same route as that volatile asm
> thing and the patch needs to be on branches as well?

On branches I'd indeed ignore second and following default if that is what
we've done before.

Jakub


Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Richard Biener
On Thu, Apr 11, 2019 at 12:47 PM Jakub Jelinek  wrote:
>
> On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote:
> > On 4/11/19 11:57 AM, Richard Biener wrote:
> > > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
> > >>
> > >> Hi.
> > >>
> > >> The patch is catching duplicate 'default' values in target attribute.
> > >>
> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> > >>
> > >> Ready to be installed?
> > >
> > > I wonder if it isn't better to ignore duplicate "default"s (given you 
> > > needed to
> > > adjust a testcase even)?
> >
> > Well, possibly yes. But I would prefer to have a more strict checking.
>
> I agree, default, default is not really useful and rejecting it is fine.
> Having a testcase covering that is of course desirable.

There is one in the patch.

That said, I was worried to go down the same route as that volatile asm
thing and the patch needs to be on branches as well?

Anyhow, patch is OK for trunk.

Richard.

> Jakub


Re: [aarch64] PR90016 - aarch64: reference to undeclared N in help for command line option

2019-04-11 Thread Christophe Lyon
On Wed, 10 Apr 2019 at 11:57, Richard Earnshaw (lists)
 wrote:
>
> 'to N' is now redundant and misleading given the earlier change to use
> .
>
> Removed
>
> * config/aarch64/aarch64.opt (msve-vector-bits): Remove redundant and
> obsolete reference to N.
>

Hi Richard,

I've just added the missing final '.', which was causing
FAIL: compiler driver --help=target option(s): "^ +-.*[^:.]$" absent
from output: "  -msve-vector-bits=  Set the number of bits in
an SVE vector register"

Christophe

> R.


Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Jakub Jelinek
On Thu, Apr 11, 2019 at 12:44:40PM +0200, Martin Liška wrote:
> On 4/11/19 11:57 AM, Richard Biener wrote:
> > On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> The patch is catching duplicate 'default' values in target attribute.
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Ready to be installed?
> > 
> > I wonder if it isn't better to ignore duplicate "default"s (given you 
> > needed to
> > adjust a testcase even)?
> 
> Well, possibly yes. But I would prefer to have a more strict checking.

I agree, default, default is not really useful and rejecting it is fine.
Having a testcase covering that is of course desirable.

Jakub


Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Martin Liška
On 4/11/19 11:57 AM, Richard Biener wrote:
> On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
>>
>> Hi.
>>
>> The patch is catching duplicate 'default' values in target attribute.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
> 
> I wonder if it isn't better to ignore duplicate "default"s (given you needed 
> to
> adjust a testcase even)?

Well, possibly yes. But I would prefer to have a more strict checking.

Martin

> 
> Richard.
> 
>> Thanks,
>> Martin
>>
>> gcc/ChangeLog:
>>
>> 2019-04-10  Martin Liska  
>>
>> PR middle-end/89970
>> * multiple_target.c (create_dispatcher_calls): Wrap ifunc
>> in error message.
>> (separate_attrs): Handle multiple 'default's.
>> (expand_target_clones): Rework error handling code.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2019-04-10  Martin Liska  
>>
>> PR middle-end/89970
>> * gcc.target/i386/mvc15.c: New test.
>> * gcc.target/i386/mvc3.c: Quote target in error pattern.
>> * gcc.target/i386/mvc4.c: Remove duplicit 'default'.
>> ---
>>  gcc/multiple_target.c | 38 ++-
>>  gcc/testsuite/gcc.target/i386/mvc15.c | 11 
>>  gcc/testsuite/gcc.target/i386/mvc3.c  |  2 +-
>>  gcc/testsuite/gcc.target/i386/mvc4.c  |  2 +-
>>  4 files changed, 38 insertions(+), 15 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c
>>
>>



Re: [Patch] [testsuite][arm] Update warning prune regex

2019-04-11 Thread Kyrill Tkachov

Hi Matthew,

On 4/11/19 10:26 AM, Matthew Malcomson wrote:

r269586 changed the format of some warning messages.

Each switch in the warning message is now surrounded by single quotes.

This commit updates the regex's in arm.exp dejagnu files to match the
new format and remove the extra 20+ FAILs on excess errors that are
introduced on certain variations (e.g.
arm-eabi-aem/-marm/-march=armv7-a/-mfpu=vfpv3-d16/-mfloat-abi=softfp).

Regtested arm.exp with cross-compiler arm-none-eabi


Ok.

Thanks,

Kyrill



gcc/testsuite/ChangeLog:

2019-04-11  Matthew Malcomson 

    * g++.target/arm/arm.exp: Change format of default prune regex.
    * gcc.target/arm/arm.exp: Change format of default prune regex.



### Attachment also inlined for ease of reply    
###



diff --git a/gcc/testsuite/g++.target/arm/arm.exp 
b/gcc/testsuite/g++.target/arm/arm.exp
index 
247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368 
100644

--- a/gcc/testsuite/g++.target/arm/arm.exp
+++ b/gcc/testsuite/g++.target/arm/arm.exp
@@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then {

 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* 
conflicts with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' 
conflicts with '-m(cpu|arch)=.*' switch"


 # Initialize `dg'.
 dg-init
diff --git a/gcc/testsuite/gcc.target/arm/arm.exp 
b/gcc/testsuite/gcc.target/arm/arm.exp
index 
0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de 
100644

--- a/gcc/testsuite/gcc.target/arm/arm.exp
+++ b/gcc/testsuite/gcc.target/arm/arm.exp
@@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then {
 # This variable should only apply to tests called in this exp file.
 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* 
conflicts with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' 
conflicts with '-m(cpu|arch)=.*' switch"


 # Initialize `dg'.
 dg-init



Re: [PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Richard Biener
On Thu, Apr 11, 2019 at 10:37 AM Martin Liška  wrote:
>
> Hi.
>
> The patch is catching duplicate 'default' values in target attribute.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

I wonder if it isn't better to ignore duplicate "default"s (given you needed to
adjust a testcase even)?

Richard.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR middle-end/89970
> * multiple_target.c (create_dispatcher_calls): Wrap ifunc
> in error message.
> (separate_attrs): Handle multiple 'default's.
> (expand_target_clones): Rework error handling code.
>
> gcc/testsuite/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR middle-end/89970
> * gcc.target/i386/mvc15.c: New test.
> * gcc.target/i386/mvc3.c: Quote target in error pattern.
> * gcc.target/i386/mvc4.c: Remove duplicit 'default'.
> ---
>  gcc/multiple_target.c | 38 ++-
>  gcc/testsuite/gcc.target/i386/mvc15.c | 11 
>  gcc/testsuite/gcc.target/i386/mvc3.c  |  2 +-
>  gcc/testsuite/gcc.target/i386/mvc4.c  |  2 +-
>  4 files changed, 38 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c
>
>


[patch] PR 79842 - i18n: subword translation in "Can't use the same %smodule"

2019-04-11 Thread Dominique d'Humières
Hi all,

I am planning to commit the following patch

--- ../_clean/gcc/fortran/module.c  2019-03-21 20:46:46.0 +0100
+++ gcc/fortran/module.c2019-04-11 10:28:37.0 +0200
@@ -7144,8 +7144,12 @@ gfc_use_module (gfc_use_list *module)
   for (p = gfc_state_stack; p; p = p->previous)
 if ((p->state == COMP_MODULE || p->state == COMP_SUBMODULE)
 && strcmp (p->sym->name, module_name) == 0)
-  gfc_fatal_error ("Cannot USE the same %smodule we're building",
-  p->state == COMP_SUBMODULE ? "sub" : "");
+  {
+   if (p->state == COMP_SUBMODULE)
+ gfc_fatal_error ("Cannot USE a submodule that is currently built");
+   else
+ gfc_fatal_error ("Cannot USE a module that is currently built");
+  }
 
   init_pi_tree ();
   init_true_name_tree ();

While testing it I did not find any coverage of these errors in the test suite.

TIA

Dominique



Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965, take 2)

2019-04-11 Thread Richard Biener
On Thu, 11 Apr 2019, Jakub Jelinek wrote:

> On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > > 
> > > During those 2 bootstraps/regtests, data.load_found has been set just
> > > on the new testcase on ia32.
> > 
> > Hmm, I wonder whether we really need to DCE calls after reload?
> > That said, I'm not familiar enough with the code to check if the
> > patch makes sense (can there ever be uses of the argument slots
> > _after_ the call?).
> 
> And here is the patch on top of the refactoring patch.
> As for the argument slots after the call, hope Jeff and Vlad won't mind if I
> copy'n'paste what they said on IRC yesterday:
> law: vmakarov: in PR89965, is it valid RTL to store something in stack 
> argument slot and read it again before the call that uses that stack slot?
> law: vmakarov: if yes, I have a rough idea what to do, but if stack argument 
> slots may be only stored and can be read only by a callee and not by 
> something else in the caller, then it is a RA issue
>  jakub: i think it is not defined (or described). But the old 
> reload used equiv memory for long time to store value in memory.  LRA just 
> uses the same approach.
>  i think you're allowed to read it up to the call point, who "owns" the 
> contents of the slot after the call point is subject to debate :-)
>  not sure if we defined things once a REG_EQUIV is on the MEM, but that 
> would tend to imply the memory is unchanging across the call
>  (though one could ask how in the world the caller would know the callee 
> didn't clobber the slot)

Ok, so that says "well, nothing prevents it from being used" for example
for a const/pure call (depending on what alias analysis does here...).
Who owns the argument slots should be defined by the ABI I guess.

So iff alias-analysis doesn't leave us with the chance to pick up
the stack slot contents after the call in any case we're of course fine.
And if we do have the chance but should not then the call instruction
needs some explicit clobbering of the argument area (or the RTL IL
needs to be documented that such clobbering is implicit and the
alias machinery then needs to honor that).

Still leaving review of the patch to somebody else (it's clearly
closing at least part of the hole).

Richard.

> 2019-04-11  Jakub Jelinek  
>   
>   PR rtl-optimization/89965
>   * dce.c: Include rtl-iter.h.
>   (struct check_argument_load_data): New type.
>   (check_argument_load): New function.
>   (find_call_stack_args): Check for loads from stack slots still tracked
>   in sp_bytes and punt if any is found.
> 
>   * gcc.target/i386/pr89965.c: New test.
> 
> --- gcc/dce.c.jj  2019-04-11 10:30:00.436705065 +0200
> +++ gcc/dce.c 2019-04-11 10:43:00.926828390 +0200
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
>  #include "valtrack.h"
>  #include "tree-pass.h"
>  #include "dbgcnt.h"
> +#include "rtl-iter.h"
>  
>  
>  /* -
> @@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call
>return off;
>  }
>  
> +/* Data for check_argument_load called via note_uses.  */
> +struct check_argument_load_data {
> +  bitmap sp_bytes;
> +  HOST_WIDE_INT min_sp_off, max_sp_off;
> +  rtx_call_insn *call_insn;
> +  bool fast;
> +  bool load_found;
> +};
> +
> +/* Helper function for find_call_stack_args.  Check if there are
> +   any loads from the argument slots in between the const/pure call
> +   and store to the argument slot, set LOAD_FOUND if any is found.  */
> +
> +static void
> +check_argument_load (rtx *loc, void *data)
> +{
> +  struct check_argument_load_data *d
> += (struct check_argument_load_data *) data;
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
> +{
> +  const_rtx mem = *iter;
> +  HOST_WIDE_INT size;
> +  if (MEM_P (mem)
> +   && MEM_SIZE_KNOWN_P (mem)
> +   && MEM_SIZE (mem).is_constant (&size))
> + {
> +   HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
> +   if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
> +   && off < d->max_sp_off
> +   && off + size > d->min_sp_off)
> + for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
> +  byte < MIN (off + size, d->max_sp_off); byte++)
> +   if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
> + {
> +   d->load_found = true;
> +   return;
> + }
> + }
> +}
> +}
> +
>  /* Try to find all stack stores of CALL_INSN arguments if
> ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
> and it is therefore safe to eliminate the call, return true,
> @@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal
>/* Walk backwards, looking for argument stores.  The search stops
>   when seeing another call, sp adjustment o

[Patch] [testsuite][arm] Update warning prune regex

2019-04-11 Thread Matthew Malcomson
r269586 changed the format of some warning messages.

Each switch in the warning message is now surrounded by single quotes.

This commit updates the regex's in arm.exp dejagnu files to match the
new format and remove the extra 20+ FAILs on excess errors that are
introduced on certain variations (e.g.
arm-eabi-aem/-marm/-march=armv7-a/-mfpu=vfpv3-d16/-mfloat-abi=softfp).

Regtested arm.exp with cross-compiler arm-none-eabi

gcc/testsuite/ChangeLog:

2019-04-11  Matthew Malcomson  

* g++.target/arm/arm.exp: Change format of default prune regex.
* gcc.target/arm/arm.exp: Change format of default prune regex.



### Attachment also inlined for ease of reply###


diff --git a/gcc/testsuite/g++.target/arm/arm.exp 
b/gcc/testsuite/g++.target/arm/arm.exp
index 
247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368
 100644
--- a/gcc/testsuite/g++.target/arm/arm.exp
+++ b/gcc/testsuite/g++.target/arm/arm.exp
@@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then {
 
 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts 
with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts 
with '-m(cpu|arch)=.*' switch"
 
 # Initialize `dg'.
 dg-init
diff --git a/gcc/testsuite/gcc.target/arm/arm.exp 
b/gcc/testsuite/gcc.target/arm/arm.exp
index 
0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de
 100644
--- a/gcc/testsuite/gcc.target/arm/arm.exp
+++ b/gcc/testsuite/gcc.target/arm/arm.exp
@@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then {
 # This variable should only apply to tests called in this exp file.
 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts 
with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts 
with '-m(cpu|arch)=.*' switch"
 
 # Initialize `dg'.
 dg-init

diff --git a/gcc/testsuite/g++.target/arm/arm.exp 
b/gcc/testsuite/g++.target/arm/arm.exp
index 
247536b8312f8ff7a50c0eaf66e12785b80b6d3b..575f0a11c87583a264761f705997c7efc9d2d368
 100644
--- a/gcc/testsuite/g++.target/arm/arm.exp
+++ b/gcc/testsuite/g++.target/arm/arm.exp
@@ -35,7 +35,7 @@ if ![info exists DEFAULT_CXXFLAGS] then {
 
 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts 
with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts 
with '-m(cpu|arch)=.*' switch"
 
 # Initialize `dg'.
 dg-init
diff --git a/gcc/testsuite/gcc.target/arm/arm.exp 
b/gcc/testsuite/gcc.target/arm/arm.exp
index 
0c4981c8316a30944302220b344810e3d44fcab1..829f6836b2918acdbae3160ec48802d9fe7ab5de
 100644
--- a/gcc/testsuite/gcc.target/arm/arm.exp
+++ b/gcc/testsuite/gcc.target/arm/arm.exp
@@ -33,7 +33,7 @@ if ![info exists DEFAULT_CFLAGS] then {
 # This variable should only apply to tests called in this exp file.
 global dg_runtest_extra_prunes
 set dg_runtest_extra_prunes ""
-lappend dg_runtest_extra_prunes "warning: switch -m(cpu|arch)=.* conflicts 
with -m(cpu|arch)=.* switch"
+lappend dg_runtest_extra_prunes "warning: switch '-m(cpu|arch)=.*' conflicts 
with '-m(cpu|arch)=.*' switch"
 
 # Initialize `dg'.
 dg-init



Re: [PATCH] Add support for missing AVX512* ISAs (PR target/89929).

2019-04-11 Thread Uros Bizjak
On Thu, Apr 11, 2019 at 10:38 AM Martin Liška  wrote:
>
> Hi.
>
> The patch is adding missing AVX512 ISAs for target and target_clone
> attributes.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR target/89929
> * config/i386/i386.c (get_builtin_code_for_version): Add
> support for missing AVX512 ISAs.
>
> gcc/testsuite/ChangeLog:
>
> 2019-04-10  Martin Liska  
>
> PR target/89929
> * g++.target/i386/mv28.C: New test.
> * gcc.target/i386/mvc14.c: New test.

LGTM, but HJ should check the priorities.

Uros.

> ---
>  gcc/config/i386/i386.c| 34 ++-
>  gcc/testsuite/g++.target/i386/mv28.C  | 30 +++
>  gcc/testsuite/gcc.target/i386/mvc14.c | 16 +
>  3 files changed, 79 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/mv28.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c
>
>


[PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965, take 2)

2019-04-11 Thread Jakub Jelinek
On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > During those 2 bootstraps/regtests, data.load_found has been set just
> > on the new testcase on ia32.
> 
> Hmm, I wonder whether we really need to DCE calls after reload?
> That said, I'm not familiar enough with the code to check if the
> patch makes sense (can there ever be uses of the argument slots
> _after_ the call?).

And here is the patch on top of the refactoring patch.
As for the argument slots after the call, hope Jeff and Vlad won't mind if I
copy'n'paste what they said on IRC yesterday:
law: vmakarov: in PR89965, is it valid RTL to store something in stack argument 
slot and read it again before the call that uses that stack slot?
law: vmakarov: if yes, I have a rough idea what to do, but if stack argument 
slots may be only stored and can be read only by a callee and not by something 
else in the caller, then it is a RA issue
 jakub: i think it is not defined (or described). But the old reload 
used equiv memory for long time to store value in memory.  LRA just uses the 
same approach.
 i think you're allowed to read it up to the call point, who "owns" the 
contents of the slot after the call point is subject to debate :-)
 not sure if we defined things once a REG_EQUIV is on the MEM, but that 
would tend to imply the memory is unchanging across the call
 (though one could ask how in the world the caller would know the callee 
didn't clobber the slot)

2019-04-11  Jakub Jelinek  

PR rtl-optimization/89965
* dce.c: Include rtl-iter.h.
(struct check_argument_load_data): New type.
(check_argument_load): New function.
(find_call_stack_args): Check for loads from stack slots still tracked
in sp_bytes and punt if any is found.

* gcc.target/i386/pr89965.c: New test.

--- gcc/dce.c.jj2019-04-11 10:30:00.436705065 +0200
+++ gcc/dce.c   2019-04-11 10:43:00.926828390 +0200
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
 #include "valtrack.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 
 /* -
@@ -325,6 +326,48 @@ sp_based_mem_offset (rtx_call_insn *call
   return off;
 }
 
+/* Data for check_argument_load called via note_uses.  */
+struct check_argument_load_data {
+  bitmap sp_bytes;
+  HOST_WIDE_INT min_sp_off, max_sp_off;
+  rtx_call_insn *call_insn;
+  bool fast;
+  bool load_found;
+};
+
+/* Helper function for find_call_stack_args.  Check if there are
+   any loads from the argument slots in between the const/pure call
+   and store to the argument slot, set LOAD_FOUND if any is found.  */
+
+static void
+check_argument_load (rtx *loc, void *data)
+{
+  struct check_argument_load_data *d
+= (struct check_argument_load_data *) data;
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
+{
+  const_rtx mem = *iter;
+  HOST_WIDE_INT size;
+  if (MEM_P (mem)
+ && MEM_SIZE_KNOWN_P (mem)
+ && MEM_SIZE (mem).is_constant (&size))
+   {
+ HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
+ if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
+ && off < d->max_sp_off
+ && off + size > d->min_sp_off)
+   for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
+byte < MIN (off + size, d->max_sp_off); byte++)
+ if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
+   {
+ d->load_found = true;
+ return;
+   }
+   }
+}
+}
+
 /* Try to find all stack stores of CALL_INSN arguments if
ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
and it is therefore safe to eliminate the call, return true,
@@ -396,6 +439,8 @@ find_call_stack_args (rtx_call_insn *cal
   /* Walk backwards, looking for argument stores.  The search stops
  when seeing another call, sp adjustment or memory store other than
  argument store.  */
+  struct check_argument_load_data data
+= { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false };
   ret = false;
   for (insn = PREV_INSN (call_insn); insn; insn = prev_insn)
 {
@@ -414,6 +459,10 @@ find_call_stack_args (rtx_call_insn *cal
   if (!set || SET_DEST (set) == stack_pointer_rtx)
break;
 
+  note_uses (&PATTERN (insn), check_argument_load, &data);
+  if (data.load_found)
+   break;
+
   if (!MEM_P (SET_DEST (set)))
continue;
 
--- gcc/testsuite/gcc.target/i386/pr89965.c.jj  2019-04-11 10:28:56.211764660 
+0200
+++ gcc/testsuite/gcc.target/i386/pr89965.c 2019-04-11 10:28:56.211764660 
+0200
@@ -0,0 +1,39 @@
+/* PR rtl-optimization/89965 */
+/* { dg-do run } */
+/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations 
-fno-tree-dce -fno-tree-ter" } */
+/*

Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-04-11 Thread Richard Earnshaw (lists)
On 10/04/2019 21:31, Steve Ellcey wrote:
> On Wed, 2019-04-10 at 11:10 +0100, Richard Earnshaw (lists) wrote:
>>
>> OK with those changes.
>>
>> R.
> 
> I made the changes you suggested and checked in the patch.  Just to be
> complete, here is the final version of the patch that I checked in.
> 
> 2018-04-10  Steve Ellcey  
> 
>   PR rtl-optimization/87763
>   * config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
>   New prototype.
>   * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
>   New function.
>   * config/aarch64/aarch64.md (*aarch64_bfi5_shift):
>   New instruction.
>   (*aarch64_bfi5_shift_alt): Ditto.
>   (*aarch64_bfi4_noand): Ditto.
>   (*aarch64_bfi4_noand_alt): Ditto.
>   (*aarch64_bfi4_noshift): Ditto.
> 

You've removed the ..._noshift_alt variant.  That wasn't my intention,
so perhaps you misunderstood what I was trying to say.

The two versions are both needed, since the register tie is not
orthogonal to the constants used in the masks and canonicalization will
not generate a consistent ordering of the operands.

My point is that you can make the 'alt' version from

+(define_insn "*aarch64_bfi4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+  (match_operand:GPI 2 "const_int_operand" "n"))
+ (and:GPI (match_operand:GPI 3 "register_operand" "r")
+  (match_operand:GPI 4 "const_int_operand" "n"]
+  "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0,
+ UINTVAL (operands[4]))"
+  "bfi\t%0, %3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)

by simply duplicating it and permuting the numbering of the operands in
the pattern part, the remaining parts remain identical, including the
final condition and the insn printing string:

+(define_insn "*aarch64_bfi4_noshift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+(ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "r")
+  (match_operand:GPI 4 "const_int_operand" "n"))
+ (and:GPI (match_operand:GPI 1 "register_operand" "0")
+  (match_operand:GPI 2 "const_int_operand" "n"]

Operand order permuted

+  "aarch64_masks_and_shift_for_bfi_p (mode, UINTVAL (operands[2]), 0,
+ UINTVAL (operands[4]))"

does not change

+  "bfi\t%0, %3, 0, %P4"

does not change

+  [(set_attr "type" "bfm")]
+)

R.

> 2018-04-10  Steve Ellcey  
> 
>   PR rtl-optimization/87763
>   * gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks
>   to bfi.
>   * gcc.target/aarch64/combine_bfi_2.c: New test.
> 
> 
> combine.patch
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index b035e35..b6c0d0a 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
>  void aarch64_declare_function_name (FILE *, const char*, tree);
>  bool aarch64_legitimate_pic_operand_p (rtx);
>  bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
> +bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned 
> HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT);
>  bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
>  bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
>  opt_machine_mode aarch64_sve_pred_mode (unsigned int);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 95e5b03..9be7548 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9336,6 +9336,35 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode 
> mode, rtx mask,
>& ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
>  }
>  
> +/* Return true if the masks and a shift amount from an RTX of the form
> +   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
> +   a BFI instruction of mode MODE.  See *arch64_bfi patterns.  */
> +
> +bool
> +aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
> +unsigned HOST_WIDE_INT mask1,
> +unsigned HOST_WIDE_INT shft_amnt,
> +unsigned HOST_WIDE_INT mask2)
> +{
> +  unsigned HOST_WIDE_INT t;
> +
> +  /* Verify that there is no overlap in what bits are set in the two masks.  
> */
> +  if (mask1 != ~mask2)
> +return false;
> +
> +  /* Verify that mask2 is not all zeros or ones.  */
> +  if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
> +return false;
> +
> +  /* The shift amount should always be less than the mode size.  */
> +  gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
> +
> +  /* Verify that 

Re: GCC 7 backport

2019-04-11 Thread Martin Liška
On 3/28/19 9:52 AM, Martin Liška wrote:
> Hi.
> 
> Backporting one documentation fix.
> 
> Martin
> 

One more patch that I'm going to backport.

Martin
>From a096ce6d9f8e636815af5a237881e1ba443f1b18 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 8 Mar 2019 12:55:40 +
Subject: [PATCH] Backport r269492

gcc/ChangeLog:

2019-03-08  Martin Liska  

	PR target/86952
	* config/i386/i386.c (ix86_option_override_internal): Disable
	jump tables when retpolines are used.

gcc/testsuite/ChangeLog:

2019-03-08  Martin Liska  

	PR target/86952
	* gcc.target/i386/pr86952.c: New test.
	* gcc.target/i386/indirect-thunk-7.c: Use jump tables to match
	scanned pattern.
	* gcc.target/i386/indirect-thunk-inline-7.c: Likewise.
---
 gcc/config/i386/i386.c|  6 +
 .../gcc.target/i386/indirect-thunk-7.c|  2 +-
 .../gcc.target/i386/indirect-thunk-extern-7.c |  2 +-
 .../gcc.target/i386/indirect-thunk-inline-7.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr86952.c   | 23 +++
 5 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 18dce38640b..96bdb3a78ba 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -6282,6 +6282,12 @@ ix86_option_override_internal (bool main_args_p,
 target_option_default_node = target_option_current_node
   = build_target_option_node (opts);
 
+  /* PR86952: jump table usage with retpolines is slow.
+ The PR provides some numbers about the slowness.  */
+  if (ix86_indirect_branch != indirect_branch_keep
+  && !opts_set->x_flag_jump_tables)
+opts->x_flag_jump_tables = 0;
+
   return true;
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
index 3c72036dbaf..53868f46558 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */
+/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic -fjump-tables" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
index 2b9a33e93dc..bc185fe98af 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-extern-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic" } */
+/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-extern -fno-pic -fjump-tables" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
index ea009245a58..e6f064959a1 100644
--- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
+++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */
+/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic -fjump-tables" } */
 
 void func0 (void);
 void func1 (void);
diff --git a/gcc/testsuite/gcc.target/i386/pr86952.c b/gcc/testsuite/gcc.target/i386/pr86952.c
new file mode 100644
index 000..004e167add0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr86952.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mindirect-branch=thunk" } */
+
+int global;
+
+int
+foo (int x)
+{
+  switch (x & 7)
+{
+  case 0: ; return 1722;
+  case 1: global += 1; return 1060;
+  case 2: ; return 1990;
+  case 3: ; return 1242;
+  case 4: ; return 1466;
+  case 5: ; return 894;
+  case 6: ; return 570;
+  case 7: ; return 572;
+  default: return 0;
+}
+}
+
+/* { dg-final { scan-assembler-not "jmp\[ \t\]\\*" } } */
-- 
2.21.0



Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)

2019-04-11 Thread Richard Biener
On Thu, 11 Apr 2019, Jakub Jelinek wrote:

> On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > > The following patch just punts if we find loads from stack slots in 
> > > between
> > > where they are pushed and the const/pure call.  In addition to that, I've
> > > outlined the same largish sequence that had 3 copies in the function 
> > > already
> > > and I'd need to add 4th copy, so in the end the dce.c changes are removing
> > > more than adding:
> > >  1 file changed, 118 insertions(+), 135 deletions(-)
> > 
> > The refactoring itself is probably OK (and if done separately
> > makes reviewing easier).
> 
> Here is just the refactoring, ok for trunk?

OK.

Thanks,
Richard.

> 2019-04-11  Jakub Jelinek  
>   
>   PR rtl-optimization/89965
>   * dce.c (sp_based_mem_offset): New function.
>   (find_call_stack_args): Use sp_based_mem_offset.
> 
> --- gcc/dce.c.jj  2019-04-11 10:26:18.509365653 +0200
> +++ gcc/dce.c 2019-04-11 10:30:00.436705065 +0200
> @@ -272,6 +272,58 @@ check_argument_store (HOST_WIDE_INT size
>return true;
>  }
>  
> +/* If MEM has sp address, return 0, if it has sp + const address,
> +   return that const, if it has reg address where reg is set to sp + const
> +   and FAST is false, return const, otherwise return
> +   INTTYPE_MINUMUM (HOST_WIDE_INT).  */
> +
> +static HOST_WIDE_INT
> +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
> +{
> +  HOST_WIDE_INT off = 0;
> +  rtx addr = XEXP (mem, 0);
> +  if (GET_CODE (addr) == PLUS
> +  && REG_P (XEXP (addr, 0))
> +  && CONST_INT_P (XEXP (addr, 1)))
> +{
> +  off = INTVAL (XEXP (addr, 1));
> +  addr = XEXP (addr, 0);
> +}
> +  if (addr == stack_pointer_rtx)
> +return off;
> +
> +  if (!REG_P (addr) || fast)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
> +  df_ref use;
> +  FOR_EACH_INSN_USE (use, call_insn)
> +  if (rtx_equal_p (addr, DF_REF_REG (use)))
> +break;
> +
> +  if (use == NULL)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  struct df_link *defs;
> +  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> +if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> +  break;
> +
> +  if (defs == NULL)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  rtx set = single_set (DF_REF_INSN (defs->ref));
> +  if (!set)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  if (GET_CODE (SET_SRC (set)) != PLUS
> +  || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> +  || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  off += INTVAL (XEXP (SET_SRC (set), 1));
> +  return off;
> +}
>  
>  /* Try to find all stack stores of CALL_INSN arguments if
> ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
> @@ -309,58 +361,13 @@ find_call_stack_args (rtx_call_insn *cal
>  if (GET_CODE (XEXP (p, 0)) == USE
>   && MEM_P (XEXP (XEXP (p, 0), 0)))
>{
> - rtx mem = XEXP (XEXP (p, 0), 0), addr;
> - HOST_WIDE_INT off = 0, size;
> + rtx mem = XEXP (XEXP (p, 0), 0);
> + HOST_WIDE_INT size;
>   if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size))
> return false;
> - addr = XEXP (mem, 0);
> - if (GET_CODE (addr) == PLUS
> - && REG_P (XEXP (addr, 0))
> - && CONST_INT_P (XEXP (addr, 1)))
> -   {
> - off = INTVAL (XEXP (addr, 1));
> - addr = XEXP (addr, 0);
> -   }
> - if (addr != stack_pointer_rtx)
> -   {
> - if (!REG_P (addr))
> -   return false;
> - /* If not fast, use chains to see if addr wasn't set to
> -sp + offset.  */
> - if (!fast)
> -   {
> - df_ref use;
> - struct df_link *defs;
> - rtx set;
> -
> - FOR_EACH_INSN_USE (use, call_insn)
> -   if (rtx_equal_p (addr, DF_REF_REG (use)))
> - break;
> -
> - if (use == NULL)
> -   return false;
> -
> - for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> -   if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> - break;
> -
> - if (defs == NULL)
> -   return false;
> -
> - set = single_set (DF_REF_INSN (defs->ref));
> - if (!set)
> -   return false;
> -
> - if (GET_CODE (SET_SRC (set)) != PLUS
> - || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> - || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> -   return false;
> -
> - off += INTVAL (XEXP (SET_SRC (set), 1));
> -   }
> - else
> -   return false;
> -   }
> + HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
> + if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
> +   return false;
>   min_sp_off = MIN (min_sp_off, off);
>   m

Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)

2019-04-11 Thread Jakub Jelinek
On Thu, Apr 11, 2019 at 09:54:24AM +0200, Richard Biener wrote:
> > The following patch just punts if we find loads from stack slots in between
> > where they are pushed and the const/pure call.  In addition to that, I've
> > outlined the same largish sequence that had 3 copies in the function already
> > and I'd need to add 4th copy, so in the end the dce.c changes are removing
> > more than adding:
> >  1 file changed, 118 insertions(+), 135 deletions(-)
> 
> The refactoring itself is probably OK (and if done separately
> makes reviewing easier).

Here is just the refactoring, ok for trunk?

2019-04-11  Jakub Jelinek  

PR rtl-optimization/89965
* dce.c (sp_based_mem_offset): New function.
(find_call_stack_args): Use sp_based_mem_offset.

--- gcc/dce.c.jj2019-04-11 10:26:18.509365653 +0200
+++ gcc/dce.c   2019-04-11 10:30:00.436705065 +0200
@@ -272,6 +272,58 @@ check_argument_store (HOST_WIDE_INT size
   return true;
 }
 
+/* If MEM has sp address, return 0, if it has sp + const address,
+   return that const, if it has reg address where reg is set to sp + const
+   and FAST is false, return const, otherwise return
+   INTTYPE_MINUMUM (HOST_WIDE_INT).  */
+
+static HOST_WIDE_INT
+sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
+{
+  HOST_WIDE_INT off = 0;
+  rtx addr = XEXP (mem, 0);
+  if (GET_CODE (addr) == PLUS
+  && REG_P (XEXP (addr, 0))
+  && CONST_INT_P (XEXP (addr, 1)))
+{
+  off = INTVAL (XEXP (addr, 1));
+  addr = XEXP (addr, 0);
+}
+  if (addr == stack_pointer_rtx)
+return off;
+
+  if (!REG_P (addr) || fast)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
+  df_ref use;
+  FOR_EACH_INSN_USE (use, call_insn)
+  if (rtx_equal_p (addr, DF_REF_REG (use)))
+break;
+
+  if (use == NULL)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  struct df_link *defs;
+  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+if (! DF_REF_IS_ARTIFICIAL (defs->ref))
+  break;
+
+  if (defs == NULL)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  rtx set = single_set (DF_REF_INSN (defs->ref));
+  if (!set)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  if (GET_CODE (SET_SRC (set)) != PLUS
+  || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
+  || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  off += INTVAL (XEXP (SET_SRC (set), 1));
+  return off;
+}
 
 /* Try to find all stack stores of CALL_INSN arguments if
ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
@@ -309,58 +361,13 @@ find_call_stack_args (rtx_call_insn *cal
 if (GET_CODE (XEXP (p, 0)) == USE
&& MEM_P (XEXP (XEXP (p, 0), 0)))
   {
-   rtx mem = XEXP (XEXP (p, 0), 0), addr;
-   HOST_WIDE_INT off = 0, size;
+   rtx mem = XEXP (XEXP (p, 0), 0);
+   HOST_WIDE_INT size;
if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size))
  return false;
-   addr = XEXP (mem, 0);
-   if (GET_CODE (addr) == PLUS
-   && REG_P (XEXP (addr, 0))
-   && CONST_INT_P (XEXP (addr, 1)))
- {
-   off = INTVAL (XEXP (addr, 1));
-   addr = XEXP (addr, 0);
- }
-   if (addr != stack_pointer_rtx)
- {
-   if (!REG_P (addr))
- return false;
-   /* If not fast, use chains to see if addr wasn't set to
-  sp + offset.  */
-   if (!fast)
- {
-   df_ref use;
-   struct df_link *defs;
-   rtx set;
-
-   FOR_EACH_INSN_USE (use, call_insn)
- if (rtx_equal_p (addr, DF_REF_REG (use)))
-   break;
-
-   if (use == NULL)
- return false;
-
-   for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
- if (! DF_REF_IS_ARTIFICIAL (defs->ref))
-   break;
-
-   if (defs == NULL)
- return false;
-
-   set = single_set (DF_REF_INSN (defs->ref));
-   if (!set)
- return false;
-
-   if (GET_CODE (SET_SRC (set)) != PLUS
-   || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
-   || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
- return false;
-
-   off += INTVAL (XEXP (SET_SRC (set), 1));
- }
-   else
- return false;
- }
+   HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
+   if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
+ return false;
min_sp_off = MIN (min_sp_off, off);
max_sp_off = MAX (max_sp_off, off + size);
   }
@@ -376,51 +383,19 @@ find_call_stack_args (rtx_call_insn *cal
 if (GET_CODE (XEXP (p, 0)) == USE
&& MEM_P (XEXP (XEXP (p, 0), 0)))
   {
-   rtx mem = XEXP (XEXP (p, 0), 0), a

[PATCH] Add support for missing AVX512* ISAs (PR target/89929).

2019-04-11 Thread Martin Liška
Hi.

The patch is adding missing AVX512 ISAs for target and target_clone
attributes.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-04-10  Martin Liska  

PR target/89929
* config/i386/i386.c (get_builtin_code_for_version): Add
support for missing AVX512 ISAs.

gcc/testsuite/ChangeLog:

2019-04-10  Martin Liska  

PR target/89929
* g++.target/i386/mv28.C: New test.
* gcc.target/i386/mvc14.c: New test.
---
 gcc/config/i386/i386.c| 34 ++-
 gcc/testsuite/g++.target/i386/mv28.C  | 30 +++
 gcc/testsuite/gcc.target/i386/mvc14.c | 16 +
 3 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/i386/mv28.C
 create mode 100644 gcc/testsuite/gcc.target/i386/mvc14.c


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 57d5a7b8432..2d09a006fb6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31883,6 +31883,22 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
 P_AVX2,
 P_PROC_AVX2,
 P_AVX512F,
+P_AVX512VL,
+P_AVX512BW,
+P_AVX512DQ,
+P_AVX512CD,
+P_AVX512ER,
+P_AVX512PF,
+P_AVX512VBMI,
+P_AVX512IFMA,
+P_AVX5124VNNIW,
+P_AVX5124FMAPS,
+P_AVX512VPOPCNTDQ,
+P_AVX512VBMI2,
+P_GFNI,
+P_VPCLMULQDQ,
+P_AVX512VNNI,
+P_AVX512BITALG,
 P_PROC_AVX512F
   };
 
@@ -31916,7 +31932,23 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
   {"fma", P_FMA},
   {"bmi2", P_BMI2},
   {"avx2", P_AVX2},
-  {"avx512f", P_AVX512F}
+  {"avx512f", P_AVX512F},
+  {"avx512vl", P_AVX512VL},
+  {"avx512bw", P_AVX512BW},
+  {"avx512dq", P_AVX512DQ},
+  {"avx512cd", P_AVX512CD},
+  {"avx512er", P_AVX512ER},
+  {"avx512pf", P_AVX512PF},
+  {"avx512vbmi", P_AVX512VBMI},
+  {"avx512ifma", P_AVX512IFMA},
+  {"avx5124vnniw", P_AVX5124VNNIW},
+  {"avx5124fmaps", P_AVX5124FMAPS},
+  {"avx512vpopcntdq", P_AVX512VPOPCNTDQ},
+  {"avx512vbmi2", P_AVX512VBMI2},
+  {"gfni", P_GFNI},
+  {"vpclmulqdq", P_VPCLMULQDQ},
+  {"avx512vnni", P_AVX512VNNI},
+  {"avx512bitalg", P_AVX512BITALG}
 };
 
 
diff --git a/gcc/testsuite/g++.target/i386/mv28.C b/gcc/testsuite/g++.target/i386/mv28.C
new file mode 100644
index 000..401edc2585f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/mv28.C
@@ -0,0 +1,30 @@
+/* { dg-do compile} */
+/* { dg-require-ifunc "" }  */
+
+#define FN(TARGET) \
+void __attribute__ ((target(TARGET))) foo () {}
+
+FN("avx512f")
+FN("avx512vl")
+FN("avx512bw")
+FN("avx512dq")
+FN("avx512cd")
+FN("avx512er")
+FN("avx512pf")
+FN("avx512vbmi")
+FN("avx512ifma")
+FN("avx5124vnniw")
+FN("avx5124fmaps")
+FN("avx512vpopcntdq")
+FN("avx512vbmi2")
+FN("gfni")
+FN("vpclmulqdq")
+FN("avx512vnni")
+FN("avx512bitalg")
+FN("default")
+
+int main()
+{
+  foo ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/mvc14.c b/gcc/testsuite/gcc.target/i386/mvc14.c
new file mode 100644
index 000..bff39af3998
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc14.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("avx512f", "avx512vl", "avx512bw", "avx512dq",
+			 "avx512cd", "avx512er", "avx512pf", "avx512vbmi",
+			 "avx512ifma", "avx5124vnniw", "avx5124fmaps",
+			 "avx512vpopcntdq", "avx512vbmi2", "gfni",
+			 "vpclmulqdq", "avx512vnni", "avx512bitalg",
+			 "default")))
+int foo ();
+
+int
+bar ()
+{
+  return foo();
+}



[PATCH] Handle multiple 'default' in target attribute (PR middle-end/89970).

2019-04-11 Thread Martin Liška
Hi.

The patch is catching duplicate 'default' values in target attribute.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-04-10  Martin Liska  

PR middle-end/89970
* multiple_target.c (create_dispatcher_calls): Wrap ifunc
in error message.
(separate_attrs): Handle multiple 'default's.
(expand_target_clones): Rework error handling code.

gcc/testsuite/ChangeLog:

2019-04-10  Martin Liska  

PR middle-end/89970
* gcc.target/i386/mvc15.c: New test.
* gcc.target/i386/mvc3.c: Quote target in error pattern.
* gcc.target/i386/mvc4.c: Remove duplicit 'default'.
---
 gcc/multiple_target.c | 38 ++-
 gcc/testsuite/gcc.target/i386/mvc15.c | 11 
 gcc/testsuite/gcc.target/i386/mvc3.c  |  2 +-
 gcc/testsuite/gcc.target/i386/mvc4.c  |  2 +-
 4 files changed, 38 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/mvc15.c


diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c
index fd983cc4ad9..99dc9da44f5 100644
--- a/gcc/multiple_target.c
+++ b/gcc/multiple_target.c
@@ -73,7 +73,7 @@ create_dispatcher_calls (struct cgraph_node *node)
   if (!targetm.has_ifunc_p ())
 {
   error_at (DECL_SOURCE_LOCATION (node->decl),
-		"the call requires ifunc, which is not"
+		"the call requires %, which is not"
 		" supported by this target");
   return;
 }
@@ -235,8 +235,10 @@ get_attr_str (tree arglist, char *attr_str)
 }
 
 /* Return number of attributes separated by comma and put them into ARGS.
-   If there is no DEFAULT attribute return -1.  If there is an empty
-   string in attribute return -2.  */
+   If there is no DEFAULT attribute return -1.
+   If there is an empty string in attribute return -2.
+   If there are multiple DEFAULT attributes return -3.
+   */
 
 static int
 separate_attrs (char *attr_str, char **attrs, int attrnum)
@@ -256,6 +258,8 @@ separate_attrs (char *attr_str, char **attrs, int attrnum)
 }
   if (default_count == 0)
 return -1;
+  else if (default_count > 1)
+return -3;
   else if (i + default_count < attrnum)
 return -2;
 
@@ -347,8 +351,7 @@ expand_target_clones (struct cgraph_node *node, bool definition)
   if (attr_len == -1)
 {
   warning_at (DECL_SOURCE_LOCATION (node->decl),
-		  0,
-		  "single % attribute is ignored");
+		  0, "single % attribute is ignored");
   return false;
 }
 
@@ -374,21 +377,30 @@ expand_target_clones (struct cgraph_node *node, bool definition)
   char **attrs = XNEWVEC (char *, attrnum);
 
   attrnum = separate_attrs (attr_str, attrs, attrnum);
-  if (attrnum == -1)
+  switch (attrnum)
 {
+case -1:
   error_at (DECL_SOURCE_LOCATION (node->decl),
-		"default target was not set");
-  XDELETEVEC (attrs);
-  XDELETEVEC (attr_str);
-  return false;
-}
-  else if (attrnum == -2)
-{
+		"% target was not set");
+  break;
+case -2:
   error_at (DECL_SOURCE_LOCATION (node->decl),
 		"an empty string cannot be in % attribute");
+  break;
+case -3:
+  error_at (DECL_SOURCE_LOCATION (node->decl),
+		"multiple % targets were set");
+  break;
+default:
+  break;
+}
+
+  if (attrnum < 0)
+{
   XDELETEVEC (attrs);
   XDELETEVEC (attr_str);
   return false;
+
 }
 
   cgraph_function_version_info *decl1_v = NULL;
diff --git a/gcc/testsuite/gcc.target/i386/mvc15.c b/gcc/testsuite/gcc.target/i386/mvc15.c
new file mode 100644
index 000..955aa1e9fe8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mvc15.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+
+__attribute__((target_clones("default", "default")))
+int foo (); /* { dg-error "multiple 'default' targets were set" } */
+
+int
+bar ()
+{
+  return foo();
+}
diff --git a/gcc/testsuite/gcc.target/i386/mvc3.c b/gcc/testsuite/gcc.target/i386/mvc3.c
index 1c7755fabbe..f14fc02694c 100644
--- a/gcc/testsuite/gcc.target/i386/mvc3.c
+++ b/gcc/testsuite/gcc.target/i386/mvc3.c
@@ -2,7 +2,7 @@
 /* { dg-require-ifunc "" } */
 
 __attribute__((target_clones("avx","arch=slm","arch=core-avx2")))
-int foo (); /* { dg-error "default target was not set" } */
+int foo (); /* { dg-error "'default' target was not set" } */
 
 int
 bar ()
diff --git a/gcc/testsuite/gcc.target/i386/mvc4.c b/gcc/testsuite/gcc.target/i386/mvc4.c
index 91293c34cca..1b9b0b4f174 100644
--- a/gcc/testsuite/gcc.target/i386/mvc4.c
+++ b/gcc/testsuite/gcc.target/i386/mvc4.c
@@ -1,7 +1,7 @@
 /* { dg-do run } */
 /* { dg-require-ifunc "" } */
 
-__attribute__((target_clones("default","avx","default")))
+__attribute__((target_clones("default","avx")))
 int
 foo ()
 {



[PATCH, doc] Note variable shadowing at max macro using statement expression

2019-04-11 Thread Tom de Vries
[ was: Re: [RFC, doc] Note variable shadowing at max macro using
statement expression ]

On 09-04-19 22:51, Sandra Loosemore wrote:
> On 4/8/19 5:38 AM, Tom de Vries wrote:
>> Hi,
>>
>> When suggesting to rewrite the unsafe (with respect to multiple
>> evaluation of
>> arguments) macro definition:
>> ...
>>    #define max(a,b) ((a) > (b) ? (a) : (b))
>> ...
>> into the safe macro definition:
>> ...
>>    #define maxint(a,b) \
>>  ({int _a = (a), _b = (b); _a > _b ? _a : _b; })
>> ...
>> mention the variable shadowing problem for:
>> ...
>>    #define maxint3(a, b, c) \
>>  ({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); })
>> ...
>>
>> Any comments?
> 
> The content looks reasonable, but I have some copy-editing nits.
> 

Hi Sandra,

thanks for the review.

I've attached the updated patch, as well as the resulting relevant
gcc.info portion.

OK for trunk?

Thanks,
- Tom
[doc] Note variable shadowing at max macro using statement expression

When suggesting to rewrite the unsafe (with respect to multiple evaluation of
arguments) macro definition:
...
  #define max(a,b) ((a) > (b) ? (a) : (b))
...
into the safe macro definition:
...
  #define maxint(a,b) \
({int _a = (a), _b = (b); _a > _b ? _a : _b; })
...
mention the variable shadowing problem for:
...
  #define maxint3(a, b, c) \
({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); })
...

2019-04-08  Tom de Vries  

	* doc/extend.texi (@node Statement Exprs): Note variable shadowing at
	max macro using statement expression.

---
 gcc/doc/extend.texi | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8e0deac26c3..cad7ad49e56 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -142,14 +142,36 @@ follows:
 @cindex side effects, macro argument
 But this definition computes either @var{a} or @var{b} twice, with bad
 results if the operand has side effects.  In GNU C, if you know the
-type of the operands (here taken as @code{int}), you can define
-the macro safely as follows:
+type of the operands (here taken as @code{int}), you can avoid this
+problem by defining the macro as follows:
 
 @smallexample
 #define maxint(a,b) \
   (@{int _a = (a), _b = (b); _a > _b ? _a : _b; @})
 @end smallexample
 
+Note that introducing variable declarations (as we do in @code{maxint}) can
+cause variable shadowing, so while this example using the @code{max} macro
+produces correct results:
+@smallexample
+int _a = 1, _b = 2, c;
+c = max (_a, _b);
+@end smallexample
+@noindent
+this example using maxint will not:
+@smallexample
+int _a = 1, _b = 2, c;
+c = maxint (_a, _b);
+@end smallexample
+
+This problem may for instance occur when we use this pattern recursively, like
+so:
+
+@smallexample
+#define maxint3(a, b, c) \
+  (@{int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); @})
+@end smallexample
+
 Embedded statements are not allowed in constant expressions, such as
 the value of an enumeration constant, the width of a bit-field, or
 the initial value of a static variable.
 This feature is especially useful in making macro definitions "safe"
(so that they evaluate each operand exactly once).  For example, the
"maximum" function is commonly defined as a macro in standard C as
follows:

 #define max(a,b) ((a) > (b) ? (a) : (b))

But this definition computes either A or B twice, with bad results if
the operand has side effects.  In GNU C, if you know the type of the
operands (here taken as 'int'), you can avoid this problem by defining
the macro as follows:

 #define maxint(a,b) \
   ({int _a = (a), _b = (b); _a > _b ? _a : _b; })

 Note that introducing variable declarations (as we do in 'maxint') can
cause variable shadowing, so while this example using the 'max' macro
produces correct results:
 int _a = 1, _b = 2, c;
 c = max (_a, _b);
this example using maxint will not:
 int _a = 1, _b = 2, c;
 c = maxint (_a, _b);

 This problem may for instance occur when we use this pattern
recursively, like so:

 #define maxint3(a, b, c) \
   ({int _a = (a), _b = (b), _c = (c); maxint (maxint (_a, _b), _c); })



Re: [PATCH GCC] PR90021 ICE fix

2019-04-11 Thread Richard Biener
On Wed, Apr 10, 2019 at 7:48 AM bin.cheng  wrote:
>
> Hi,
> This patch fixes ICE reported by PR90021.  The issue has been there since 
> loop interchange
> and recently exposed by patch for PR89725.  As PR comment suggests, we have 
> equal access
> function {{1, +, 1}_6, +, 1}_4 and _6 is of loop_nest's outer loop.  This 
> patch introduces
> new parameter loopnum to function evolution_function_is_univariate_p so that 
> the above
> scev won't be considered as multivariate, i.e, {1, +, 1}_6 is considered as a 
> symbol to the
> loop_nest.  Note we only change computation of classic dist vector, rather 
> than DDR itself.
>
> Bootstrap and test on x86_64.  Is it ok?

OK.

> BTW, I wonder if collecting data reference in one loop but computing DDR in 
> an inner loop
> is useful in the future, besides loop interchange.

I guess it's useful for compile-time but eventually we can also
refactor the data-ref code
to be able to initialize a new data-ref from an already existing one,
re-analyzing it for
an outer loop.  Not sure if that's cheaper in any ways though.
Eventually the caching
done by SCEV can also be improved.

That said, we re-used the data-refs to avoid doing a quadratic amount
of analyses
during the search of the loop pair to interchange in a deep nest - I
think that's
very much desirable also for other passes operating on deeper nests of which
we have none at the moment - loop distribution being the closest candidate.
I think graphite also suffers from this somewhat in its SCOP finding/analysis.

Richard.

> Thanks,
> bin
>
> 2019-04-10  Bin Cheng  
>
> PR tree-optimization/92001
> * tree-chrec.c (evolution_function_is_univariate_p): New parameter
> and check univariate against it.
> * tree-chrec.h (evolution_function_is_univariate_p): New parameter.
> * tree-data-ref.c (add_other_self_distances): Pass new argument.
>
> 2018-04-10  Bin Cheng  
>
> PR tree-optimization/92001
> * gcc/testsuite/gfortran.dg/pr90021.f90: New test.


Re: [PATCH][Version 3]Come up with -flive-patching master option.

2019-04-11 Thread Richard Biener
On Wed, Apr 10, 2019 at 9:49 PM Jonathan Wakely  wrote:
>
> On 10/04/19 13:55 -0500, Qing Zhao wrote:
> >Hi, Jonathan,
> >
> >thanks for your review on the documentation change for -flive-patching 
> >option.
> >
> >>
> >>> --- a/gcc/doc/invoke.texi
> >>> +++ b/gcc/doc/invoke.texi
> >>> @@ -416,6 +416,7 @@ Objective-C and Objective-C++ Dialects}.
> >>> -fipa-bit-cp -fipa-vrp @gol
> >>> -fipa-pta  -fipa-profile  -fipa-pure-const  -fipa-reference  
> >>> -fipa-reference-addressable @gol
> >>> -fipa-stack-alignment  -fipa-icf  -fira-algorithm=@var{algorithm} @gol
> >>> +-flive-patching=@var{level} @gol
> >>> -fira-region=@var{region}  -fira-hoist-pressure @gol
> >>> -fira-loop-pressure  -fno-ira-share-save-slots @gol
> >>> -fno-ira-share-spill-slots @gol
> >>> @@ -9045,6 +9046,65 @@ equivalences that are found only by GCC and 
> >>> equivalences found only by Gold.
> >>> This flag is enabled by default at @option{-O2} and @option{-Os}.
> >>> +@item -flive-patching=@var{level}
> >>> +@opindex flive-patching
> >>> +Control GCC's optimizations to provide a safe compilation for 
> >>> live-patching.
> >>
> >> "provide a safe compilation" isn't very clear to me. I don't know what
> >> it means to "provide a compilation", let alone a safe one.
> >>
> >> Could we say something like "Control GCC’s optimizations to produce
> >> output suitable for live-patching.” ?
> >
> >yes, this is better.
> >
> >>
> >>
> >>> +If the compiler's optimization uses a function's body or information 
> >>> extracted
> >>> +from its body to optimize/change another function, the latter is called 
> >>> an
> >>> +impacted function of the former.  If a function is patched, its impacted
> >>> +functions should be patched too.
> >>> +
> >>> +The impacted functions are decided by the compiler's interprocedural
> >>
> >> decided or determined?
> >determined is better.
> >
> >>
> >>> +optimizations.  For example, inlining a function into its caller, cloning
> >>> +a function and changing its caller to call this new clone, or extracting
> >>> +a function's pureness/constness information to optimize its direct or
> >>> +indirect callers, etc.
> >>
> >> I don't know what the second sentence is saying. I can read it two
> >> different ways:
> >>
> >> 1) Those are examples of interprocedural optimizations which
> >> participate in the decision making, but the actual details of how the
> >> decisions are made are not specified here.
> >>
> >> 2) Performing those optimizations causes a function to be impacted.
> >>
> >> If 1) is the intended meaning, then I think it should say "For
> >> example, when inlining a function into its caller, ..."
> >>
> >> If 2) is the intended meaning, then I think it should say "For
> >> example, a caller is impacted when inlining a function
> >> into its caller …".
> >
> >2) is the intended meaining.
> >
> >>
> >> Does either of those suggestions match the intended meaning? Or do you
> >> have a better way to rephrase it?
> >>
> >>> +Usually, the more IPA optimizations enabled, the larger the number of
> >>> +impacted functions for each function.  In order to control the number of
> >>> +impacted functions and computed the list of impacted function easily,
> >>
> >> Should be "and more easily compute the list of impacted functions”.
> >
> >this is good.
> >>
> >>> +we provide control to partially enable IPA optimizations on two different
> >>> +levels.
> >>
> >> We don't usually say "we provide" like this. I suggest simply "IPA
> >> optimizations can be partially enabled at two different levels.”
> >
> >Okay.
> >>
> >>> +
> >>> +The @var{level} argument should be one of the following:
> >>> +
> >>> +@table @samp
> >>> +
> >>> +@item inline-clone
> >>> +
> >>> +Only enable inlining and cloning optimizations, which includes inlining,
> >>> +cloning, interprocedural scalar replacement of aggregates and partial 
> >>> inlining.
> >>> +As a result, when patching a function, all its callers and its clones'
> >>> +callers need to be patched as well.
> >>
> >> Since you've defined the term "impacted" could this just say "all its
> >> callers and its clones' callers are impacted.”?
> >
> >I think that the following might be better:
> >
> >when patching a function, all its callers and its clones’ callers are 
> >impacted, therefore need to be patched as well.
>
> Agreed.
>
> >>
> >>> +@option{-flive-patching=inline-clone} disables the following 
> >>> optimization flags:
> >>> +@gccoptlist{-fwhole-program  -fipa-pta  -fipa-reference  -fipa-ra @gol
> >>> +-fipa-icf  -fipa-icf-functions  -fipa-icf-variables @gol
> >>> +-fipa-bit-cp  -fipa-vrp  -fipa-pure-const  -fipa-reference-addressable 
> >>> @gol
> >>> +-fipa-stack-alignment}
> >>> +
> >>> +@item inline-only-static
> >>> +
> >>> +Only enable inlining of static functions.
> >>> +As a result, when patching a static function, all its callers need to be
> >>> +patches as well.
> >>
> >> "Typo: "patches" should be "patched", but I'd suggest "are impacted"
> >> here too.
> >
> >Okay.

Re: [PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)

2019-04-11 Thread Richard Biener
On Thu, 11 Apr 2019, Jakub Jelinek wrote:

> Hi!
> 
> The following testcase is miscompiled, because the result of
> a DImode (doubleword) right shift is used in a QImode subreg as well as
> later on pushed into a stack slot as an argument to a const function
> whose result isn't used.  The RA because of the weirdo target tuning
> reuses the REG_EQUIV argument slot (as it wants the value in a non-Q_REGS
> register), so it first pushes it to the stack slot and then loads from the
> stack slot again (according to Vlad, this can happen with both LRA and old
> reload).  Later on during DCE we determine the call is not needed and try to
> find all the argument pushes and don't mark those as needed, so we
> effectively DCE the right shift, push to the argument slot as well as
> other slots and the call, and end up keeping just the load from the
> uninitialized argument slot.
> 
> The following patch just punts if we find loads from stack slots in between
> where they are pushed and the const/pure call.  In addition to that, I've
> outlined the same largish sequence that had 3 copies in the function already
> and I'd need to add 4th copy, so in the end the dce.c changes are removing
> more than adding:
>  1 file changed, 118 insertions(+), 135 deletions(-)

The refactoring itself is probably OK (and if done separately
makes reviewing easier).

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> During those 2 bootstraps/regtests, data.load_found has been set just
> on the new testcase on ia32.

Hmm, I wonder whether we really need to DCE calls after reload?
That said, I'm not familiar enough with the code to check if the
patch makes sense (can there ever be uses of the argument slots
_after_ the call?).

Richard.

> 2019-04-11  Jakub Jelinek  
>   
>   PR rtl-optimization/89965
>   * dce.c: Include rtl-iter.h.
>   (sp_based_mem_offset): New function.
>   (struct check_argument_load_data): New type.
>   (check_argument_load): New function.
>   (find_call_stack_args): Use sp_based_mem_offset, check for loads
>   from stack slots still tracked in sp_bytes and punt if any is
>   found.
> 
>   * gcc.target/i386/pr89965.c: New test.
> 
> --- gcc/dce.c.jj  2019-02-15 00:11:13.209111382 +0100
> +++ gcc/dce.c 2019-04-10 14:18:21.111763533 +0200
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
>  #include "valtrack.h"
>  #include "tree-pass.h"
>  #include "dbgcnt.h"
> +#include "rtl-iter.h"
>  
>  
>  /* -
> @@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size
>return true;
>  }
>  
> +/* If MEM has sp address, return 0, if it has sp + const address,
> +   return that const, if it has reg address where reg is set to sp + const
> +   and FAST is false, return const, otherwise return
> +   INTTYPE_MINUMUM (HOST_WIDE_INT).  */
> +
> +static HOST_WIDE_INT
> +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
> +{
> +  HOST_WIDE_INT off = 0;
> +  rtx addr = XEXP (mem, 0);
> +  if (GET_CODE (addr) == PLUS
> +  && REG_P (XEXP (addr, 0))
> +  && CONST_INT_P (XEXP (addr, 1)))
> +{
> +  off = INTVAL (XEXP (addr, 1));
> +  addr = XEXP (addr, 0);
> +}
> +  if (addr == stack_pointer_rtx)
> +return off;
> +
> +  if (!REG_P (addr) || fast)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
> +  df_ref use;
> +  FOR_EACH_INSN_USE (use, call_insn)
> +  if (rtx_equal_p (addr, DF_REF_REG (use)))
> +break;
> +
> +  if (use == NULL)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  struct df_link *defs;
> +  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> +if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> +  break;
> +
> +  if (defs == NULL)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  rtx set = single_set (DF_REF_INSN (defs->ref));
> +  if (!set)
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  if (GET_CODE (SET_SRC (set)) != PLUS
> +  || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> +  || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> +return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  off += INTVAL (XEXP (SET_SRC (set), 1));
> +  return off;
> +}
> +
> +/* Data for check_argument_load called via note_uses.  */
> +struct check_argument_load_data {
> +  bitmap sp_bytes;
> +  HOST_WIDE_INT min_sp_off, max_sp_off;
> +  rtx_call_insn *call_insn;
> +  bool fast;
> +  bool load_found;
> +};
> +
> +/* Helper function for find_call_stack_args.  Check if there are
> +   any loads from the argument slots in between the const/pure call
> +   and store to the argument slot, set LOAD_FOUND if any is found.  */
> +
> +static void
> +check_argument_load (rtx *loc, void *data)
> +{
> +  struct check_argument_load_data *d
> += (struct check_argument_load_data *) data;
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SU

[C++ PATCH] Make some type forbidden diagnostics translatable (PR translation/90035)

2019-04-11 Thread Jakub Jelinek
Hi!

While looking into another PR, I've noticed that in two spots the
parser->type_definition_forbidden_message messages are untranslatable,
we construct them at runtime from 3 strings using concat.

The following patch fixes it by adding a const char * member
to the parser structure and passing that as another argument to error,
in the more usual case where the argument string only contains %< and %>
and not %qs added in this patch it will be just ignored.

I believe this patch is more friendly to the translators (as well as
less expensive at compile time because it doesn't have to concat/allocate
anything).

Another possibility would be to just use in cp_parser_has_attribute_expression
G_("types may not be defined in %<__builtin_has_attribute%> expressions")
and in cp_parser_sizeof_operand use a switch based on the 3 possible keyword
values and use
G_("types may not be defined in % expressions")
G_("types may not be defined in %<__alignof__%> expressions")
G_("types may not be defined in % expressions")
G_("types may not be defined in % expressions")
depending on that (and C++ mode which determines which alignof spelling is
in ridpointers).  We wouldn't need to add an extra member, on the other side
the translators would need to translate 5 messages instead of just one.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-04-11  Jakub Jelinek  

PR translation/90035
* parser.h (struct cp_parser): Add
type_definition_forbidden_message_arg member.
* parser.c (cp_debug_parser): Print it.
(cp_parser_check_type_definition): Pass
parser->type_definition_forbidden_message_arg as second argument to
error.
(cp_parser_has_attribute_expression, cp_parser_sizeof_operand): Set
parser->type_definition_forbidden_message_arg and use G_() with
%qs for parser->type_definition_forbidden_message instead of
building untranslatable message using concat.

--- gcc/cp/parser.h.jj  2019-01-08 22:33:34.255713665 +0100
+++ gcc/cp/parser.h 2019-04-10 15:19:37.672814432 +0200
@@ -350,6 +350,9 @@ struct GTY(()) cp_parser {
  issued as an error message if a type is defined.  */
   const char *type_definition_forbidden_message;
 
+  /* Argument for type_definition_forbidden_message if needed.  */
+  const char *type_definition_forbidden_message_arg;
+
   /* A stack used for member functions of local classes.  The lists
  contained in an individual entry can only be processed once the
  outermost class being defined is complete.  */
--- gcc/cp/parser.c.jj  2019-03-27 21:19:39.45746 +0100
+++ gcc/cp/parser.c 2019-04-10 15:34:28.201322120 +0200
@@ -570,8 +570,10 @@ cp_debug_parser (FILE *file, cp_parser *
   cp_debug_print_flag (file, "Colon doesn't start a class definition",
  parser->colon_doesnt_start_class_def_p);
   if (parser->type_definition_forbidden_message)
-fprintf (file, "Error message for forbidden type definitions: %s\n",
-parser->type_definition_forbidden_message);
+fprintf (file, "Error message for forbidden type definitions: %s %s\n",
+parser->type_definition_forbidden_message,
+parser->type_definition_forbidden_message_arg
+? parser->type_definition_forbidden_message_arg : "");
   cp_debug_print_unparsed_queues (file, parser->unparsed_queues);
   fprintf (file, "Number of class definitions in progress: %u\n",
   parser->num_classes_being_defined);
@@ -3054,8 +3056,9 @@ cp_parser_check_type_definition (cp_pars
   if (parser->type_definition_forbidden_message)
 {
   /* Don't use `%s' to print the string, because quotations (`%<', `%>')
-in the message need to be interpreted.  */
-  error (parser->type_definition_forbidden_message);
+or %qs in the message need to be interpreted.  */
+  error (parser->type_definition_forbidden_message,
+parser->type_definition_forbidden_message_arg);
   return false;
 }
   return true;
@@ -8518,12 +8521,12 @@ cp_parser_has_attribute_expression (cp_p
   /* Types cannot be defined in a `sizeof' expression.  Save away the
  old message.  */
   const char *saved_message = parser->type_definition_forbidden_message;
-  /* And create the new one.  */
-  const int kwd = RID_BUILTIN_HAS_ATTRIBUTE;
-  char *tmp = concat ("types may not be defined in %<",
- IDENTIFIER_POINTER (ridpointers[kwd]),
- "%> expressions", NULL);
-  parser->type_definition_forbidden_message = tmp;
+  const char *saved_message_arg
+= parser->type_definition_forbidden_message_arg;
+  parser->type_definition_forbidden_message
+= G_("types may not be defined in %qs expressions");
+  parser->type_definition_forbidden_message_arg
+= IDENTIFIER_POINTER (ridpointers[RID_BUILTIN_HAS_ATTRIBUTE]);
 
   /* The restrictions on constant-expressions do not apply inside
  sizeof expressions.  */
@@ -8562,10 +8

[PATCH] Fix up RTL DCE find_call_stack_args (PR rtl-optimization/89965)

2019-04-11 Thread Jakub Jelinek
Hi!

The following testcase is miscompiled, because the result of
a DImode (doubleword) right shift is used in a QImode subreg as well as
later on pushed into a stack slot as an argument to a const function
whose result isn't used.  The RA because of the weirdo target tuning
reuses the REG_EQUIV argument slot (as it wants the value in a non-Q_REGS
register), so it first pushes it to the stack slot and then loads from the
stack slot again (according to Vlad, this can happen with both LRA and old
reload).  Later on during DCE we determine the call is not needed and try to
find all the argument pushes and don't mark those as needed, so we
effectively DCE the right shift, push to the argument slot as well as
other slots and the call, and end up keeping just the load from the
uninitialized argument slot.

The following patch just punts if we find loads from stack slots in between
where they are pushed and the const/pure call.  In addition to that, I've
outlined the same largish sequence that had 3 copies in the function already
and I'd need to add 4th copy, so in the end the dce.c changes are removing
more than adding:
 1 file changed, 118 insertions(+), 135 deletions(-)

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

During those 2 bootstraps/regtests, data.load_found has been set just
on the new testcase on ia32.

2019-04-11  Jakub Jelinek  

PR rtl-optimization/89965
* dce.c: Include rtl-iter.h.
(sp_based_mem_offset): New function.
(struct check_argument_load_data): New type.
(check_argument_load): New function.
(find_call_stack_args): Use sp_based_mem_offset, check for loads
from stack slots still tracked in sp_bytes and punt if any is
found.

* gcc.target/i386/pr89965.c: New test.

--- gcc/dce.c.jj2019-02-15 00:11:13.209111382 +0100
+++ gcc/dce.c   2019-04-10 14:18:21.111763533 +0200
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
 #include "valtrack.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 
 /* -
@@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size
   return true;
 }
 
+/* If MEM has sp address, return 0, if it has sp + const address,
+   return that const, if it has reg address where reg is set to sp + const
+   and FAST is false, return const, otherwise return
+   INTTYPE_MINUMUM (HOST_WIDE_INT).  */
+
+static HOST_WIDE_INT
+sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
+{
+  HOST_WIDE_INT off = 0;
+  rtx addr = XEXP (mem, 0);
+  if (GET_CODE (addr) == PLUS
+  && REG_P (XEXP (addr, 0))
+  && CONST_INT_P (XEXP (addr, 1)))
+{
+  off = INTVAL (XEXP (addr, 1));
+  addr = XEXP (addr, 0);
+}
+  if (addr == stack_pointer_rtx)
+return off;
+
+  if (!REG_P (addr) || fast)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
+  df_ref use;
+  FOR_EACH_INSN_USE (use, call_insn)
+  if (rtx_equal_p (addr, DF_REF_REG (use)))
+break;
+
+  if (use == NULL)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  struct df_link *defs;
+  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+if (! DF_REF_IS_ARTIFICIAL (defs->ref))
+  break;
+
+  if (defs == NULL)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  rtx set = single_set (DF_REF_INSN (defs->ref));
+  if (!set)
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  if (GET_CODE (SET_SRC (set)) != PLUS
+  || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
+  || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
+return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  off += INTVAL (XEXP (SET_SRC (set), 1));
+  return off;
+}
+
+/* Data for check_argument_load called via note_uses.  */
+struct check_argument_load_data {
+  bitmap sp_bytes;
+  HOST_WIDE_INT min_sp_off, max_sp_off;
+  rtx_call_insn *call_insn;
+  bool fast;
+  bool load_found;
+};
+
+/* Helper function for find_call_stack_args.  Check if there are
+   any loads from the argument slots in between the const/pure call
+   and store to the argument slot, set LOAD_FOUND if any is found.  */
+
+static void
+check_argument_load (rtx *loc, void *data)
+{
+  struct check_argument_load_data *d
+= (struct check_argument_load_data *) data;
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
+{
+  const_rtx mem = *iter;
+  HOST_WIDE_INT size;
+  if (MEM_P (mem)
+ && MEM_SIZE_KNOWN_P (mem)
+ && MEM_SIZE (mem).is_constant (&size))
+   {
+ HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
+ if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
+ && off < d->max_sp_off
+ && off + size > d->min_sp_off)
+   for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
+byte < MIN (off + size, d->max_sp_off); byte++)
+ if (bi