Re: [PATCH] libstdc++: Update {x86_64, i?86, powerpc64, s390x, aarch64}-linux baselines for GCC 10.1

2020-05-07 Thread Richard Biener via Gcc-patches
On Thu, May 7, 2020 at 8:53 AM Jonathan Wakely via Gcc-patches
 wrote:
>
> On 07/05/20 00:22 +0200, Eric Botcazou wrote:
> >> Oops, here are the updates from Fedora packages built during the weekend.
> >
> >The SPARC64/Linux bits are attached.  OK for trunk and gcc-10?
>
> OK, pending RM approval for gcc-10, thanks.

OK.

> It looks like gcc-9 needs the same update too.
>


[PATCH] Extend std::copy/std::copy_n char* overload to deque iterator

2020-05-07 Thread François Dumont via Gcc-patches
    The purpose of this patch is to make sure that the existing
std::copy/std::copy_n overloads for char* will also be used for
std::deque iterators when dealing with istreambuf_iterator. It
also makes sure that this still works when _GLIBCXX_DEBUG is activated.


    * include/bits/stl_algo.h (__copy_n_a): Move to ...
    * include/bits/stl_algobase.h (__copy_n_a): ...here. Add __strict
    parameter.
    (__niter_base(const _Safe_iterator<_Ite, _Seq,
    random_access_iterator_tag>&)): New declaration.
    (__copy_move_a2(istreambuf_iterator<>, istreambuf_iterator<>,
    _Deque_iterator<>)): New declaration.
    (__copy_n_a(istreambuf_iterator<>, _Size, _Deque_iterator<>, bool)):
    New declaration.
    * include/bits/deque.tcc
    (__copy_move_a2(istreambuf_iterator<>, istreambuf_iterator<>,
    _Deque_iterator<>)): Add definition.
    (__copy_n_a(istreambuf_iterator<>, _Size, _Deque_iterator<>, bool)):
    Add definition.
    * include/bits/streambuf_iterator.h
    (__copy_n_a(istreambuf_iterator<>, _Size, _CharT*, bool)): Adapt
    definition.
    * include/debug/safe_iterator.tcc (__niter_base): Add definition.
    * testsuite/25_algorithms/copy/streambuf_iterators/char/4.cc (test03):
    New.
    * testsuite/25_algorithms/copy/streambuf_iterators/char/debug/
    deque_neg.cc: New.
    * testsuite/25_algorithms/copy_n/debug/istreambuf_ite_deque_neg.cc:
    New.
    * testsuite/25_algorithms/copy_n/istreambuf_iterator/2.cc: New.
    * testsuite/25_algorithms/copy_n/istreambuf_iterator/deque.cc: New.

Already tested for a while on Linux x64 normal and debug modes but I am 
currently rebuilding everything and will commit only once all succeeded 
again.


Ok ?

François

diff --git a/libstdc++-v3/include/bits/deque.tcc b/libstdc++-v3/include/bits/deque.tcc
index e773f32b256..45207066c48 100644
--- a/libstdc++-v3/include/bits/deque.tcc
+++ b/libstdc++-v3/include/bits/deque.tcc
@@ -1065,6 +1065,57 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
   return __result;
 }
 
+#if __cplusplus >= 201103L
+  template
+__enable_if_t<__is_char<_CharT>::__value,
+		  _GLIBCXX_STD_C::_Deque_iterator<_CharT, _CharT&, _CharT*>>
+__copy_move_a2(
+	istreambuf_iterator<_CharT, char_traits<_CharT> > __first,
+	istreambuf_iterator<_CharT, char_traits<_CharT> > __last,
+	_GLIBCXX_STD_C::_Deque_iterator<_CharT, _CharT&, _CharT*> __result)
+{
+  if (__first == __last)
+	return __result;
+
+  for (;;)
+	{
+	  const auto __len = __result._M_last - __result._M_cur;
+	  const auto __nb
+	= std::__copy_n_a(__first, __len, __result._M_cur, false)
+	- __result._M_cur;
+	  __result += __nb;
+
+	  if (__nb != __len)
+	break;
+	}
+
+  return __result;
+}
+
+  template
+__enable_if_t<__is_char<_CharT>::__value,
+		  _GLIBCXX_STD_C::_Deque_iterator<_CharT, _CharT&, _CharT*>>
+__copy_n_a(
+  istreambuf_iterator<_CharT, char_traits<_CharT>> __it, _Size __size,
+  _GLIBCXX_STD_C::_Deque_iterator<_CharT, _CharT&, _CharT*> __result,
+  bool __strict)
+{
+  if (__size == 0)
+	return __result;
+
+  do
+	{
+	  const auto __len = std::min<_Size>(__result._M_last - __result._M_cur,
+	 __size);
+	  std::__copy_n_a(__it, __len, __result._M_cur, __strict);
+	  __result += __len;
+	  __size -= __len;
+	}
+  while (__size != 0);
+  return __result;
+}
+#endif
+
   template
 _OI
diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h
index 932ece55529..70d8232aece 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -705,31 +705,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __result;
 }
 
-  template
-_GLIBCXX20_CONSTEXPR
-_OutputIterator
-__copy_n_a(_InputIterator __first, _Size __n, _OutputIterator __result)
-{
-  if (__n > 0)
-	{
-	  while (true)
-	{
-	  *__result = *__first;
-	  ++__result;
-	  if (--__n > 0)
-		++__first;
-	  else
-		break;
-	}
-	}
-  return __result;
-}
- 
-  template
-__enable_if_t<__is_char<_CharT>::__value, _CharT*>
-__copy_n_a(istreambuf_iterator<_CharT, char_traits<_CharT>>,
-	   _Size, _CharT*);
-
   template
 _GLIBCXX20_CONSTEXPR
 _OutputIterator
@@ -738,7 +713,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   return std::__niter_wrap(__result,
 			   __copy_n_a(__first, __n,
-	  std::__niter_base(__result)));
+	  std::__niter_base(__result), true));
 }
 
   template::value)
 { return __it; }
 
+  template
+_Ite
+__niter_base(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq,
+		 std::random_access_iterator_tag>&);
+
   // Reverse the __niter_base transformation to get a
   // __normal_iterator back again (this assumes that __normal_iterator
   // is only used to wrap random access iterators, like pointers).
@@ -466,6 +471,16 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 __copy_move_a2(istreambuf_iterator<_CharT, char_traits<_CharT> >,
 		   istreambuf_iterator<_CharT, 

Re: [PATCH] PR preprocessor/94657: use $AR, not 'ar',

2020-05-07 Thread Sergei Trofimovich via Gcc-patches
On Wed, 22 Apr 2020 23:05:38 +0100
Sergei Trofimovich  wrote:

> From: Sergei Trofimovich 
> 
> On systems with both 'ar' and '${CHOST}-ar' the latter is preferred,
> as it might not match the default 'ar'.
> 
> Bug is initially reported downstream as https://bugs.gentoo.org/718004.
> 
> libcpp/ChangeLog:
> 
>   PR libcpp/94657
>   * Makefile.in: use @AR@ placeholder
>   * configure.ac: use AC_CHECK_TOOL to find 'ar'
>   * configure: regenerate
> ---
>  libcpp/ChangeLog|  7 
>  libcpp/Makefile.in  |  2 +-
>  libcpp/configure| 94 +
>  libcpp/configure.ac |  1 +
>  4 files changed, 103 insertions(+), 1 deletion(-)
> 
> diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
> index 307cf3add94..77145768a3d 100644
> --- a/libcpp/ChangeLog
> +++ b/libcpp/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-04-22  Sergei Trofimovich  
> +
> + PR preprocessor/94657: use $AR, not 'ar'
> + * Makefile.in: use @AR@ placeholder
> + * configure.ac: use AC_CHECK_TOOL to find 'ar'
> + * configure: regenerate
> +
>  2020-02-14  Jakub Jelinek  
>  
>   Partially implement P1042R1: __VA_OPT__ wording clarifications
> diff --git a/libcpp/Makefile.in b/libcpp/Makefile.in
> index 8f8c8f65eb3..af7a0c6f73e 100644
> --- a/libcpp/Makefile.in
> +++ b/libcpp/Makefile.in
> @@ -25,7 +25,7 @@ srcdir = @srcdir@
>  top_builddir = .
>  VPATH = @srcdir@
>  INSTALL = @INSTALL@
> -AR = ar
> +AR = @AR@
>  ARFLAGS = cru
>  ACLOCAL = @ACLOCAL@
>  AUTOCONF = @AUTOCONF@
> diff --git a/libcpp/configure b/libcpp/configure
> index 11da199083b..a6dcf5dcb61 100755
> --- a/libcpp/configure
> +++ b/libcpp/configure
> @@ -657,6 +657,7 @@ ACLOCAL
>  EGREP
>  GREP
>  CPP
> +AR
>  RANLIB
>  ac_ct_CXX
>  CXXFLAGS
> @@ -1039,6 +1040,7 @@ do
>| -silent | --silent | --silen | --sile | --sil)
>  silent=yes ;;
>  
> +
>-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
>  ac_prev=sbindir ;;
>-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
> @@ -4008,6 +4010,98 @@ else
>RANLIB="$ac_cv_prog_RANLIB"
>  fi
>  
> +if test -n "$ac_tool_prefix"; then
> +  # Extract the first word of "${ac_tool_prefix}ar", so it can be a program 
> name with args.
> +set dummy ${ac_tool_prefix}ar; ac_word=$2
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
> +$as_echo_n "checking for $ac_word... " >&6; }
> +if ${ac_cv_prog_AR+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +  if test -n "$AR"; then
> +  ac_cv_prog_AR="$AR" # Let the user override the test.
> +else
> +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
> +for as_dir in $PATH
> +do
> +  IFS=$as_save_IFS
> +  test -z "$as_dir" && as_dir=.
> +for ac_exec_ext in '' $ac_executable_extensions; do
> +  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
> +ac_cv_prog_AR="${ac_tool_prefix}ar"
> +$as_echo "$as_me:${as_lineno-$LINENO}: found 
> $as_dir/$ac_word$ac_exec_ext" >&5
> +break 2
> +  fi
> +done
> +  done
> +IFS=$as_save_IFS
> +
> +fi
> +fi
> +AR=$ac_cv_prog_AR
> +if test -n "$AR"; then
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
> +$as_echo "$AR" >&6; }
> +else
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
> +$as_echo "no" >&6; }
> +fi
> +
> +
> +fi
> +if test -z "$ac_cv_prog_AR"; then
> +  ac_ct_AR=$AR
> +  # Extract the first word of "ar", so it can be a program name with args.
> +set dummy ar; ac_word=$2
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
> +$as_echo_n "checking for $ac_word... " >&6; }
> +if ${ac_cv_prog_ac_ct_AR+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +  if test -n "$ac_ct_AR"; then
> +  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
> +else
> +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
> +for as_dir in $PATH
> +do
> +  IFS=$as_save_IFS
> +  test -z "$as_dir" && as_dir=.
> +for ac_exec_ext in '' $ac_executable_extensions; do
> +  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
> +ac_cv_prog_ac_ct_AR="ar"
> +$as_echo "$as_me:${as_lineno-$LINENO}: found 
> $as_dir/$ac_word$ac_exec_ext" >&5
> +break 2
> +  fi
> +done
> +  done
> +IFS=$as_save_IFS
> +
> +fi
> +fi
> +ac_ct_AR=$ac_cv_prog_ac_ct_AR
> +if test -n "$ac_ct_AR"; then
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
> +$as_echo "$ac_ct_AR" >&6; }
> +else
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
> +$as_echo "no" >&6; }
> +fi
> +
> +  if test "x$ac_ct_AR" = x; then
> +AR=""
> +  else
> +case $cross_compiling:$ac_tool_warned in
> +yes:)
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
> prefixed with host triplet" >&5
> +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" 
> >&2;}
> +ac_tool_warned=yes ;;
> +esac
> +AR=$ac_ct_AR
> +  fi
> +else
> +  AR="$ac_cv_prog_AR"
> +fi
> +
>  
>  
>  ac_ext=c
> diff --git a/libcpp/configure.ac b/libcpp/configure.ac
> index 1779562a3a7..9ccf

RE: [PATCH] libgcc: aarch64: Get hwcap for FreeBSD

2020-05-07 Thread Kyrylo Tkachov
Hi Andreas,

> -Original Message-
> From: Gcc-patches  On Behalf Of
> Andreas Tobler
> Sent: 06 May 2020 21:17
> To: GCC Patches 
> Subject: [PATCH] libgcc: aarch64: Get hwcap for FreeBSD
> 
> Hi all,
> 
> Since FreeBSD 12, FreeBSD has a sys/auxv.h header too but it doesn't
> provide the getauxval function. Instead it offers the elf_aux_info
> function which provides a similar functionality.
> This patch gets the hwcap for FreeBSD.
> 
> Is this ok for trunk?

This whole thing is gated on __gnu_linux__.  Does FreeBSD set that?

Thanks,
Kyrill

> 
> TIA,
> Andreas
> 
> +2020-05-05  Andreas Tobler  
> +
> + * config/aarch64/lse-init.c: Get hwcap for FreeBSD.
> +
> 
> diff --git a/libgcc/config/aarch64/lse-init.c
> b/libgcc/config/aarch64/lse-init.c
> index 00e9ab8cd1c..ab0d6b2f754 100644
> --- a/libgcc/config/aarch64/lse-init.c
> +++ b/libgcc/config/aarch64/lse-init.c
> @@ -41,7 +41,16 @@ unsigned long int __getauxval (unsigned long int);
>   static void __attribute__((constructor))
>   init_have_lse_atomics (void)
>   {
> +#ifndef __FreeBSD__
> unsigned long hwcap = __getauxval (AT_HWCAP);
> +#else
> +  unsigned long hwcap;
> +  int err;
> +
> +  err = elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
> +  if (err)
> +hwcap = 0;
> +#endif
> __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
>   }



[PATCH] Add caveat about parsing of .gcda and .gcno files.

2020-05-07 Thread Martin Liška

Hi.

Let's discourage people from parsing the coverage files.
We invented `gcov --json-format` for those who need a machine
readable format.

I'm going to install the patch if there are no objections.
Martin

gcc/ChangeLog:

2020-05-07  Martin Liska  

PR gcov-profile/94928
* gcov-io.h: Add caveat about coverage format parsing and
possible outdated documentation.
---
 gcc/gcov-io.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)


diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index d21a43c4c31..2d99932a4b8 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -25,7 +25,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .  */
 
 
-/* Coverage information is held in two files.  A notes file, which is
+/* CAVEAT: Coverage information files should not be parsed directly, but
+   we would rather recommend using `gcov --json-format` that provides
+   a machine readable coverage information.
+
+   Note that the following file format documentation can be outdated.
+
+   Coverage information is held in two files.  A notes file, which is
generated by the compiler, and a data file, which is generated by
the program under test.  Both files use a similar structure.  We do
not attempt to make these files backwards compatible with previous



Re: [PATCH] lto-wrapper: split arguments of getenv ("MAKE").

2020-05-07 Thread Martin Liška

On 5/6/20 6:42 PM, Richard Biener wrote:

Perfect if you checked freeargv is happy with a NULL argument!


Yes, it is.

Martin


[PATCH] c-family: Fix error-recovery ICE on __builtin_speculation_safe_value [PR94968]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

If the second argument of __builtin_speculation_safe_value is
error_mark_node (or has such a type), we ICE during
useless_type_conversion_p.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2020-05-06  Jakub Jelinek  

PR c/94968
* c-common.c (speculation_safe_value_resolve_params): Return false if
error_operand_p (val2).
(resolve_overloaded_builtin) :
Remove extraneous semicolon.

* gcc.dg/pr94968.c: New test.

--- gcc/c-family/c-common.c.jj  2020-04-27 16:10:23.795726234 +0200
+++ gcc/c-family/c-common.c 2020-05-06 12:37:40.708539995 +0200
@@ -6716,6 +6716,8 @@ speculation_safe_value_resolve_params (l
   tree val2 = (*params)[1];
   if (TREE_CODE (TREE_TYPE (val2)) == ARRAY_TYPE)
val2 = default_conversion (val2);
+  if (error_operand_p (val2))
+   return false;
   if (!(TREE_TYPE (val) == TREE_TYPE (val2)
|| useless_type_conversion_p (TREE_TYPE (val), TREE_TYPE (val2
{
@@ -7400,7 +7402,7 @@ resolve_overloaded_builtin (location_t l
   {
tree new_function, first_param, result;
enum built_in_function fncode
- = speculation_safe_value_resolve_call (function, params);;
+ = speculation_safe_value_resolve_call (function, params);
 
if (fncode == BUILT_IN_NONE)
  return error_mark_node;
--- gcc/testsuite/gcc.dg/pr94968.c.jj   2020-05-06 12:50:10.865191796 +0200
+++ gcc/testsuite/gcc.dg/pr94968.c  2020-05-06 12:49:52.366471532 +0200
@@ -0,0 +1,8 @@
+/* PR c/94968 */
+/* { dg-do compile } */
+
+int
+foo (void)
+{ 
+  __builtin_speculation_safe_value (1, x); /* { dg-error "undeclared" } */
+}  /* { dg-message "each 
undeclared identifier is reported only once" "" { target *-*-* } .-1 } */

Jakub



[PATCH] csa: Fix --enable-checking=yes,df bootstrap failure in csa [PR94961]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

My recent combine-stack-adj.c change broke df checking bootstrap,
while most of the changes are done through validate_change/confirm_changes
which update df info, the removal of REG_EQUAL notes didn't update df info.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, plus
tested with yes,df checking build/regtest on x86_64-linux (which previously
failed), ok for trunk?

2020-05-06  Jakub Jelinek  

PR bootstrap/94961
PR rtl-optimization/94516
* combine-stack-adj.c (try_apply_stack_adjustment): Call
df_notes_rescan after calling remove_reg_equal_equiv_notes.

--- gcc/combine-stack-adj.c.jj  2020-05-05 16:34:33.611007861 +0200
+++ gcc/combine-stack-adj.c 2020-05-06 13:27:03.275796460 +0200
@@ -303,7 +303,10 @@ try_apply_stack_adjustment (rtx_insn *in
ml->sp_offset -= delta;
 
   if (remove_equal)
-   remove_reg_equal_equiv_notes (insn);
+   {
+ remove_reg_equal_equiv_notes (insn);
+ df_notes_rescan (insn);
+   }
   return true;
 }
   else

Jakub



[PATCH] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

Implemented thusly.  The TYPE_OVERFLOW_WRAPS is there just because the
pattern above it has it too, if you want, I can throw it away from both.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-05-06  Jakub Jelinek  

PR tree-optimization/94913
* match.pd (A - B - 1 >= A to B >= A): New simplification.

* gcc.dg/tree-ssa/pr94913.c: New test.

--- gcc/match.pd.jj 2020-05-06 11:18:35.0 +0200
+++ gcc/match.pd2020-05-06 15:03:51.618058839 +0200
@@ -4791,6 +4791,15 @@ (define_operator_list COND_TERNARY
&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
(cmp @1 @0
 
+/* Optimize A - B - 1 >= A into B >= A for unsigned comparisons.  */
+(for cmp (ge lt)
+ (simplify
+  (cmp:c (plus:cs (minus:s @0 @1) integer_minus_onep) @0)
+   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && TYPE_UNSIGNED (TREE_TYPE (@0))
+   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+(cmp @1 @0
+
 /* Testing for overflow is unnecessary if we already know the result.  */
 /* A - B > A  */
 (for cmp (gt le)
--- gcc/testsuite/gcc.dg/tree-ssa/pr94913.c.jj  2020-05-06 15:20:08.306376994 
+0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94913.c 2020-05-06 15:19:45.120725533 
+0200
@@ -0,0 +1,33 @@
+/* PR tree-optimization/94913 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " (?:b_\[0-9]+\\\(D\\\) >= a|a_\[0-9]+\\\(D\\\) 
<= b)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:c_\[0-9]+\\\(D\\\) > d|d_\[0-9]+\\\(D\\\) 
< c)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:f_\[0-9]+\\\(D\\\) >= e|e_\[0-9]+\\\(D\\\) 
<= f)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:g_\[0-9]+\\\(D\\\) > h|h_\[0-9]+\\\(D\\\) 
< g)_\[0-9]+\\\(D\\\);" "optimized" } } */
+
+int
+foo (unsigned a, unsigned b)
+{
+  return (a - b - 1) >= a;
+}
+
+int
+bar (unsigned c, unsigned d)
+{
+  return (c - d - 1) < c;
+}
+
+int
+baz (unsigned e, unsigned f)
+{
+  unsigned t = e - f;
+  return (t - 1) >= e;
+}
+
+int
+qux (unsigned g, unsigned h)
+{
+  unsigned t = g - h;
+  return (t - 1) < g;
+}

Jakub



[PATCH] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

The ffs expanders on several targets (x86, ia64, aarch64 at least)
emit a conditional move or similar code to handle the case when the
argument is 0, which makes the code longer.
If we know from VRP that the argument will not be zero, we can (if the
target also has a ctz expander) just use ctz which is undefined at zero
and thus the expander doesn't need to deal with that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-05-06  Jakub Jelinek  

PR tree-optimization/94956
* match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
__builtin_ctz* + 1 if ctz_optab has an expander.

* gcc.target/i386/pr94956.c: New test.

--- gcc/match.pd.jj 2020-05-06 15:03:51.618058839 +0200
+++ gcc/match.pd2020-05-06 15:48:23.658858289 +0200
@@ -5990,6 +5990,16 @@ (define_operator_list COND_TERNARY
 (convert (IFN_POPCOUNT:type @0)
 #endif
 
+/* __builtin_ffs needs to deal on many targets with the possible zero
+   argument.  If we know the argument is always non-zero, __builtin_ctz + 1
+   should lead to better code.  */
+(simplify
+ (FFS tree_expr_nonzero_p@0)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && (optab_handler (ctz_optab, TYPE_MODE (TREE_TYPE (@0)))
+ != CODE_FOR_nothing))
+  (plus (CTZ:type @0) { build_one_cst (type); })))
+
 /* Simplify:
 
  a = a1 op a2
--- gcc/testsuite/gcc.target/i386/pr94956.c.jj  2020-05-06 16:35:47.085876237 
+0200
+++ gcc/testsuite/gcc.target/i386/pr94956.c 2020-05-06 16:39:52.927140038 
+0200
@@ -0,0 +1,28 @@
+/* PR tree-optimization/94956 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
+/* { dg-final { scan-assembler-not "\tsete\t" } } */
+
+int
+foo (unsigned x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffs (x) - 1;
+}
+
+int
+bar (unsigned long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsl (x) - 1;
+}
+
+#ifdef __x86_64__
+int
+baz (unsigned long long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsll (x) - 1;
+}
+#endif


Jakub



[PATCH] match.pd: Canonicalize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X) [PR94783]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch canonicalizes M = X >> (prec - 1); (X + M) ^ M
for signed integral types into ABS_EXPR (X).  For X == min it is already
UB because M is -1 and min + -1 is UB, so we can use ABS_EXPR rather than
say ABSU_EXPR + cast.

The backend might then emit the abs code back using the shift and addition
and xor if it is the best sequence for the target, but could do something
different that is better.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-05-06  Jakub Jelinek  

PR tree-optimization/94783
* match.pd ((X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X)):
New simplification.

* gcc.dg/tree-ssa/pr94783.c: New test.

--- gcc/match.pd.jj 2020-05-06 15:48:23.658858289 +0200
+++ gcc/match.pd2020-05-06 17:47:02.035347946 +0200
@@ -120,6 +120,18 @@ (define_operator_list COND_TERNARY
   (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
(convert (absu:utype @0)
 
+#if GIMPLE
+/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X).  */
+(simplify
+ (bit_xor:c (plus:cs @0 (rshift@2 @0 INTEGER_CST@1)) @2)
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && !TYPE_UNSIGNED (TREE_TYPE (@0))
+  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+  && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1
+  && TREE_CODE (@2) == SSA_NAME
+  && num_imm_uses (@2) == 2)
+  (abs @0)))
+#endif
 
 /* Simplifications of operations with one constant operand and
simplifications to constants or single values.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr94783.c.jj  2020-05-06 17:52:35.515323297 
+0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94783.c 2020-05-06 17:52:10.915693948 
+0200
@@ -0,0 +1,12 @@
+/* PR tree-optimization/94783 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump "ABS_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " >> 31" "optimized" } } */
+
+int
+foo (int v)
+{
+  int mask = v >> (__SIZEOF_INT__ * __CHAR_BIT__ - 1);
+  return (v + mask) ^ mask;
+}

Jakub



Re: [PATCH] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Richard Biener
On Thu, 7 May 2020, Jakub Jelinek wrote:

> Hi!
> 
> Implemented thusly.  The TYPE_OVERFLOW_WRAPS is there just because the
> pattern above it has it too, if you want, I can throw it away from both.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2020-05-06  Jakub Jelinek  
> 
>   PR tree-optimization/94913
>   * match.pd (A - B - 1 >= A to B >= A): New simplification.
> 
>   * gcc.dg/tree-ssa/pr94913.c: New test.
> 
> --- gcc/match.pd.jj   2020-05-06 11:18:35.0 +0200
> +++ gcc/match.pd  2020-05-06 15:03:51.618058839 +0200
> @@ -4791,6 +4791,15 @@ (define_operator_list COND_TERNARY
> && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> (cmp @1 @0
>  
> +/* Optimize A - B - 1 >= A into B >= A for unsigned comparisons.  */

Maybe write A - B + -1 >= A to actually match what you match below

> +(for cmp (ge lt)
> + (simplify
> +  (cmp:c (plus:cs (minus:s @0 @1) integer_minus_onep) @0)

on the plus :c is not needed, canonicalization will put the constant
literal second

The previous pattern has a single_use check on the minus, since
the result is always "simple" (a single stmt) the :s have no
effect (hmm, I guess a genmatch warning for this case might be nice).

And yes, if the TYPE_OVERFLOW_WRAPS checks are unnecessary remove
them please, we'll hopefully resist all attempts to ubsan
unsigned overflow ...

> +   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + && TYPE_UNSIGNED (TREE_TYPE (@0))
> + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> +(cmp @1 @0
> +
>  /* Testing for overflow is unnecessary if we already know the result.  */
>  /* A - B > A  */
>  (for cmp (gt le)
> --- gcc/testsuite/gcc.dg/tree-ssa/pr94913.c.jj2020-05-06 
> 15:20:08.306376994 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr94913.c   2020-05-06 15:19:45.120725533 
> +0200
> @@ -0,0 +1,33 @@
> +/* PR tree-optimization/94913 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump " (?:b_\[0-9]+\\\(D\\\) >= 
> a|a_\[0-9]+\\\(D\\\) <= b)_\[0-9]+\\\(D\\\);" "optimized" } } */
> +/* { dg-final { scan-tree-dump " (?:c_\[0-9]+\\\(D\\\) > 
> d|d_\[0-9]+\\\(D\\\) < c)_\[0-9]+\\\(D\\\);" "optimized" } } */
> +/* { dg-final { scan-tree-dump " (?:f_\[0-9]+\\\(D\\\) >= 
> e|e_\[0-9]+\\\(D\\\) <= f)_\[0-9]+\\\(D\\\);" "optimized" } } */
> +/* { dg-final { scan-tree-dump " (?:g_\[0-9]+\\\(D\\\) > 
> h|h_\[0-9]+\\\(D\\\) < g)_\[0-9]+\\\(D\\\);" "optimized" } } */
> +
> +int
> +foo (unsigned a, unsigned b)
> +{
> +  return (a - b - 1) >= a;
> +}
> +
> +int
> +bar (unsigned c, unsigned d)
> +{
> +  return (c - d - 1) < c;
> +}
> +
> +int
> +baz (unsigned e, unsigned f)
> +{
> +  unsigned t = e - f;
> +  return (t - 1) >= e;
> +}
> +
> +int
> +qux (unsigned g, unsigned h)
> +{
> +  unsigned t = g - h;
> +  return (t - 1) < g;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


[PATCH] testsuite: Improve g++.dg/ext/attr-parm-1.C testcase [PR94946]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

On Wed, May 06, 2020 at 02:14:03PM -0400, Nathan Sidwell wrote:
> --- /dev/null
> +++ w/gcc/testsuite/g++.dg/ext/attr-parm-1.C
> @@ -0,0 +1,7 @@
> +// { dg-do compile { target { i?86-*-* x86_64-*-* } } }
> +// { dg-options -m32 }
> +// PR 94946
> +class a {
> +  template  a(b (*)());
> +  template  a(b(__attribute__((fastcall)) *c)());
> +};

The testcase in the current form doesn't FAIL without the patch on
x86_64-linux unless also testing with -m32; as 64-bit testing
on that target is probably way more common, and we can also use attributes
that FAIL without the patch with -m64, the following patch adjusts the
test, so that it FAILs without the patch for both -m64 and -m32 (but not
-mx32) and PASSes with the patch.  Ok for trunk/10.2?
And for 9.4 with additional removal of the forgotten dg-options -m32 line?

Regtested on x86_64-linux and i686-linux.

2020-05-06  Jakub Jelinek  

PR c++/94946
* g++.dg/ext/attr-parm-1.C: Enable the test also for lp64 x86, use
sysv_abi and ms_abi attributes in that case instead of fastcall and
no attribute.

--- gcc/testsuite/g++.dg/ext/attr-parm-1.C.jj   2020-05-06 22:48:46.215419942 
+0200
+++ gcc/testsuite/g++.dg/ext/attr-parm-1.C  2020-05-06 22:50:40.117727005 
+0200
@@ -1,6 +1,11 @@
-// { dg-do compile { target { { i?86-*-* x86_64-*-* } && ia32 } } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ia32 || lp64 } } } }
 // PR 94946
 class a {
+#ifdef __LP64__
+  template  a(b(__attribute__((sysv_abi)) *c)());
+  template  a(b(__attribute__((ms_abi)) *c)());
+#else
   template  a(b (*)());
   template  a(b(__attribute__((fastcall)) *c)());
+#endif
 };


Jakub



Re: [PATCH] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]

2020-05-07 Thread Richard Biener
On Thu, 7 May 2020, Jakub Jelinek wrote:

> Hi!
> 
> The ffs expanders on several targets (x86, ia64, aarch64 at least)
> emit a conditional move or similar code to handle the case when the
> argument is 0, which makes the code longer.
> If we know from VRP that the argument will not be zero, we can (if the
> target has also an ctz expander) just use ctz which is undefined at zero
> and thus the expander doesn't need to deal with that.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

can you use direct_internal_fn_supported_p (IFN_CTZ, type, 
OPTIMIZE_FOR_SPEED)?

> 2020-05-06  Jakub Jelinek  
> 
>   PR tree-optimization/94956
>   * match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
>   __builtin_ctz* + 1 if ctz_optab has an expander.
> 
>   * gcc.target/i386/pr94956.c: New test.
> 
> --- gcc/match.pd.jj   2020-05-06 15:03:51.618058839 +0200
> +++ gcc/match.pd  2020-05-06 15:48:23.658858289 +0200
> @@ -5990,6 +5990,16 @@ (define_operator_list COND_TERNARY
>  (convert (IFN_POPCOUNT:type @0)
>  #endif
>  
> +/* __builtin_ffs needs to deal on many targets with the possible zero
> +   argument.  If we know the argument is always non-zero, __builtin_ctz + 1
> +   should lead to better code.  */
> +(simplify
> + (FFS tree_expr_nonzero_p@0)
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +  && (optab_handler (ctz_optab, TYPE_MODE (TREE_TYPE (@0)))
> +   != CODE_FOR_nothing))
> +  (plus (CTZ:type @0) { build_one_cst (type); })))
> +
>  /* Simplify:
>  
>   a = a1 op a2
> --- gcc/testsuite/gcc.target/i386/pr94956.c.jj2020-05-06 
> 16:35:47.085876237 +0200
> +++ gcc/testsuite/gcc.target/i386/pr94956.c   2020-05-06 16:39:52.927140038 
> +0200
> @@ -0,0 +1,28 @@
> +/* PR tree-optimization/94956 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
> +/* { dg-final { scan-assembler-not "\tsete\t" } } */
> +
> +int
> +foo (unsigned x)
> +{
> +  if (x == 0) __builtin_unreachable ();
> +  return __builtin_ffs (x) - 1;
> +}
> +
> +int
> +bar (unsigned long x)
> +{
> +  if (x == 0) __builtin_unreachable ();
> +  return __builtin_ffsl (x) - 1;
> +}
> +
> +#ifdef __x86_64__
> +int
> +baz (unsigned long long x)
> +{
> +  if (x == 0) __builtin_unreachable ();
> +  return __builtin_ffsll (x) - 1;
> +}
> +#endif
> 
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH] match.pd: Canonicalize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X) [PR94783]

2020-05-07 Thread Richard Biener
On Thu, 7 May 2020, Jakub Jelinek wrote:

> Hi!
> 
> The following patch canonicalizes M = X >> (prec - 1); (X + M) ^ M
> for signed integral types into ABS_EXPR (X).  For X == min it is already
> UB because M is -1 and min + -1 is UB, so we can use ABS_EXPR rather than
> say ABSU_EXPR + cast.
> 
> The backend might then emit the abs code back using the shift and addition
> and xor if it is the best sequence for the target, but could do something
> different that is better.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2020-05-06  Jakub Jelinek  
> 
>   PR tree-optimization/94783
>   * match.pd ((X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X)):
>   New simplification.
> 
>   * gcc.dg/tree-ssa/pr94783.c: New test.
> 
> --- gcc/match.pd.jj   2020-05-06 15:48:23.658858289 +0200
> +++ gcc/match.pd  2020-05-06 17:47:02.035347946 +0200
> @@ -120,6 +120,18 @@ (define_operator_list COND_TERNARY
>(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> (convert (absu:utype @0)
>  
> +#if GIMPLE
> +/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X).  */
> +(simplify
> + (bit_xor:c (plus:cs @0 (rshift@2 @0 INTEGER_CST@1)) @2)
> + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +  && !TYPE_UNSIGNED (TREE_TYPE (@0))
> +  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> +  && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1
> +  && TREE_CODE (@2) == SSA_NAME
> +  && num_imm_uses (@2) == 2)

I fear this num_imm_uses test is quite fragile since match.pd patterns
are invoked on transient sequences (w/o SSA operands) as well
(that of course holds for all single_use () tests as well but that
at least allows zero uses for this very reason - still fragile since
the single use might be _in_ the IL and thus a second).

I think unconditionally using (abs @0) is simplifying things enough
(getting rid of one xor and one plus) to not worry about keeping
the (x >> (prec - 1))?

Do you really need the TYPE_OVERFLOW_UNDEFINED check?

> +  (abs @0)))
> +#endif
>  
>  /* Simplifications of operations with one constant operand and
> simplifications to constants or single values.  */
> --- gcc/testsuite/gcc.dg/tree-ssa/pr94783.c.jj2020-05-06 
> 17:52:35.515323297 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr94783.c   2020-05-06 17:52:10.915693948 
> +0200
> @@ -0,0 +1,12 @@
> +/* PR tree-optimization/94783 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump "ABS_EXPR" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " >> 31" "optimized" } } */
> +
> +int
> +foo (int v)
> +{
> +  int mask = v >> (__SIZEOF_INT__ * __CHAR_BIT__ - 1);
> +  return (v + mask) ^ mask;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH] csa: Fix --enable-checking=yes, df bootstrap failure in csa [PR94961]

2020-05-07 Thread Eric Botcazou
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, plus
> tested with yes,df checking build/regtest on x86_64-linux (which previously
> failed), ok for trunk?
> 
> 2020-05-06  Jakub Jelinek  
> 
>   PR bootstrap/94961
>   PR rtl-optimization/94516
>   * combine-stack-adj.c (try_apply_stack_adjustment): Call
>   df_notes_rescan after calling remove_reg_equal_equiv_notes.

Do you need to backport this onto some branches?  Otherwise, it would probably 
be better to do in remove_reg_equal_equiv_notes what is done in remove_note.

-- 
Eric Botcazou


Re: [PATCH] csa: Fix --enable-checking=yes,df bootstrap failure in csa [PR94961]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
On Thu, May 07, 2020 at 10:23:24AM +0200, Eric Botcazou wrote:
> > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, plus
> > tested with yes,df checking build/regtest on x86_64-linux (which previously
> > failed), ok for trunk?
> > 
> > 2020-05-06  Jakub Jelinek  
> > 
> > PR bootstrap/94961
> > PR rtl-optimization/94516
> > * combine-stack-adj.c (try_apply_stack_adjustment): Call
> > df_notes_rescan after calling remove_reg_equal_equiv_notes.
> 
> Do you need to backport this onto some branches?  Otherwise, it would 
> probably 

Still undecided about that.

> be better to do in remove_reg_equal_equiv_notes what is done in remove_note.

But I can surely do
  if (ret)
df_notes_rescan (insn);
before
  return ret;
in remove_reg_equal_equiv_notes.

Though, combine.c doesn't need that, as shortly after the
remove_reg_equal_equiv_notes call it calls full df_insn_rescan, and postreload.c
has
  if (reg_state[regno].offset != const0_rtx
  /* Previous REG_EQUIV / REG_EQUAL notes for PREV
 are now invalid.  */
  && remove_reg_equal_equiv_notes (prev))
df_notes_rescan (prev);
and those are the only two callers of this function besides the new one
in combine-stack-adj.c.

Jakub



Re: [PATCH] libgcc: aarch64: Get hwcap for FreeBSD

2020-05-07 Thread Andreas Tobler

On 07.05.20 09:25, Kyrylo Tkachov wrote:

Hi Andreas,


-Original Message-
From: Gcc-patches  On Behalf Of
Andreas Tobler
Sent: 06 May 2020 21:17
To: GCC Patches 
Subject: [PATCH] libgcc: aarch64: Get hwcap for FreeBSD

Hi all,

Since FreeBSD 12, FreeBSD has a sys/auxv.h header too but it doesn't
provide the getauxval function. Instead it offers the elf_aux_info
function which provides a similar functionality.
This patch gets the hwcap for FreeBSD.

Is this ok for trunk?


This whole thing is gated on __gnu_linux__.  Does FreeBSD set that?


:) No, I developed the patch before you introduced the __gnu_linux__ guard.
This guard makes the patch obsolete.

Thanks,
Andreas



Re: [PATCH] csa: Fix --enable-checking=yes, df bootstrap failure in csa [PR94961]

2020-05-07 Thread Eric Botcazou
> But I can surely do
>   if (ret)
> df_notes_rescan (insn);
> before
>   return ret;
> in remove_reg_equal_equiv_notes.
> 
> Though, combine.c doesn't need that, as it shortly afterward the
> remove_reg_equal_equiv_notes call calls full df_insn_rescan, postreload.c
> has
>   if (reg_state[regno].offset != const0_rtx
>   /* Previous REG_EQUIV / REG_EQUAL notes for PREV
>  are now invalid.  */
>   && remove_reg_equal_equiv_notes (prev))
> df_notes_rescan (prev);
> and those are the only two callers of this function besides the new one
> in combine-stack-adj.c.

OK, let's add a parameter no_rescan to remove_reg_equal_equiv_notes and update 
the two callers accordingly.

-- 
Eric Botcazou


[PATCH] wwwdocs: Document the C++ ABI changes in GCC 10

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

This is my humble attempt to document the C++ ABI changes.
Or do you have something better?
Not sure e.g. how C++ calls the TREE_ADDRESSABLE types which are
passed/returned by invisible reference, perhaps it would be nice
to make it clear that those aren't affected.

As the release tarballs contain a copy of changes.html, I'm afraid
we need to resolve this before I can roll the tarballs.

--- htdocs/gcc-10/changes.html  2020-05-07 10:44:17.391465227 +0200
+++ htdocs/gcc-10/changes.html  2020-05-07 11:02:50.580691123 +0200
@@ -31,6 +31,27 @@ a work-in-progress.
 Caveats
 
   
+The ABI
+of passing and returning certain C++ classes by value changed
+on several targets in GCC 10, including AArch64, ARM, PowerPC ELFv2,
+S/390 and Itanium.  In -std=c++17 and -std=c++20
+modes for classes with empty bases which otherwise contain only a single
+element or are handled as homogeneous aggregates in
+-std=c++14 and earlier modes the existence of the empty
+bases resulted in those classes not to be considered as having a single
+element or be homogeneous aggregate and so could be passed differently,
+making -std=c++17 and -std=c++14 compiled code
+ABI incompatible.  This has been corrected and the empty bases are
+ignored in those ABI decisions, so -std=c++14 and
+-std=c++17 compiled codes are now again ABI compatible.
+Example: struct empty {}; struct S : public empty { float f; 
}.
+Similarly, in classes containing non-static data members with empty
+class types and with C++20 [[no_unique_address]] attribute
+those members weren't ignored in the ABI argument passing decisions
+as they should be.  Both of these ABI changes are now diagnosed with
+-Wpsabi.
+  
+  
 The deprecated Profile Mode and array_allocator extensions
 have been removed from libstdc++.
   

Jakub



[PATCH v2] csa: Fix --enable-checking=yes, df bootstrap failure in csa [PR94961]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
On Thu, May 07, 2020 at 10:40:12AM +0200, Eric Botcazou wrote:
> OK, let's add a parameter no_rescan to remove_reg_equal_equiv_notes and 
> update 
> the two callers accordingly.

I'll try the following then.

2020-05-07  Jakub Jelinek  

PR bootstrap/94961
PR rtl-optimization/94516
* rtl.h (remove_reg_equal_equiv_notes): Add a bool argument defaulted
to false.
* rtlanal.c (remove_reg_equal_equiv_notes): Add no_rescan argument.
Call df_notes_rescan if that argument is not true and returning true.
* combine.c (adjust_for_new_dest): Pass true as second argument to
remove_reg_equal_equiv_notes.
* postreload.c (reload_combine_recognize_pattern): Don't call
df_notes_rescan.

--- gcc/rtl.h.jj2020-04-02 14:28:02.0 +0200
+++ gcc/rtl.h   2020-05-07 11:24:17.745856904 +0200
@@ -3500,7 +3500,7 @@ extern void add_args_size_note (rtx_insn
 extern void add_shallow_copy_of_reg_note (rtx_insn *, rtx);
 extern rtx duplicate_reg_note (rtx);
 extern void remove_note (rtx_insn *, const_rtx);
-extern bool remove_reg_equal_equiv_notes (rtx_insn *);
+extern bool remove_reg_equal_equiv_notes (rtx_insn *, bool = false);
 extern void remove_reg_equal_equiv_notes_for_regno (unsigned int);
 extern int side_effects_p (const_rtx);
 extern int volatile_refs_p (const_rtx);
--- gcc/rtlanal.c.jj2020-04-29 10:21:25.062999858 +0200
+++ gcc/rtlanal.c   2020-05-07 11:25:18.033937373 +0200
@@ -2483,7 +2483,7 @@ remove_note (rtx_insn *insn, const_rtx n
Return true if any note has been removed.  */
 
 bool
-remove_reg_equal_equiv_notes (rtx_insn *insn)
+remove_reg_equal_equiv_notes (rtx_insn *insn, bool no_rescan)
 {
   rtx *loc;
   bool ret = false;
@@ -2500,6 +2500,8 @@ remove_reg_equal_equiv_notes (rtx_insn *
   else
loc = &XEXP (*loc, 1);
 }
+  if (ret && !no_rescan)
+df_notes_rescan (insn);
   return ret;
 }
 
--- gcc/combine.c.jj2020-05-06 09:30:48.995407357 +0200
+++ gcc/combine.c   2020-05-07 11:25:34.302689241 +0200
@@ -2459,7 +2459,7 @@ static void
 adjust_for_new_dest (rtx_insn *insn)
 {
   /* For notes, be conservative and simply remove them.  */
-  remove_reg_equal_equiv_notes (insn);
+  remove_reg_equal_equiv_notes (insn, true);
 
   /* The new insn will have a destination that was previously the destination
  of an insn just above it.  Call distribute_links to make a LOG_LINK from
--- gcc/postreload.c.jj 2020-05-05 16:34:33.611007861 +0200
+++ gcc/postreload.c2020-05-07 11:26:13.506091307 +0200
@@ -1223,11 +1223,10 @@ reload_combine_recognize_pattern (rtx_in
  /* Delete the reg-reg addition.  */
  delete_insn (insn);
 
- if (reg_state[regno].offset != const0_rtx
- /* Previous REG_EQUIV / REG_EQUAL notes for PREV
-are now invalid.  */
- && remove_reg_equal_equiv_notes (prev))
-   df_notes_rescan (prev);
+ if (reg_state[regno].offset != const0_rtx)
+   /* Previous REG_EQUIV / REG_EQUAL notes for PREV
+  are now invalid.  */
+   remove_reg_equal_equiv_notes (prev);
 
  reg_state[regno].use_index = RELOAD_COMBINE_MAX_USES;
  return true;


Jakub



[PATCH] tree-optimization/94865 - combine BIT_INSERT_EXPR of BIT_FIELD_REF

2020-05-07 Thread Richard Biener
This implements patterns combining vector element insertion of
vector element extraction to a VEC_PERM_EXPR of both vectors
when supported.  Plus it adds the more generic identity transform
of inserting a piece of itself at the same position.

Richard - is there anything I can do to make this SVE aware?
I'd need to construct an identity permute and "insert" into
that permute that element from the other (or same) vector.
I suppose for most element positions that won't work but
at least inserting at [0] should?  I'm mostly struggling
on how to use vec_perm_builder here when nelts is not constant,
since it's derived from vec<> can I simply start with
a single pattern with 1 stride and then insert by using []?

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Thanks,
Richard.

2020-05-07  Richard Biener  

PR tree-optimization/94865
* match.pd ((bit_insert @0 (BIT_FIELD_REF @0 ...) ...) -> @0):
New simplification.
((bit_insert @0 (BIT_FIELD_REF @1 ...) ...) -> (vec_perm @0 @1 ..):
Likewise.

* gcc.dg/tree-ssa/forwprop-39.c: New testcase.
* gcc.dg/tree-ssa/forwprop-40.c: Likewise.
---
 gcc/match.pd| 42 +
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c | 21 +++
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c | 18 +
 3 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 9259dd4ddaa..4ce728d78c8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5838,6 +5838,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  wi::to_wide (@ipos) + isize))
 (BIT_FIELD_REF @0 @rsize @rpos)
 
+/* Optimize a bit-insertion of a bit-extraction from the same object,
+   position and size.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @0 @size @pos) @pos)
+ @0)
+
+/* Optimize an element insertion into a vector that is extracted from
+   another vector to a permutation of both vectors.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @2 @size @rpos) @ipos)
+ (if (VECTOR_TYPE_P (type)
+  && types_match (type, TREE_TYPE (@2))
+  && single_use (@1))
+  (with
+   {
+ unsigned HOST_WIDE_INT nelts;
+ unsigned int elem, at, elemsz;
+   }
+   (if (TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
+   && tree_fits_uhwi_p (@size)
+   && constant_multiple_p (tree_to_poly_uint64 (@rpos),
+   tree_to_poly_uint64 (@size), &elem)
+   && constant_multiple_p (tree_to_poly_uint64 (@ipos),
+   tree_to_poly_uint64 (@size), &at)
+   && constant_multiple_p (tree_to_poly_uint64 (@size),
+   tree_to_poly_uint64
+ (TYPE_SIZE (TREE_TYPE (type))), &elemsz))
+(with
+ {
+   vec_perm_builder sel (nelts, nelts, 1);
+   for (unsigned i = 0; i < nelts; ++i)
+ sel.quick_push (i / elemsz == at
+? nelts + elem * elemsz + i % elemsz : i);
+   vec_perm_indices indices (sel, @0 == @2 ? 1 : 2, nelts);
+ }
+ (if (can_vec_perm_const_p (TYPE_MODE (TREE_TYPE (@0)), indices))
+  (vec_perm @0 @2 { vec_perm_indices_to_tree
+ (build_vector_type
+   (build_nonstandard_integer_type
+ (tree_to_uhwi (@size), 1),
+nelts), indices); })))
+
 (if (canonicalize_math_after_vectorization_p ())
  (for fmas (FMA)
   (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
new file mode 100644
index 000..f0212c373e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1 -fdump-tree-cddce1 -Wno-psabi -w" } 
*/
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df move_sd(v2df a, v2df b)
+{
+  v2df result = a;
+  result[1] = b[1];
+  return result;
+}
+
+v2df move_nnop(v2df a)
+{
+  v2df result = a;
+  result[1] = a[1];
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
new file mode 100644
index 000..94329437f0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fgimple -fdump-tree-forwprop1 -w -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+
+v4si __GIMPLE(ssa) bar (v4si a)
+{
+  v2si el;
+  v4si res;
+
+  __BB(2):
+  el_2 = __BIT_FIELD_REF <v2si> (a_1(D), 64u, 64u);
+  res_3 = __BIT_INSERT (a_1(D), el_2, 0u);
+  return res_3;
+}
+
+/* 

Re: [PATCH] wwwdocs: Document the C++ ABI changes in GCC 10

2020-05-07 Thread Jonathan Wakely via Gcc-patches

On 07/05/20 11:08 +0200, Jakub Jelinek wrote:

Hi!

This is my humble attempt to document the C++ ABI changes.
Or do you have something better?
Not sure e.g. how C++ calls the TREE_ADDRESSABLE types which are
passed/returned by invisible reference, perhaps it would be nice
to make it clear that those aren't affected.

As the release tarballs contain a copy of changes.html, I'm afraid
we need to resolve this before I can roll the tarballs.

--- htdocs/gcc-10/changes.html  2020-05-07 10:44:17.391465227 +0200
+++ htdocs/gcc-10/changes.html  2020-05-07 11:02:50.580691123 +0200
@@ -31,6 +31,27 @@ a work-in-progress.
Caveats

  
+The ABI
+of passing and returning certain C++ classes by value changed
+on several targets in GCC 10, including AArch64, ARM, PowerPC ELFv2,
+S/390 and Itanium.  In -std=c++17 and -std=c++20
+modes for classes with empty bases which otherwise contain only a single
+element or are handled as homogeneous aggregates in
+-std=c++14 and earlier modes the existence of the empty
+bases resulted in those classes not to be considered as having a single
+element or be homogeneous aggregate and so could be passed differently,
+making -std=c++17 and -std=c++14 compiled code
+ABI incompatible.  This has been corrected and the empty bases are
+ignored in those ABI decisions, so -std=c++14 and
+-std=c++17 compiled codes are now again ABI compatible.
+Example: struct empty {}; struct S : public empty { float f; 
}.
+Similarly, in classes containing non-static data members with empty
+class types and with C++20 [[no_unique_address]] attribute
+those members weren't ignored in the ABI argument passing decisions
+as they should be.  Both of these ABI changes are now diagnosed with
+-Wpsabi.
+  
+  
The deprecated Profile Mode and array_allocator extensions
have been removed from libstdc++.
  


Here's what I've committed after discussion on IRC, combining Jakub's
text with what I had.


commit f1d2be6c9fcc52d676266e7ede123953d150aaf3
Author: Jonathan Wakely 
Date:   Thu May 7 11:24:04 2020 +0100

Document C++17 ABI changes in GCC 10

diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html
index f5b70eb4..52fd6e6a 100644
--- a/htdocs/gcc-10/changes.html
+++ b/htdocs/gcc-10/changes.html
@@ -30,6 +30,12 @@ a work-in-progress.
 
 Caveats
 
+  
+An ABI incompatibility between C++14 and C++17 has been fixed.  On some
+targets a class with a zero-sized subobject would be passed incorrectly
+when compiled as C++17 or C++20.
+See the C++ notes below for more details.
+  
   
 The deprecated Profile Mode and array_allocator extensions
 have been removed from libstdc++.
@@ -409,6 +415,39 @@ int get_naïve_pi() {
 The attribute deprecated can now be used on
 namespaces too.
   
+  
+The ABI
+of passing and returning certain C++ classes by value changed
+on several targets in GCC 10, including
+<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94383">AArch64</a>,
+<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94711">ARM</a>,
+<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94383">PowerPC ELFv2</a>,
+<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94704">S/390</a>
+and
+<a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94706">Itanium</a>.
+These changes affect classes with a zero-sized
+subobject (an empty base class, or data member with the
+[[no_unique_address]] attribute) where all other non-static
+data members have the same type (this is called a "homogeneous aggregate"
+in some ABI specifications, or if there is only one such member,
+a "single element").
+In -std=c++17 and -std=c++20 modes, classes with
+an empty base class were not considered to have a single element or
+to be a homogeneous aggregate, and so could be passed differently
+(in the wrong registers or at the wrong stack address). This could make
+code compiled with -std=c++17 and -std=c++14
+ABI incompatible.  This has been corrected and the empty bases are
+ignored in those ABI decisions, so functions compiled with
+-std=c++14 and -std=c++17 are now ABI compatible
+again.
+Example:
+struct empty {}; struct S : empty { float f; }; void f(S);.
+Similarly, in classes containing non-static data members with empty
+class types using the C++20 [[no_unique_address]] attribute,
+those members weren't ignored in the ABI argument passing decisions
+as they should be.  Both of these ABI changes are now diagnosed with
+-Wpsabi.
+  
 
 
 Runtime Library (libstdc++)


Re: [PATCH v2] csa: Fix --enable-checking=yes, df bootstrap failure in csa [PR94961]

2020-05-07 Thread Eric Botcazou
> I'll try following then.
> 
> 2020-05-07  Jakub Jelinek  
> 
> PR bootstrap/94961
> PR rtl-optimization/94516
>   * rtl.h (remove_reg_equal_equiv_notes): Add a bool argument defaulted
>   to false.
>   * rtlanal.c (remove_reg_equal_equiv_notes): Add no_rescan argument.
>   Call df_notes_rescan if that argument is not true and returning true.
>   * combine.c (adjust_for_new_dest): Pass true as second argument to
>   remove_reg_equal_equiv_notes.
>   * postreload.c (reload_combine_recognize_pattern): Don't call
>   df_notes_rescan.

OK with me modulo the missing documentation for the new parameter, thanks.

-- 
Eric Botcazou


AArch32: fix bootstrap failure

2020-05-07 Thread Alex Coplan
Hello,

The attached patch fixes a bootstrap failure on AArch32 introduced by
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=308bc496884706af4b3077171cbac684c7a6f7c6

This makes the declaration of arm_add_stmt_cost match the definition, and 
removes the redundant
class keyword from the definition.

Testing:
  * Bootstrap on arm-linux-gnueabihf.

Thanks,
Alex

---

gcc/ChangeLog:

2020-05-07  Alex Coplan  

* config/arm/arm.c (arm_add_stmt_cost): Fix declaration, remove class 
from definition.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index bbd7dc5316c..d50781953c0 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -306,7 +306,7 @@ static bool aarch_macro_fusion_pair_p (rtx_insn*, 
rtx_insn*);
 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt 
type_of_cost,
   tree vectype,
   int misalign ATTRIBUTE_UNUSED);
-static unsigned arm_add_stmt_cost (void *data, int count,
+static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
   enum vect_cost_for_stmt kind,
   struct _stmt_vec_info *stmt_info,
   int misalign,
@@ -12131,7 +12131,7 @@ arm_builtin_vectorization_cost (enum vect_cost_for_stmt 
type_of_cost,
 /* Implement targetm.vectorize.add_stmt_cost.  */
 
 static unsigned
-arm_add_stmt_cost (class vec_info *vinfo, void *data, int count,
+arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
   enum vect_cost_for_stmt kind,
   struct _stmt_vec_info *stmt_info, int misalign,
   enum vect_cost_model_location where)


RE: AArch32: fix bootstrap failure

2020-05-07 Thread Kyrylo Tkachov
Hi Alex,

> -Original Message-
> From: Alex Coplan 
> Sent: 07 May 2020 11:31
> To: gcc-patches@gcc.gnu.org
> Cc: ni...@redhat.com; Richard Earnshaw ;
> Ramana Radhakrishnan ; Kyrylo
> Tkachov ; nd 
> Subject: AArch32: fix bootstrap failure
> 
> Hello,
> 
> The attached patch fixes a bootstrap failure on AArch32 introduced by
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=308bc496884706af4b3077171
> cbac684c7a6f7c6
> 
> This makes the declaration of arm_add_stmt_cost match the definition, and
> removes the redundant
> class keyword from the definition.

Thanks, I've pushed this patch to master.
Kyrill

> 
> Testing:
>   * Bootstrap on arm-linux-gnueabihf.
> 
> Thanks,
> Alex
> 
> ---
> 
> gcc/ChangeLog:
> 
> 2020-05-07  Alex Coplan  
> 
> * config/arm/arm.c (arm_add_stmt_cost): Fix declaration, remove class
> from definition.

I've fixed up the line length (it should be below 80 columns)




Re: [PATCH] tree-optimization/94865 - combine BIT_INSERT_EXPR of BIT_FIELD_REF

2020-05-07 Thread Richard Sandiford
Richard Biener  writes:
> This implements patterns combining vector element insertion of
> vector element extraction to a VEC_PERM_EXPR of both vectors
> when supported.  Plus it adds the more generic identity transform
> of inserting a piece of itself at the same position.
>
> Richard - is there anything I can do to make this SVE aware?
> I'd need to construct an identity permute and "insert" into
> that permute that element from the other (or same) vector.
> I suppose for most element positions that won't work but
> at least inserting at [0] should?  I'm mostly struggling
> on how to use vec_perm_builder here when nelts is not constant,
> since it's derived from vec<> can I simply start with
> a single pattern with 1 stride and then insert by using []?

I guess for SVE we still want to know that the range is safe
for all VL, so after dropping the is_constant check, we'd
want something like:

   {
 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
 unsigned int min_nelts = constant_lower_bound (nelts);
   }
   (if (...
&& at + elemsz <= min_nelts)

In theory (hah) it should then just be a case of changing the
vec_perm_builder constructor to:

  vec_perm_builder sel (nelts, min_nelts, 3);

and then iterating over min_nelts * 3 instead of nelts here:

> +   for (unsigned i = 0; i < nelts; ++i)
> + sel.quick_push (i / elemsz == at
> +  ? nelts + elem * elemsz + i % elemsz : i);

So as far as the encoding goes, the first min_nelts elements are arbitrary
values, and the following two min_nelts sequences form individual linear
series.

This ought to work for both SVE and non-SVE, although obviously
there's a bit of wasted work for non-SVE.

(And thanks for asking :-))

Richard


Re: [PATCH] libstdc++: Update {x86_64, i?86, powerpc64, s390x, aarch64}-linux baselines for GCC 10.1

2020-05-07 Thread Eric Botcazou
> It looks like gcc-9 needs the same update too.

Attached, but results are identical with those of gcc-10, is that expected?

Using built-in specs.
COLLECT_GCC=gcc/xgcc
Target: sparc64-linux-gnu
Configured with: /home/ebotcazou/src-9/configure --build=sparc64-linux-gnu --
prefix=/home/ebotcazou/install --enable-languages=all --enable-gnu-unique-
object --with-cpu-32=ultrasparc --with-long-double-128 --enable-default-pie --
enable-checking=yes,rtl --disable-nls
Thread model: posix
gcc version 9.3.1 20200501 (GCC)


2020-05-07  Eric Botcazou  

* config/abi/post/sparc64-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/sparc64-linux-gnu/32/baseline_symbols.txt: Likewise.

-- 
Eric Botcazou
diff --git a/libstdc++-v3/config/abi/post/sparc64-linux-gnu/32/baseline_symbols.txt b/libstdc++-v3/config/abi/post/sparc64-linux-gnu/32/baseline_symbols.txt
index 31c73b03659..77c65db856f 100644
--- a/libstdc++-v3/config/abi/post/sparc64-linux-gnu/32/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/sparc64-linux-gnu/32/baseline_symbols.txt
@@ -338,7 +338,9 @@ FUNC:_ZNKSt10filesystem16filesystem_error4whatEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem16filesystem_error5path1Ev@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem16filesystem_error5path2Ev@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem18directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iterator17recursion_pendingEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem28recursive_directory_iterator5depthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem28recursive_directory_iterator7optionsEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem28recursive_directory_iteratordeEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem4path11parent_pathEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem4path12has_filenameEv@@GLIBCXX_3.4.26
@@ -364,7 +366,9 @@ FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error4whatEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error5path1Ev@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx1116filesystem_error5path2Ev@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx1118directory_iteratordeEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator17recursion_pendingEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator5depthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iterator7optionsEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx1128recursive_directory_iteratordeEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx114path11parent_pathEv@@GLIBCXX_3.4.26
 FUNC:_ZNKSt10filesystem7__cxx114path12has_filenameEv@@GLIBCXX_3.4.26
@@ -982,6 +986,13 @@ FUNC:_ZNKSt7__cxx119money_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_
 FUNC:_ZNKSt7__cxx119money_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_putES4_bRSt8ios_basewe@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7__cxx119money_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE9_M_insertILb0EEES4_S4_RSt8ios_basewRKNS_12basic_stringIwS3_SaIwEEE@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7__cxx119money_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE9_M_insertILb1EEES4_S4_RSt8ios_basewRKNS_12basic_stringIwS3_SaIwEEE@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE10do_unshiftERS0_PDuS3_RS3_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE11do_encodingEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE13do_max_lengthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE16do_always_noconvEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE5do_inERS0_PKDuS4_RS4_PDiS6_RS6_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE6do_outERS0_PKDiS4_RS4_PDuS6_RS6_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDiDu11__mbstate_tE9do_lengthERS0_PKDuS4_j@@GLIBCXX_3.4.26
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE10do_unshiftERS0_PcS3_RS3_@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE11do_encodingEv@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE13do_max_lengthEv@@GLIBCXX_3.4.21
@@ -989,6 +1000,13 @@ FUNC:_ZNKSt7codecvtIDic11__mbstate_tE16do_always_noconvEv@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE5do_inERS0_PKcS4_RS4_PDiS6_RS6_@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE6do_outERS0_PKDiS4_RS4_PcS6_RS6_@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDic11__mbstate_tE9do_lengthERS0_PKcS4_j@@GLIBCXX_3.4.21
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE10do_unshiftERS0_PDuS3_RS3_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE11do_encodingEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE13do_max_lengthEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE16do_always_noconvEv@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE5do_inERS0_PKDuS4_RS4_PDsS6_RS6_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE6do_outERS0_PKDsS4_RS4_PDuS6_RS6_@@GLIBCXX_3.4.26
+FUNC:_ZNKSt7codecvtIDsDu11__mbstate_tE9do_lengthERS0_PKDuS4_j@@GLIBCXX_3.4.26
 FUNC:_ZNKSt7codecvtIDsc11__mbstate_tE10do_unshiftERS0_PcS3_RS3_@@GLIBCXX_3.4.21
 FUNC:_ZNKSt7codecvtIDsc11__mbstate_tE11do_encodingEv@@GLIBCXX_3.4.21

Re: [PATCH] tree-optimization/94865 - combine BIT_INSERT_EXPR of BIT_FIELD_REF

2020-05-07 Thread Richard Biener
On Thu, 7 May 2020, Richard Sandiford wrote:

> Richard Biener  writes:
> > This implements patterns combining vector element insertion of
> > vector element extraction to a VEC_PERM_EXPR of both vectors
> > when supported.  Plus it adds the more generic identity transform
> > of inserting a piece of itself at the same position.
> >
> > Richard - is there anything I can do to make this SVE aware?
> > I'd need to construct an identity permute and "insert" into
> > that permute that element from the other (or same) vector.
> > I suppose for most element positions that won't work but
> > at least inserting at [0] should?  I'm mostly struggling
> > on how to use vec_perm_builder here when nelts is not constant,
> > since it's derived from vec<> can I simply start with
> > a single pattern with 1 stride and then insert by using []?
> 
> I guess for SVE we still want to know that the range is safe
> for all VL, so after dropping the is_constant check, we'd
> want something like:
> 
>{
>  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>  unsigned int min_nelts = constant_lower_bound (nelts);
>}
>(if (...
> && at + elemsz <= min_nelts)
> 
> In theory (hah) it should then just be a case of changing the
> vec_perm_builder constructor to:
> 
>   vec_perm_builder sel (nelts, min_nelts, 3);
> 
> and then iterating over min_nelts * 3 instead of nelts here:
> 
> > +   for (unsigned i = 0; i < nelts; ++i)
> > + sel.quick_push (i / elemsz == at
> > +? nelts + elem * elemsz + i % elemsz : i);
> 
> So as far as the encoding goes, the first min_nelts elements are arbitrary
> values, and the following two min_nelts sequences form individual linear
> series.

OK - not sure why we need exactly three nelts per pattern here.
It also looks like all the constant_multiple_p () checks constrain
things quite a bit.  Oh, and does a BIT_FIELD_REF with poly-int position
extract multiple elements in the end?!  For the case we are extracting
a sub-vector and thus elemsz != 1 we constrain it so that this
sub-vector is not of variable size (err, not "independently" so,
whatever that means..)?

My brain hurts...  how do you write a GIMPLE testcase for aarch64
SVE covering such cases?

> This ought to work for both SVE and non-SVE, although obviously
> there's a bit of wasted work for non-SVE.
> 
> (And thanks for asking :-))

So like this, it seems to still work on the x86 testcases?

Thanks,
Richard.

This implements patterns combining vector element insertion of
vector element extraction to a VEC_PERM_EXPR of both vectors
when supported.  Plus it adds the more generic identity transform
of inserting a piece of itself at the same position.

2020-05-07  Richard Biener  

PR tree-optimization/94864
PR tree-optimization/94865
* match.pd ((bit_insert @0 (BIT_FIELD_REF @0 ...) ...) -> @0):
New simplification.
((bit_insert @0 (BIT_FIELD_REF @1 ...) ...) -> (vec_perm @0 @1 ..):
Likewise.

* gcc.dg/tree-ssa/forwprop-39.c: New testcase.
* gcc.dg/tree-ssa/forwprop-40.c: Likewise.
---
 gcc/match.pd| 41 +
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c | 30 +
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c | 18 +
 3 files changed, 89 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 9259dd4ddaa..28d3bc459a7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5838,6 +5838,47 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  wi::to_wide (@ipos) + isize))
 (BIT_FIELD_REF @0 @rsize @rpos)
 
+/* Optimize a bit-insertion of a bit-extraction from the same object,
+   position and size.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @0 @size @pos) @pos)
+ @0)
+
+/* Optimize an element insertion into a vector that is extracted from
+   another vector to a permutation of both vectors.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @2 @size @rpos) @ipos)
+ (if (VECTOR_TYPE_P (type)
+  && types_match (type, TREE_TYPE (@2))
+  && single_use (@1))
+  (with
+   {
+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+ unsigned int min_nelts = constant_lower_bound (nelts);
+ unsigned int elem_idx, at, n_elem;
+ unsigned HOST_WIDE_INT elem_sz
+   = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
+   }
+   (if (constant_multiple_p (tree_to_poly_uint64 (@rpos),
+tree_to_poly_uint64 (@size), &elem_idx)
+   && constant_multiple_p (tree_to_poly_uint64 (@ipos),
+   tree_to_poly_uint64 (@size), &at)
+   && constant_multiple_p (tree_to_poly_uint64 (@size), elem_sz, &n_elem)
+   && at + n_elem <= min_nelts)
+(with
+ {
+   vec_perm_builder sel (nelts, min_nelts, 3);
+   for (unsigned i = 0; i < 3 * m

Re: [PATCH] c-family: Fix error-recovery ICE on __builtin_speculation_safe_value [PR94968]

2020-05-07 Thread Richard Biener via Gcc-patches
On Thu, May 7, 2020 at 9:48 AM Jakub Jelinek via Gcc-patches
 wrote:
>
> Hi!
>
> If the second argument of __builtin_speculation_safe_value is
> error_mark_node (or has such a type), we ICE during
> useless_type_conversion_p.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

OK.

Richard.

> 2020-05-06  Jakub Jelinek  
>
> PR c/94968
> * c-common.c (speculation_safe_value_resolve_params): Return false if
> error_operand_p (val2).
> (resolve_overloaded_builtin) :
> Remove extraneous semicolon.
>
> * gcc.dg/pr94968.c: New test.
>
> --- gcc/c-family/c-common.c.jj  2020-04-27 16:10:23.795726234 +0200
> +++ gcc/c-family/c-common.c 2020-05-06 12:37:40.708539995 +0200
> @@ -6716,6 +6716,8 @@ speculation_safe_value_resolve_params (l
>tree val2 = (*params)[1];
>if (TREE_CODE (TREE_TYPE (val2)) == ARRAY_TYPE)
> val2 = default_conversion (val2);
> +  if (error_operand_p (val2))
> +   return false;
>if (!(TREE_TYPE (val) == TREE_TYPE (val2)
> || useless_type_conversion_p (TREE_TYPE (val), TREE_TYPE (val2
> {
> @@ -7400,7 +7402,7 @@ resolve_overloaded_builtin (location_t l
>{
> tree new_function, first_param, result;
> enum built_in_function fncode
> - = speculation_safe_value_resolve_call (function, params);;
> + = speculation_safe_value_resolve_call (function, params);
>
> if (fncode == BUILT_IN_NONE)
>   return error_mark_node;
> --- gcc/testsuite/gcc.dg/pr94968.c.jj   2020-05-06 12:50:10.865191796 +0200
> +++ gcc/testsuite/gcc.dg/pr94968.c  2020-05-06 12:49:52.366471532 +0200
> @@ -0,0 +1,8 @@
> +/* PR c/94968 */
> +/* { dg-do compile } */
> +
> +int
> +foo (void)
> +{
> +  __builtin_speculation_safe_value (1, x); /* { dg-error "undeclared" } 
> */
> +}  /* { dg-message "each 
> undeclared identifier is reported only once" "" { target *-*-* } .-1 } */
>
> Jakub
>


Re: [PATCH] libstdc++: Update {x86_64, i?86, powerpc64, s390x, aarch64}-linux baselines for GCC 10.1

2020-05-07 Thread Jonathan Wakely via Gcc-patches

On 07/05/20 13:11 +0200, Eric Botcazou wrote:

It looks like gcc-9 needs the same update too.


Attached, but results are identical with those of gcc-10, is that expected?


Yes, there have been no changes since 9.3.0

OK for gcc-9 - thanks.



Re: [PATCH] tree-optimization/94865 - combine BIT_INSERT_EXPR of BIT_FIELD_REF

2020-05-07 Thread Richard Sandiford
Richard Biener  writes:
> On Thu, 7 May 2020, Richard Sandiford wrote:
>
>> Richard Biener  writes:
>> > This implements patterns combining vector element insertion of
>> > vector element extraction to a VEC_PERM_EXPR of both vectors
>> > when supported.  Plus it adds the more generic identity transform
>> > of inserting a piece of itself at the same position.
>> >
>> > Richard - is there anything I can do to make this SVE aware?
>> > I'd need to construct an identity permute and "insert" into
>> > that permute that element from the other (or same) vector.
>> > I suppose for most element positions that won't work but
>> > at least inserting at [0] should?  I'm mostly struggling
>> > on how to use vec_perm_builder here when nelts is not constant,
>> > since it's derived from vec<> can I simply start with
>> > a single pattern with 1 stride and then insert by using []?
>> 
>> I guess for SVE we still want to know that the range is safe
>> for all VL, so after dropping the is_constant check, we'd
>> want something like:
>> 
>>{
>>  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>>  unsigned int min_nelts = constant_lower_bound (nelts);
>>}
>>(if (...
>> && at + elemsz <= min_nelts)
>> 
>> In theory (hah) it should then just be a case of changing the
>> vec_perm_builder constructor to:
>> 
>>   vec_perm_builder sel (nelts, min_nelts, 3);
>> 
>> and then iterating over min_nelts * 3 instead of nelts here:
>> 
>> > +   for (unsigned i = 0; i < nelts; ++i)
>> > + sel.quick_push (i / elemsz == at
>> > +   ? nelts + elem * elemsz + i % elemsz : i);
>> 
>> So as far as the encoding goes, the first min_nelts elements are arbitrary
>> values, and the following two min_nelts sequences form individual linear
>> series.
>
> OK - not sure why we need exactly three nelts per pattern here.

There are three styles of encoding (see the VECTOR_CST docs in
generic.texi for the full gory details):

- replicated {a0,...,an} (1 element per pattern)

- {a0,...,an} followed by replicated {b0,...,bn} (2 elements per pattern)

- {a0,...,an} followed by {b0,...,bn,b0+step0,...,bn+stepn,b0+step0*2,...}
  (3 elements per pattern)

The min_elts check ensures that the difference from the identity permute
selector is all in {a0,...,an}.  The rest of the vector contains the normal
elements for an identity selector and extends for as long as the runtime
VL needs it to extend.

> It also looks like all the constant_multiple_p () checks constrain
> things quite a bit.

I don't think it constrains it beyond what we can reasonably do.
For SVE this is most likely to be useful when converting between
SVE and Advanced SIMD.

> Oh, and does a BIT_FIELD_REF with poly-int position
> extract multiple elements in the end?!  For the case we are extracting
> a sub-vector and thus elemsz != 1 we constrain it so that this
> sub-vector is not of variable size (err, not "independently" so,
> whatever that means..)?

No, a poly-int position doesn't change how many elements we extract.
It just defers the calculation of the position until runtime.

> My brain hurts...  how do you write a GIMPLE testcase for aarch64
> SVE covering such cases?

Gimple testcase: with difficulty :-)  I don't think we have a
gimple FE syntax for poly_ints yet.

It might be possible to construct a C testcase using intrinsics.
I'll give it a go...

>> This ought to work for both SVE and non-SVE, although obviously
>> there's a bit of wasted work for non-SVE.
>> 
>> (And thanks for asking :-))
>
> So like this, it seems to still work on the x86 testcases?

LGTM.  I think the elemsz calculation is going to run into the same
kind of trouble as PR94980 for AVX/SVE vector booleans, but that
shouldn't hold the patch up.

Thanks,
Richard


Re: [PATCH] c-family: Fix error-recovery ICE on __builtin_speculation_safe_value [PR94968]

2020-05-07 Thread Marek Polacek via Gcc-patches
On Thu, May 07, 2020 at 09:42:32AM +0200, Jakub Jelinek wrote:
> Hi!
> 
> If the second argument of __builtin_speculation_safe_value is
> error_mark_node (or has such a type), we ICE during
> useless_type_conversion_p.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

Ok, thanks.

Marek



Re: [PATCH] tree-optimization/94865 - combine BIT_INSERT_EXPR of BIT_FIELD_REF

2020-05-07 Thread Richard Biener
On Thu, 7 May 2020, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Thu, 7 May 2020, Richard Sandiford wrote:
> >
> >> Richard Biener  writes:
> >> > This implements patterns combining vector element insertion of
> >> > vector element extraction to a VEC_PERM_EXPR of both vectors
> >> > when supported.  Plus it adds the more generic identity transform
> >> > of inserting a piece of itself at the same position.
> >> >
> >> > Richard - is there anything I can do to make this SVE aware?
> >> > I'd need to construct an identity permute and "insert" into
> >> > that permute that element from the other (or same) vector.
> >> > I suppose for most element positions that won't work but
> >> > at least inserting at [0] should?  I'm mostly struggling
> >> > on how to use vec_perm_builder here when nelts is not constant,
> >> > since it's derived from vec<> can I simply start with
> >> > a single pattern with 1 stride and then insert by using []?
> >> 
> >> I guess for SVE we still want to know that the range is safe
> >> for all VL, so after dropping the is_constant check, we'd
> >> want something like:
> >> 
> >>{
> >>  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
> >>  unsigned int min_nelts = constant_lower_bound (nelts);
> >>}
> >>(if (...
> >> && at + elemsz <= min_nelts)
> >> 
> >> In theory (hah) it should then just be a case of changing the
> >> vec_perm_builder constructor to:
> >> 
> >>   vec_perm_builder sel (nelts, min_nelts, 3);
> >> 
> >> and then iterating over min_nelts * 3 instead of nelts here:
> >> 
> >> > +   for (unsigned i = 0; i < nelts; ++i)
> >> > + sel.quick_push (i / elemsz == at
> >> > + ? nelts + elem * elemsz + i % elemsz : i);
> >> 
> >> So as far as the encoding goes, the first min_nelts elements are arbitrary
> >> values, and the following two min_nelts sequences form individual linear
> >> series.
> >
> > OK - not sure why we need exactly three nelts per pattern here.
> 
> There are three styles of encoding (see the VECTOR_CST docs in
> generic.texi for the full gory details):
> 
> - replicated {a0,...,an} (1 element per pattern)
> 
> - {a0,...,an} followed by replicated {b0,...,bn} (2 elements per pattern)
> 
> - {a0,...,an} followed by {b0,...,bn,b0+step0,...,bn+stepn,b0+step0*2,...}
>   (3 elements per pattern)
> 
> The min_elts check ensures that the difference from the identity permute
> selector is all in {a0,...,an}.  The rest of the vector contains the normal
> elements for an identity selector and extends for as long as the runtime
> VL needs it to extend.
> 
> > It also looks like all the constant_multiple_p () checks constrain
> > things quite a bit.
> 
> I don't think it constrains it beyond what we can reasonably do.
> For SVE this is most likely to be useful when converting between
> SVE and Advanced SIMD.
> 
> > Oh, and does a BIT_FIELD_REF with poly-int position
> > extract multiple elements in the end?!  For the case we are extracting
> > a sub-vector and thus elemsz != 1 we constrain it so that this
> > sub-vector is not of variable size (err, not "independently" so,
> > whatever that means..)?
> 
> No, a poly-int position doesn't change how many elements we extract.
> It just defers the calculation of the position until runtime.
> 
> > My brain hurts...  how do you write a GIMPLE testcase for aarch64
> > SVE covering such cases?
> 
> Gimple testcase: with difficulty :-)  I don't think we have a
> gimple FE syntax for poly_ints yet.
> 
> It might be possible to construct a C testcase using intrinsics.
> I'll give it a go...
> 
> >> This ought to work for both SVE and non-SVE, although obviously
> >> there's a bit of wasted work for non-SVE.
> >> 
> >> (And thanks for asking :-))
> >
> > So like this, it seems to still work on the x86 testcases?
> 
> LGTM.  I think the elemsz calculation is going to run into the same
> kind of trouble as PR94980 for AVX/SVE vector booleans, but that
> shouldn't hold the patch up.

I see.  Looks like I have to fix PR88540 first as I see both
gcc.target/i386/pr54855-8.c and gcc.target/i386/pr54855-9.c FAIL
after the patch.  Those are testcases for fancy combiner patterns
involving min/max operations on the [0] lane, leaving the rest of
the vector unmodified.  With the patch we enter PRE with

   [local count: 1073741824]:
  _1 = BIT_FIELD_REF ;
  if (_1 > a_4(D))
goto ; [50.00%]
  else
goto ; [50.00%]

   [local count: 536870913]:

   [local count: 1073741824]:
  # iftmp.0_2 = PHI <_1(3), a_4(D)(2)>
  x_5 = BIT_INSERT_EXPR ;
  return x_5;

and of course PRE sees that on one path x is unchanged and turns
it into

   [local count: 1073741824]:
  _1 = BIT_FIELD_REF ;
  if (_1 > a_4(D))
goto ; [50.00%]
  else
goto ; [50.00%]

   [local count: 536870912]:
  _7 = BIT_INSERT_EXPR ;

   [local count: 1073741824]:
  # prephitmp_8 = PHI <_7(3), x_3(D)(2)>
  return prephitmp_8;

defeating RTL if-conversion (there's some IEEE_MIN/MAX UNSPECs
in 

[PATCH 2/2] ipa/94947 - avoid using externally_visible_p ()

2020-05-07 Thread Richard Biener
externally_visible_p wasn't the correct predicate to use (even if it
worked), instead we should use DECL_EXTERNAL || TREE_PUBLIC.

2020-05-07  Richard Biener  

PR ipa/94947
* tree-ssa-structalias.c (refered_from_nonlocal_fn): Use
DECL_EXTERNAL || TREE_PUBLIC instead of externally_visible.
(refered_from_nonlocal_var): Likewise.
(ipa_pta_execute): Likewise.
---
 gcc/tree-ssa-structalias.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index e9fcafd9b34..40171d53d0a 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -8085,7 +8085,8 @@ refered_from_nonlocal_fn (struct cgraph_node *node, void 
*data)
 {
   bool *nonlocal_p = (bool *)data;
   *nonlocal_p |= (node->used_from_other_partition
- || node->externally_visible
+ || DECL_EXTERNAL (node->decl)
+ || TREE_PUBLIC (node->decl)
  || node->force_output
  || lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl)));
   return false;
@@ -8097,7 +8098,8 @@ refered_from_nonlocal_var (struct varpool_node *node, 
void *data)
 {
   bool *nonlocal_p = (bool *)data;
   *nonlocal_p |= (node->used_from_other_partition
- || node->externally_visible_p ()
+ || DECL_EXTERNAL (node->decl)
+ || TREE_PUBLIC (node->decl)
  || node->force_output);
   return false;
 }
@@ -8146,7 +8148,8 @@ ipa_pta_execute (void)
 For local functions we see all callers and thus do not need initial
 constraints for parameters.  */
   bool nonlocal_p = (node->used_from_other_partition
-|| node->externally_visible
+|| DECL_EXTERNAL (node->decl)
+|| TREE_PUBLIC (node->decl)
 || node->force_output
 || lookup_attribute ("noipa",
  DECL_ATTRIBUTES (node->decl)));
@@ -8187,7 +8190,8 @@ ipa_pta_execute (void)
 
   /* For the purpose of IPA PTA unit-local globals are not
  escape points.  */
-  bool nonlocal_p = (var->externally_visible_p ()
+  bool nonlocal_p = (DECL_EXTERNAL (var->decl)
+|| TREE_PUBLIC (var->decl)
 || var->used_from_other_partition
 || var->force_output);
   var->call_for_symbol_and_aliases (refered_from_nonlocal_var,
-- 
2.13.7


Re: [PATCH] tree: Fix up get_narrower [PR94724]

2020-05-07 Thread Thomas Schwinge
Hi!

On 2020-04-23T21:04:35+0200, Richard Biener  wrote:
> On April 23, 2020 8:19:55 PM GMT+02:00, Jakub Jelinek  
> wrote:
>>[patch] instead uses an auto_vec and builds them in the reverse
>>order
>>so when they are built, they are built with the correct operands.
>>
>>Bootstrapped/regtested on powerpc64{,le}-linux, ok for trunk?
>
> OK.

(That got pushed in commit bca558de2a24b2a78c6a321d6cec384e07759d77
"tree: Fix up get_narrower [PR94724]".)

>>--- gcc/tree.c.jj 2020-04-04 09:14:29.808002636 +0200
>>+++ gcc/tree.c2020-04-23 11:07:34.003675831 +0200
>>@@ -8881,18 +8881,22 @@ get_narrower (tree op, int *unsignedp_pt

Note existing 'op' variable here...

>>
>>   if (TREE_CODE (op) == COMPOUND_EXPR)
>> {
>>-  while (TREE_CODE (op) == COMPOUND_EXPR)
>>+  do
>>  op = TREE_OPERAND (op, 1);
>>+  while (TREE_CODE (op) == COMPOUND_EXPR);
>>   tree ret = get_narrower (op, unsignedp_ptr);
>>   if (ret == op)
>>  return win;
>>-  op = win;
>>-  for (tree *p = &win; TREE_CODE (op) == COMPOUND_EXPR;
>>-op = TREE_OPERAND (op, 1), p = &TREE_OPERAND (*p, 1))
>>- *p = build2_loc (EXPR_LOCATION (op), COMPOUND_EXPR,
>>-  TREE_TYPE (ret), TREE_OPERAND (op, 0),
>>-  ret);
>>-  return win;
>>+  auto_vec  v;
>>+  unsigned int i;
>>+  for (tree op = win; TREE_CODE (op) == COMPOUND_EXPR;

..., and new 'op' variable here.

>>+op = TREE_OPERAND (op, 1))
>>+ v.safe_push (op);
>>+  FOR_EACH_VEC_ELT_REVERSE (v, i, op)
>>+ ret = build2_loc (EXPR_LOCATION (op), COMPOUND_EXPR,
>>+   TREE_TYPE (win), TREE_OPERAND (op, 0),
>>+   ret);
>>+  return ret;
>> }
>>   while (TREE_CODE (op) == NOP_EXPR)
>> {

Old compilers:

g++ (Sourcery CodeBench 2014.05-45) 4.8.3 20140320 (prerelease)

..., and also:

g++-4.6 (Ubuntu/Linaro 4.6.3-10ubuntu1) 4.6.3 20120918 (prerelease)

... warn:

{+[...]/gcc/tree.c: In function 'tree_node* get_narrower(tree, int*)':+}
{+[...]/gcc/tree.c:8897:55: warning: name lookup of 'op' changed [enabled 
by default]+}
{+   FOR_EACH_VEC_ELT_REVERSE (v, i, op)+}
{+   ^+}
{+[...]/gcc/tree.c:8877:1: warning:   matches this 'op' under ISO standard 
rules [enabled by default]+}
{+ get_narrower (tree op, int *unsignedp_ptr)+}
{+ ^+}
{+[...]/gcc/tree.c:8894:17: warning:   matches this 'op' under old rules 
[enabled by default]+}
{+   for (tree op = win; TREE_CODE (op) == COMPOUND_EXPR;+}
{+ ^+}

("Interesting.")  The bootstrapped GCC itself doesn't diagnose this.  Is
there something to be worried about?  (Certainly the variable shadowing
could be avoided?)


Grüße
 Thomas
-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] testsuite: Improve g++.dg/ext/attr-parm-1.C testcase [PR94946]

2020-05-07 Thread Nathan Sidwell

On 5/7/20 4:03 AM, Jakub Jelinek wrote:

Hi!

On Wed, May 06, 2020 at 02:14:03PM -0400, Nathan Sidwell wrote:

--- /dev/null
+++ w/gcc/testsuite/g++.dg/ext/attr-parm-1.C
@@ -0,0 +1,7 @@
+// { dg-do compile { target { i?86-*-* x86_64-*-* } } }
+// { dg-options -m32 }
+// PR 94946
+class a {
+  template  a(b (*)());
+  template  a(b(__attribute__((fastcall)) *c)());
+};


The testcase in the current form doesn't FAIL without the patch on
x86_64-linux unless also testing with -m32; as that the 64-bit testing
on that target is probably way more common, and we can use also attributes


yeah, that was what I was trying to cover originally.  Thanks for fixing up!


that FAIL without the patch with -m64, the following patch adjusts the
test, so that it FAILs without the patch for both -m64 and -m32 (but not
-mx32) and PASSes with the patch.  Ok for trunk/10.2?
And for 9.4 with additional removal of the forgotten dg-options -m32 line?

Regtested on x86_64-linux and i686-linux.

2020-05-06  Jakub Jelinek  

PR c++/94946
* g++.dg/ext/attr-parm-1.C: Enable the test also for lp64 x86, use
sysv_abi and ms_abi attributes in that case instead of fastcall and
no attribute.

--- gcc/testsuite/g++.dg/ext/attr-parm-1.C.jj   2020-05-06 22:48:46.215419942 
+0200
+++ gcc/testsuite/g++.dg/ext/attr-parm-1.C  2020-05-06 22:50:40.117727005 
+0200
@@ -1,6 +1,11 @@
-// { dg-do compile { target { { i?86-*-* x86_64-*-* } && ia32 } } }
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && { ia32 || lp64 } } } }
  // PR 94946
  class a {
+#ifdef __LP64__
+  template  a(b(__attribute__((sysv_abi)) *c)());
+  template  a(b(__attribute__((ms_abi)) *c)());
+#else
template  a(b (*)());
template  a(b(__attribute__((fastcall)) *c)());
+#endif
  };


Jakub




--
Nathan Sidwell


[Patch] (general Fortran + OpenMP) [Fortran] Fix/modify present() handling for assumed-shape optional (PR 94672)

2020-05-07 Thread Tobias Burnus

The main purpose of this patch is to fix OpenMP, but it modifies
the general Fortran handling of assumed-shape optional arguments.

For assumed shape, gfortran generates an "arg.0 = arg->data"
artificial variable – and with optional one has something like

if (arg != NULL && arg->data != NULL)
  {
arg.0 = arg->data;
lbound.0 = ...
  }

And an "if (present(arg))" becomes
"if (arg != NULL && arg->data != NULL)".

The proposed change changes the init to:

if (arg != NULL && arg->data != NULL)
  {
arg.0 = arg->data;
lbound.0 = ...
  }
else
  arg.0 = NULL;  // <-- new

Such that an "if (present(arg))" becomes "if (arg.0 != NULL)".

I think for Fortran code itself, it does not really make any
difference. However, for OpenMP (and OpenACC) it does.

Currently,
  !$omp …
if (present(arg)) stop 1
  !$omp end …

has decl = "arg.0" and SAVED_DESCRIPTOR(decl) == "arg" such
that inside the omp block everything is "arg.0" – except for
"if (present(arg))" which is converted to the "arg != NULL && arg->data != NULL".

This causes the problems shown in the PR (PR94672).

For optional & 'omp target' where one has to map the variable and
has to check it inside the target function, I even ended up setting
"arg.0 = NULL" explicitly as this was much simpler than adding more
checking in gcc/omp-low.c.


Thus: I think either variant (checking arg directly vs. checking arg.0
plus setting it to NULL) works equally well with normal Fortran code;
one can probably design code where one or the other is slightly faster,
but at the end it should not matter.
And for OpenMP/OpenACC, the new variant avoids several problems.

Hence:
OK for the trunk – and GCC 10 (regression, rejects valid code)?

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
[Fortran] Fix/modify present() handling for assumed-shape optional (PR 94672)

gcc/fortran/
2020-05-07  Tobias Burnus  

	PR fortran/94672
	* trans.h (gfc_conv_expr_present): Add use_saved_desc=false argument.
	* trans-expr.c (gfc_conv_expr_present): Likewise; use DECL directly
	and only if use_saved_desc is true, use the actual PARM_DECL arg (saved
	descriptor).
	* trans-array.c (gfc_trans_dummy_array_bias): Set local 'arg.0'
	variable to NULL if 'arg' is not present.
	* trans-openmp.c (gfc_omp_check_optional_argument): Simplify by checking
	'arg.0' instead of the true PARM_DECL.
	(gfc_omp_finish_clause): Remove setting 'arg.0' to NULL.

gcc/testsuite/
2020-05-07  Jakub Jelinek  
	Tobias Burnus  

	PR fortran/94672
	* gfortran.dg/gomp/pr94672.f90: New.
	* gfortran.dg/missing_optional_dummy_6a.f90: Update scan-tree.

 gcc/fortran/trans-array.c  |   8 +-
 gcc/fortran/trans-expr.c   |  22 ++--
 gcc/fortran/trans-openmp.c |  42 +--
 gcc/fortran/trans.h|   2 +-
 gcc/testsuite/gfortran.dg/gomp/pr94672.f90 | 127 +
 .../gfortran.dg/missing_optional_dummy_6a.f90  |   3 +-
 6 files changed, 152 insertions(+), 52 deletions(-)

diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 9c928d04e0a..434960c5bc7 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -6787,9 +6787,11 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc,
 		  && sym->attr.dummy));
   if (optional_arg)
 {
-  tmp = gfc_conv_expr_present (sym);
-  stmtInit = build3_v (COND_EXPR, tmp, stmtInit,
-			   build_empty_stmt (input_location));
+  tree zero_init = fold_convert (TREE_TYPE (tmpdesc), null_pointer_node);
+  zero_init = fold_build2_loc (input_location, MODIFY_EXPR, void_type_node,
+   tmpdesc, zero_init);
+  tmp = gfc_conv_expr_present (sym, true);
+  stmtInit = build3_v (COND_EXPR, tmp, stmtInit, zero_init);
 }
 
   /* Cleanup code.  */
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 030edc1e5ce..33fc061d89b 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -1712,12 +1712,12 @@ gfc_make_safe_expr (gfc_se * se)
Also used for arguments to procedures with multiple entry points.  */
 
 tree
-gfc_conv_expr_present (gfc_symbol * sym)
+gfc_conv_expr_present (gfc_symbol * sym, bool use_saved_desc)
 {
-  tree decl, cond;
+  tree decl, orig_decl, cond;
 
   gcc_assert (sym->attr.dummy);
-  decl = gfc_get_symbol_decl (sym);
+  orig_decl = decl = gfc_get_symbol_decl (sym);
 
   /* Intrinsic scalars with VALUE attribute which are passed by value
  use a hidden argument to denote the present status.  */
@@ -1744,10 +1744,13 @@ gfc_conv_expr_present (gfc_symbol * sym)
   return cond;
 }
 
-  if (TREE_CODE (decl) != PARM_DECL)
+  /* Assumed-shape arrays use a local variable for the array data;
+ the actual PARAM_DECL is in a saved decl.  As the local variable
+ is NULL, it can be checked instead, unless use_sav

Re: [PATCH][RFC] extend DECL_GIMPLE_REG_P to all types

2020-05-07 Thread Richard Biener
On Thu, 23 Apr 2020, Richard Biener wrote:

> On Wed, 22 Apr 2020, Richard Sandiford wrote:
> 
> > Richard Biener  writes:
> > > On Wed, 22 Apr 2020, Richard Biener wrote:
> > >
> > >> 
> > >> This extends DECL_GIMPLE_REG_P to all types so we can clear
> > >> TREE_ADDRESSABLE even for integers with partial defs, not just
> > >> complex and vector variables.  To make that transition easier
> > >> the patch inverts DECL_GIMPLE_REG_P to DECL_NOT_GIMPLE_REG_P
> > >> since that makes the default the current state for all other
> > >> types besides complex and vectors.  That also nicely simplifies
> > >> code throughout the compiler.
> > >> 
> > >> TREE_ADDRESSABLE and DECL_NOT_GIMPLE_REG_P are now truly
> > >> independent, either set prevents a decl from being rewritten
> > >> into SSA form.
> > >> 
> > >> For the testcase in PR94703 we're able to expand the partial
> > >> def'ed local integer to a register then, producing a single
> > >> movl rather than going through the stack.
> > >> 
> > >> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> > >> 
> > >> If there are no objections I'm going to install this once
> > >> stage1 opens.
> > >
> > > Of course there was some fallout.  On 32bit x86 gcc.dg/torture/pr71522.c
> > > fails execution because while the GIMPLE is unchanged at RTL expansion
> > > time:
> > >
> > > main ()
> > > {
> > >   char s[12];
> > >   long double d;
> > >
> > >   MEM  [(char * {ref-all})&d] = MEM  > > char[12]> 
> > > [(char * {ref-all})"AAA"];
> > >   MEM  [(char * {ref-all})&s] = MEM  > > char[12]> 
> > > [(char * {ref-all})&d];
> > >   _1 = __builtin_strcmp (&s, "AAA");
> > >   if (_1 != 0)
> > > ...
> > >
> > > we now assign 'd' an XFmode register (TREE_ADDRESSABLE is cleared
> > > now since we can set DECL_NOT_GIMPLE_REG_P).  The case is lost
> > > then, impossible to fix up AFAICS.  On x86 all moves to/from
> > > XFmode are normalizing, specifically we end up with
> > >
> > > fldt.LC0
> > > fstpt   (%esp)
> > >
> > > now the most appealing solution - and totally in the opposite
> > > direction of this patch - is to simply stop expanding non-SSA names
> > > as pseudos.  I do not remember the history as why we do this
> > > but it's likely remnants we preserved from either pre-SSA, times
> > > we did not go into SSA for -O0 or times we really gone out-of-SSA.
> > >
> > > There is _some_ good reason to expand a non-SSA "register" into
> > > a pseudo though - namely that RTL is not SSA and thus can accept
> > > partial defs.  And of course that RTL cannot get rid of a stack
> > > slot assigned to a variable.  Today we have somewhat robust
> > > infrastructure to deal with partial defs on GIMPLE, namely
> > > BIT_INSERT_EXPR, but it's not fully exercised.
> > 
> > Yeah, not being able to get rid of the stack slot seems
> > worrying here.
> > 
> > > It's of course possible to fixup the above problematical
> > > cases (there's precedent with discover_nonconstant_array_refs,
> > > which could be "easily" extended to handle "weird" accesses
> > > of non-integral-mode variables) but with the recent discussion
> > > on making RTL expansion more straight-forward I'd bring up
> > > the above idea ... it would get rid of quite some special
> > > code dealing with tcc_reference trees (and MEM_REFs) ending
> > > up operating on registers.
> > 
> > It might be nice to do it eventually, but I think at least
> > is_gimple_reg_type would need to be "return true" first,
> > otherwise we'll lose too much on aggregates.
> > 
> > There's also the problem that things passed in registers do need
> > to be RTL registers at function boundaries, so I'm not sure all
> > the expand code would necessarily go away.
> > 
> > Wouldn't want to see all targets suffer for XFmode oddities :-)
> 
> OK, so here's the patch amended with some heuristics to catch
> this.  The heuristic triggers exactly on the previously
> failing testcase and nothing else on a x86_64 bootstrap and regtest.
> Citing the code:
> 
> /* If there's a chance to get a pseudo for t then if it would be of float 
> mode
>and the actual access is via an integer mode (lowered memcpy or similar
>access) then avoid the register expansion if the mode likely is not 
> storage
>suitable for raw bits processing (like XFmode on i?86).  */
> 
> static void
> avoid_type_punning_on_regs (tree t)
> {
>   machine_mode access_mode = TYPE_MODE (TREE_TYPE (t));
>   if (access_mode != BLKmode
>   && !SCALAR_INT_MODE_P (access_mode))
> return;
>   tree base = get_base_address (t);
>   if (DECL_P (base)
>   && !TREE_ADDRESSABLE (base)
>   && FLOAT_MODE_P (DECL_MODE (base))
>   && maybe_lt (GET_MODE_PRECISION (DECL_MODE (base)),
>GET_MODE_BITSIZE (GET_MODE_INNER (DECL_MODE (base
>   /* Double check in the expensive way we really would get a pseudo.  
> */
>   && use_register_for_decl (base))
> TREE_ADDRESSABLE (base) = 1;
> }
> 
> invoked on stores like

[PATCH][DOC] Fix optindex entries for 2 options.

2020-05-07 Thread Martin Liška

Hi.

It's an obvious documentation fix. I'm going to install it
also to gcc-10 branch.

Martin

gcc/ChangeLog:

2020-05-07  Martin Liska  

* doc/invoke.texi: Fix 2 optindex entries.
---
 gcc/doc/invoke.texi | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c97318f0465..35e8242af5f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11399,7 +11399,7 @@ default, GCC emits an error message when an inconsistent profile is detected.
 This option is enabled by @option{-fauto-profile}.
 
 @item -fprofile-partial-training
-@opindex fprofile-use
+@opindex fprofile-partial-training
 With @code{-fprofile-use} all portions of programs not executed during train
 run are optimized agressively for size rather than speed.  In some cases it is
 not practical to train all possible hot paths in the program. (For
@@ -13393,6 +13393,7 @@ location.  If you combine the option with multiple source files,
 the @file{.gcno} file will be overwritten.
 
 @item -fprofile-prefix-path=@var{path}
+@opindex fprofile-prefix-path
 
 This option can be used in combination with
 @option{profile-generate=}@var{profile_dir} and



Ping: [PATCH] wwwdocs: Add D front-end section for GCC 10 changes

2020-05-07 Thread Iain Buclaw via Gcc-patches
Hi,

Updated the patch to include missed changes, and slightly reworded some entries
to make them clearer/read easier.

OK to commit?

Iain.

---
 htdocs/gcc-10/changes.html | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html
index 41c2dc0d..f10cfd56 100644
--- a/htdocs/gcc-10/changes.html
+++ b/htdocs/gcc-10/changes.html
@@ -432,6 +432,41 @@ a work-in-progress.
   
 
 
+D
+
+  Support for static foreach has been implemented.
+  Aliases can now be created directly from any __traits that
+  return symbols or tuples.  Previously, an AliasSeq was
+  necessary in order to alias their return.
+  
+  It is now possible to detect the language ABI specified for a struct,
+  class, or interface using __traits(getLinkage, ...)
+  Support for core.math.toPrec intrinsics has been added.
+  These intrinsics guarantee the rounding to specific floating-point
+  precisions at required points in the code.
+  
+  Support for pragma(inline) has been implemented.  Previously
+  the pragma was recognized, but had no effect on the compilation.
+  
+  Optional parentheses in asm operands are now deprecated and
+  will be removed in a future release.
+  
+  All content imported files are now included in the make dependency list
+  when compiling with -M.
+  
+  Compiler recognized attributes provided by the gcc.attribute
+  module will now take effect when applied to function prototypes as well
+  as when applied to full function declarations.
+  
+  Added --enable-libphobos-checking configure option to
+  control whether run-time checks are compiled into the D runtime library.
+  
+  Added --with-libphobos-druntime-only configure option to
+  allow specifying whether to build only the core D runtime library, or
+  both the core and standard libraries into libphobos.
+  
+
+
 Fortran
 
   use_device_addr of version 5.0 of the


[PATCH] tree: Avoid variable sharing in get_narrower [PR94724]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
Hi!

On Thu, May 07, 2020 at 02:45:29PM +0200, Thomas Schwinge wrote:
> >>+  for (tree op = win; TREE_CODE (op) == COMPOUND_EXPR;
> 
> ..., and new 'op' variable here.
> 
> >>+op = TREE_OPERAND (op, 1))
> >>+ v.safe_push (op);
> >>+  FOR_EACH_VEC_ELT_REVERSE (v, i, op)
> >>+ ret = build2_loc (EXPR_LOCATION (op), COMPOUND_EXPR,
> >>+   TREE_TYPE (win), TREE_OPERAND (op, 0),
> >>+   ret);
> >>+  return ret;
> >> }
> >>   while (TREE_CODE (op) == NOP_EXPR)
> >> {

There is no reason for the shadowing and op at this point acts as a
temporary and will be overwritten in FOR_EACH_VEC_ELT_REVERSE anyway.
So, we can just s/tree // here.
Ok for trunk if it passes bootstrap/regtest?

> ("Interesting.")  The bootstrapped GCC itself doesn't diagnose this.  Is
> there something to be worried about?  (Certainly the variable shadowing
> could be avoided?)

Nothing to be worried about, -Wshadow isn't part of -W -Wall from what I can
understand.  If you use -Wshadow, it is diagnosed.

2020-05-07  Jakub Jelinek  

PR middle-end/94724
* tree.c (get_narrower): Reuse the op temporary instead of
shadowing it.

--- gcc/tree.c.jj   2020-05-05 08:57:55.646638787 +0200
+++ gcc/tree.c  2020-05-07 15:58:17.049717054 +0200
@@ -8889,7 +8889,7 @@ get_narrower (tree op, int *unsignedp_pt
return win;
   auto_vec  v;
   unsigned int i;
-  for (tree op = win; TREE_CODE (op) == COMPOUND_EXPR;
+  for (op = win; TREE_CODE (op) == COMPOUND_EXPR;
   op = TREE_OPERAND (op, 1))
v.safe_push (op);
   FOR_EACH_VEC_ELT_REVERSE (v, i, op)


Jakub



[PATCH v2] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
On Thu, May 07, 2020 at 10:04:35AM +0200, Richard Biener wrote:
> On Thu, 7 May 2020, Jakub Jelinek wrote:
> > The ffs expanders on several targets (x86, ia64, aarch64 at least)
> > emit a conditional move or similar code to handle the case when the
> > argument is 0, which makes the code longer.
> > If we know from VRP that the argument will not be zero, we can (if the
> > target also has a ctz expander) just use ctz which is undefined at zero
> > and thus the expander doesn't need to deal with that.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> can you use direct_internal_fn_supported_p (IFN_CTZ, type, 
> OPTIMIZE_FOR_SPEED)?

Only if it is guarded with #if GIMPLE (because otherwise the fn
isn't declared).
Though, restricting this to GIMPLE seems like a good idea anyway to me.

Ok for trunk if it passes bootstrap/regtest?

2020-05-07  Jakub Jelinek  

PR tree-optimization/94956
* match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
__builtin_ctz* + 1 if direct IFN_CTZ is supported.

* gcc.target/i386/pr94956.c: New test.

--- gcc/match.pd.jj 2020-05-06 15:03:51.618058839 +0200
+++ gcc/match.pd    2020-05-07 16:16:48.466970168 +0200
@@ -5986,6 +5986,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& direct_internal_fn_supported_p (IFN_POPCOUNT, type,
   OPTIMIZE_FOR_BOTH))
 (convert (IFN_POPCOUNT:type @0)
+
+/* __builtin_ffs needs to deal on many targets with the possible zero
+   argument.  If we know the argument is always non-zero, __builtin_ctz + 1
+   should lead to better code.  */
+(simplify
+ (FFS tree_expr_nonzero_p@0)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+OPTIMIZE_FOR_SPEED))
+  (plus (CTZ:type @0) { build_one_cst (type); })))
 #endif
 
 /* Simplify:
--- gcc/testsuite/gcc.target/i386/pr94956.c.jj  2020-05-06 16:35:47.085876237 
+0200
+++ gcc/testsuite/gcc.target/i386/pr94956.c 2020-05-06 16:39:52.927140038 
+0200
@@ -0,0 +1,28 @@
+/* PR tree-optimization/94956 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
+/* { dg-final { scan-assembler-not "\tsete\t" } } */
+
+int
+foo (unsigned x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffs (x) - 1;
+}
+
+int
+bar (unsigned long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsl (x) - 1;
+}
+
+#ifdef __x86_64__
+int
+baz (unsigned long long x)
+{
+  if (x == 0) __builtin_unreachable ();
+  return __builtin_ffsll (x) - 1;
+}
+#endif


Jakub



[PATCH v2] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
On Thu, May 07, 2020 at 09:59:57AM +0200, Richard Biener wrote:
> Maybe write A - B + -1 >= A to actually match what you match below
...
> on the plus :c is not needed, canonicalization will put the constant
> literal second
> 
> The previous pattern has a single_use check on the minus, since
> the result is always "simple" (a single stmt) the :s have no
> effect (hmm, I guess a genmatch warning for this case might be nice).
> 
> And yes, if the TYPE_OVERFLOW_WRAPS checks are unnecessary remove
> them please, we'll hopefully resist all attempts to ubsan
> unsigned overflow ...

So like this if it passes bootstrap/regtest?

2020-05-07  Jakub Jelinek  

PR tree-optimization/94913
* match.pd (A - B + -1 >= A to B >= A): New simplification.
(A - B > A to A < B): Don't test TYPE_OVERFLOW_WRAPS which is always
true for TYPE_UNSIGNED integral types.

* gcc.dg/tree-ssa/pr94913.c: New test.

--- gcc/match.pd.jj 2020-05-06 11:18:35.0 +0200
+++ gcc/match.pd    2020-05-07 15:52:41.450787729 +0200
@@ -4787,10 +4787,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cmp:c (minus@2 @0 @1) @0)
   (if (single_use (@2)
&& ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
-   && TYPE_UNSIGNED (TREE_TYPE (@0))
-   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+   && TYPE_UNSIGNED (TREE_TYPE (@0)))
(cmp @1 @0
 
+/* Optimize A - B + -1 >= A into B >= A for unsigned comparisons.  */
+(for cmp (ge lt)
+ (simplify
+  (cmp:c (plus (minus @0 @1) integer_minus_onep) @0)
+   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && TYPE_UNSIGNED (TREE_TYPE (@0)))
+(cmp @1 @0
+
 /* Testing for overflow is unnecessary if we already know the result.  */
 /* A - B > A  */
 (for cmp (gt le)
--- gcc/testsuite/gcc.dg/tree-ssa/pr94913.c.jj  2020-05-06 15:20:08.306376994 
+0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94913.c 2020-05-06 15:19:45.120725533 
+0200
@@ -0,0 +1,33 @@
+/* PR tree-optimization/94913 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " (?:b_\[0-9]+\\\(D\\\) >= a|a_\[0-9]+\\\(D\\\) 
<= b)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:c_\[0-9]+\\\(D\\\) > d|d_\[0-9]+\\\(D\\\) 
< c)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:f_\[0-9]+\\\(D\\\) >= e|e_\[0-9]+\\\(D\\\) 
<= f)_\[0-9]+\\\(D\\\);" "optimized" } } */
+/* { dg-final { scan-tree-dump " (?:g_\[0-9]+\\\(D\\\) > h|h_\[0-9]+\\\(D\\\) 
< g)_\[0-9]+\\\(D\\\);" "optimized" } } */
+
+int
+foo (unsigned a, unsigned b)
+{
+  return (a - b - 1) >= a;
+}
+
+int
+bar (unsigned c, unsigned d)
+{
+  return (c - d - 1) < c;
+}
+
+int
+baz (unsigned e, unsigned f)
+{
+  unsigned t = e - f;
+  return (t - 1) >= e;
+}
+
+int
+qux (unsigned g, unsigned h)
+{
+  unsigned t = g - h;
+  return (t - 1) < g;
+}


Jakub



[PATCH v2] match.pd: Canonicalize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X) [PR94783]

2020-05-07 Thread Jakub Jelinek via Gcc-patches
On Thu, May 07, 2020 at 10:12:12AM +0200, Richard Biener wrote:
> I think unconditionally using (abs @0) is simplifying things enough
> (getting rid of one xor and one plus) to not worry about keeping
> the (x >> (prec - 1))?

Ok.

> Do you really need the TYPE_OVERFLOW_UNDEFINED check?

Probably not, if overflow isn't undefined, then ABS_EXPR will not be
undefined on the type minimum either.

So like this (including dropping :s from plus), or should there be
single_use for it?  You've said that if the replacement is a simple stmt
:s is ignored...

2020-05-07  Jakub Jelinek  

PR tree-optimization/94783
* match.pd ((X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X)):
New simplification.

* gcc.dg/tree-ssa/pr94783.c: New test.

--- gcc/match.pd.jj 2020-05-06 15:48:23.658858289 +0200
+++ gcc/match.pd    2020-05-07 16:30:15.666817448 +0200
@@ -120,6 +120,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
(convert (absu:utype @0)
 
+#if GIMPLE
+/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X).  */
+(simplify
+ (bit_xor:c (plus:c @0 (rshift@2 @0 INTEGER_CST@1)) @2)
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+  && !TYPE_UNSIGNED (TREE_TYPE (@0))
+  && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1
+  && TREE_CODE (@2) == SSA_NAME)
+  (abs @0)))
+#endif
 
 /* Simplifications of operations with one constant operand and
simplifications to constants or single values.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr94783.c.jj  2020-05-06 17:52:35.515323297 
+0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr94783.c 2020-05-06 17:52:10.915693948 
+0200
@@ -0,0 +1,12 @@
+/* PR tree-optimization/94783 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump "ABS_EXPR" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " >> 31" "optimized" } } */
+
+int
+foo (int v)
+{
+  int mask = v >> (__SIZEOF_INT__ * __CHAR_BIT__ - 1);
+  return (v + mask) ^ mask;
+}


Jakub



Re: [PATCH] tree: Avoid variable sharing in get_narrower [PR94724]

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 16:13 +0200, Jakub Jelinek via Gcc-patches wrote:
> Hi!
> 
> On Thu, May 07, 2020 at 02:45:29PM +0200, Thomas Schwinge wrote:
> > > > +  for (tree op = win; TREE_CODE (op) == COMPOUND_EXPR;
> > 
> > ..., and new 'op' variable here.
> > 
> > > > +op = TREE_OPERAND (op, 1))
> > > > + v.safe_push (op);
> > > > +  FOR_EACH_VEC_ELT_REVERSE (v, i, op)
> > > > + ret = build2_loc (EXPR_LOCATION (op), COMPOUND_EXPR,
> > > > +   TREE_TYPE (win), TREE_OPERAND (op, 0),
> > > > +   ret);
> > > > +  return ret;
> > > > }
> > > >   while (TREE_CODE (op) == NOP_EXPR)
> > > > {
> 
> There is no reason for the shadowing and op at this point acts as a
> temporary and will be overwritten in FOR_EACH_VEC_ELT_REVERSE anyway.
> So, we can just s/tree // here.
> Ok for trunk if it passes bootstrap/regtest?
> 
> > ("Interesting.")  The bootstrapped GCC itself doesn't diagnose this.  Is
> > there something to be worried about?  (Certainly the variable shadowing
> > could be avoided?)
> 
> Nothing to be worried about, -Wshadow isn't part of -W -Wall from what I can
> understand.  If you use -Wshadow, it is diagnosed.
> 
> 2020-05-07  Jakub Jelinek  
> 
>   PR middle-end/94724
>   * tree.c (get_narrower): Reuse the op temporary instead of
>   shadowing it.
OK
jeff
> 



Re: [PATCH v2] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 16:27 +0200, Jakub Jelinek via Gcc-patches wrote:
> On Thu, May 07, 2020 at 09:59:57AM +0200, Richard Biener wrote:
> > Maybe write A - B + -1 >= A to actually match what you match below
> ...
> > on the plus :c is not needed, canonicalization will put the constant
> > literal second
> > 
> > The previous pattern has a single_use check on the minus, since
> > the result is always "simple" (a single stmt) the :s have no
> > effect (hmm, I guess a genmatch warning for this case might be nice).
> > 
> > And yes, if the TYPE_OVERFLOW_WRAPS checks are unnecessary remove
> > them please, we'll hopefully resist all attempts to ubsan
> > unsigned overflow ...
> 
> So like this if it passes bootstrap/regtest?
> 
> 2020-05-07  Jakub Jelinek  
> 
>   PR tree-optimization/94913
>   * match.pd (A - B + -1 >= A to B >= A): New simplification.
>   (A - B > A to A < B): Don't test TYPE_OVERFLOW_WRAPS which is always
>   true for TYPE_UNSIGNED integral types.
> 
>   * gcc.dg/tree-ssa/pr94913.c: New test.
I wouldn't be surprised if this trips issues with -Wuninitialized and the C++
runtime.   I like the change, but the VRP bits which detect overflow tests and
extract ranges are fragile -- though it's also possible your change would
eliminate the need for those bits.

jeff
> 



Re: [PATCH v2] c++: ICE with -Wall and constexpr if [PR94937]

2020-05-07 Thread Marek Polacek via Gcc-patches
On Wed, May 06, 2020 at 05:26:32PM -0400, Jason Merrill wrote:
> On 5/5/20 6:17 PM, Marek Polacek wrote:
> > An ICE arises here because we call cp_get_callee_fndecl_nofold in a
> > template, and we've got a CALL_EXPR whose CALL_EXPR_FN is a BASELINK.
> > This tickles the INDIRECT_TYPE_P assert in cp_get_fndecl_from_callee.
> > 
> > Jakub said in the PR that he'd hit a similar problem too and dealt
> > with it in omp_declare_variant_finalize_one.  I considered tweaking
> > is_std_constant_evaluated_p to return false for a BASELINK, since the
> > std::is_constant_evaluated call we're looking for can't be a member
> > function, but perhaps we could get another unexpected CALL_EXPR and
> > crash the same.  In which case it might be better to factor out the
> > omp_* code into a new routine and use that, as below.
> 
> Why not adjust cp_get_fndecl_from_callee to, say, return null instead of
> aborting when !INDIRECT_TYPE_P?

It's often hard (for me, anyway) to decide if an assert should stay or can
be turned into a condition.  In this case we could argue that initially
cp_get_fndecl_from_callee wasn't meant to be called in a template, but now
we have reasonable use cases where it actually is called in a template, so
should cope instead of crashing.  Is that a fair assessment?

I suspect one could argue that if we see a BASELINK, we might want to use
BASELINK_FUNCTIONS to get the FUNCTION_DECL, but I don't have a test where
that would make a difference.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/10.2?

-- >8 --
An ICE arises here because we call cp_get_callee_fndecl_nofold in a
template, and we've got a CALL_EXPR whose CALL_EXPR_FN is a BASELINK.
This tickles the INDIRECT_TYPE_P assert in cp_get_fndecl_from_callee.

Fixed by turning the assert into a condition and returning NULL_TREE
in that case.

PR c++/94937
* cvt.c (cp_get_fndecl_from_callee): Return NULL_TREE if the function
type is not INDIRECT_TYPE_P.
* decl.c (omp_declare_variant_finalize_one): Call
cp_get_callee_fndecl_nofold instead of looking for the function decl
manually.

* g++.dg/cpp1z/constexpr-if34.C: New test.
* g++.dg/cpp2a/is-constant-evaluated10.C: New test.
---
 gcc/cp/cvt.c  |  3 +-
 gcc/cp/decl.c | 12 +---
 gcc/testsuite/g++.dg/cpp1z/constexpr-if34.C   | 15 ++
 .../g++.dg/cpp2a/is-constant-evaluated10.C| 30 +++
 4 files changed, 47 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/constexpr-if34.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated10.C

diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c
index 656e7fd3ec0..371002833d0 100644
--- a/gcc/cp/cvt.c
+++ b/gcc/cp/cvt.c
@@ -995,9 +995,8 @@ cp_get_fndecl_from_callee (tree fn, bool fold /* = true */)
   if (TREE_CODE (fn) == FUNCTION_DECL)
 return fn;
   tree type = TREE_TYPE (fn);
-  if (type == unknown_type_node)
+  if (type == NULL_TREE || !INDIRECT_TYPE_P (type))
 return NULL_TREE;
-  gcc_assert (INDIRECT_TYPE_P (type));
   if (fold)
 fn = maybe_constant_init (fn);
   STRIP_NOPS (fn);
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 232d7ed4a14..cbdef3e3365 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -7297,17 +7297,7 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
   if (variant == error_mark_node && !processing_template_decl)
 return true;
 
-  variant = cp_get_callee (variant);
-  if (variant)
-{
-  if (TREE_CODE (variant) == FUNCTION_DECL)
-   ;
-  else if (TREE_TYPE (variant) && INDIRECT_TYPE_P (TREE_TYPE (variant)))
-   variant = cp_get_fndecl_from_callee (variant, false);
-  else
-   variant = NULL_TREE;
-}
-
+  variant = cp_get_callee_fndecl_nofold (variant);
   input_location = save_loc;
 
   if (variant)
diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-if34.C 
b/gcc/testsuite/g++.dg/cpp1z/constexpr-if34.C
new file mode 100644
index 000..6e0b2597a53
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-if34.C
@@ -0,0 +1,15 @@
+// PR c++/94937 - ICE with -Wall and constexpr if.
+// { dg-do compile { target c++17 } }
+// { dg-options "-Wall" }
+
+struct B {
+  static constexpr bool foo() { return false; }
+};
+
+template
+struct C {
+  static void bar ()
+  {
+if constexpr (B::foo()) ;
+  }
+};
diff --git a/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated10.C 
b/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated10.C
new file mode 100644
index 000..7b2e345f448
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated10.C
@@ -0,0 +1,30 @@
+// { dg-do compile { target c++2a } }
+// { dg-options "-Wtautological-compare" }
+
+namespace std {
+  constexpr inline bool
+  is_constant_evaluated () noexcept
+  {
+return __builtin_is_constant_evaluated ();
+  }
+}
+
+template
+constexpr int
+foo(int i)
+{
+  if constexpr (std::is_constant_evaluated ()) // { dg-warning 
".std::i

Re: [PATCH] match.pd: Canonicalize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X) [PR94783]

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 10:12 +0200, Richard Biener wrote:
> On Thu, 7 May 2020, Jakub Jelinek wrote:
> 
> > Hi!
> > 
> > The following patch canonicalizes M = X >> (prec - 1); (X + M) ^ M
> > for signed integral types into ABS_EXPR (X).  For X == min it is already
> > UB because M is -1 and min + -1 is UB, so we can use ABS_EXPR rather than
> > say ABSU_EXPR + cast.
> > 
> > The backend might then emit the abs code back using the shift and addition
> > and xor if it is the best sequence for the target, but could do something
> > different that is better.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > 
> > 2020-05-06  Jakub Jelinek  
> > 
> > PR tree-optimization/94783
> > * match.pd ((X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X)):
> > New simplification.
> > 
> > * gcc.dg/tree-ssa/pr94783.c: New test.
> > 
> > --- gcc/match.pd.jj 2020-05-06 15:48:23.658858289 +0200
> > +++ gcc/match.pd2020-05-06 17:47:02.035347946 +0200
> > @@ -120,6 +120,18 @@ (define_operator_list COND_TERNARY
> >(with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> > (convert (absu:utype @0)
> >  
> > +#if GIMPLE
> > +/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X).  */
> > +(simplify
> > + (bit_xor:c (plus:cs @0 (rshift@2 @0 INTEGER_CST@1)) @2)
> > + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> > +  && !TYPE_UNSIGNED (TREE_TYPE (@0))
> > +  && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> > +  && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1
> > +  && TREE_CODE (@2) == SSA_NAME
> > +  && num_imm_uses (@2) == 2)
> 
> I fear this num_imm_uses test is quite fragile since match.pd patterns
> are invoked on transient sequences (w/o SSA operands) as well
> (that of course holds for all single_use () tests as well but that
> at least allows zero uses for this very reason - still fragile since
> the single use might be _in_ the IL and thus a second).
> 
> I think unconditionally using (abs @0) is simplifying things enough
> (getting rid of one xor and one plus) to not worry about keeping
> the (x >> (prec - 1))?
I'd tend to agree.   abs form is optimized better all the way to code 
generation,
so there's probably good secondary benefits as well.

jeff
> 



Re: [PATCH v2] match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]

2020-05-07 Thread Richard Biener
On May 7, 2020 4:25:45 PM GMT+02:00, Jakub Jelinek  wrote:
>On Thu, May 07, 2020 at 10:04:35AM +0200, Richard Biener wrote:
>> On Thu, 7 May 2020, Jakub Jelinek wrote:
>> > The ffs expanders on several targets (x86, ia64, aarch64 at least)
>> > emit a conditional move or similar code to handle the case when the
>> > argument is 0, which makes the code longer.
>> > If we know from VRP that the argument will not be zero, we can (if
>the
>> > target has also an ctz expander) just use ctz which is undefined at
>zero
>> > and thus the expander doesn't need to deal with that.
>> > 
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for
>trunk?
>> 
>> can you use direct_internal_fn_supported_p (IFN_CTZ, type, 
>> OPTIMIZE_FOR_SPEED)?
>
>Only if it is guarded with #if GIMPLE (because otherwise the fn
>isn't declared).
>Though, restricting this to GIMPLE seems like a good idea anyway to me.
>
>Ok for trunk if it passes bootstrap/regtest?

OK. 

Richard. 

>2020-05-07  Jakub Jelinek  
>
>   PR tree-optimization/94956
>   * match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
>   __builtin_ctz* + 1 if direct IFN_CTZ is supported.
>
>   * gcc.target/i386/pr94956.c: New test.
>
>--- gcc/match.pd.jj2020-05-06 15:03:51.618058839 +0200
>+++ gcc/match.pd   2020-05-07 16:16:48.466970168 +0200
>@@ -5986,6 +5986,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
>  OPTIMIZE_FOR_BOTH))
> (convert (IFN_POPCOUNT:type @0)
>+
>+/* __builtin_ffs needs to deal on many targets with the possible zero
>+   argument.  If we know the argument is always non-zero,
>__builtin_ctz + 1
>+   should lead to better code.  */
>+(simplify
>+ (FFS tree_expr_nonzero_p@0)
>+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>+  && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
>+   OPTIMIZE_FOR_SPEED))
>+  (plus (CTZ:type @0) { build_one_cst (type); })))
> #endif
> 
> /* Simplify:
>--- gcc/testsuite/gcc.target/i386/pr94956.c.jj 2020-05-06
>16:35:47.085876237 +0200
>+++ gcc/testsuite/gcc.target/i386/pr94956.c2020-05-06
>16:39:52.927140038 +0200
>@@ -0,0 +1,28 @@
>+/* PR tree-optimization/94956 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
>+/* { dg-final { scan-assembler-not "\tsete\t" } } */
>+
>+int
>+foo (unsigned x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffs (x) - 1;
>+}
>+
>+int
>+bar (unsigned long x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffsl (x) - 1;
>+}
>+
>+#ifdef __x86_64__
>+int
>+baz (unsigned long long x)
>+{
>+  if (x == 0) __builtin_unreachable ();
>+  return __builtin_ffsll (x) - 1;
>+}
>+#endif
>
>
>   Jakub



Re: [PATCH v2] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Richard Biener
On May 7, 2020 4:27:26 PM GMT+02:00, Jakub Jelinek  wrote:
>On Thu, May 07, 2020 at 09:59:57AM +0200, Richard Biener wrote:
>> Maybe write A - B + -1 >= A to actually match what you match below
>...
>> on the plus :c is not needed, canonicalization will put the constant
>> literal second
>> 
>> The previous pattern has a single_use check on the minus, since
>> the result is always "simple" (a single stmt) the :s have no
>> effect (hmm, I guess a genmatch warning for this case might be nice).
>> 
>> And yes, if the TYPE_OVERFLOW_WRAPS checks are unnecessary remove
>> them please, we'll hopefully resist all attempts to ubsan
>> unsigned overflow ...
>
>So like this if it passes bootstrap/regtest?

Yes. Not sure what Jeff is suggesting. 

Thanks, 
Richard. 

>2020-05-07  Jakub Jelinek  
>
>   PR tree-optimization/94913
>   * match.pd (A - B + -1 >= A to B >= A): New simplification.
>   (A - B > A to A < B): Don't test TYPE_OVERFLOW_WRAPS which is always
>   true for TYPE_UNSIGNED integral types.
>
>   * gcc.dg/tree-ssa/pr94913.c: New test.
>
>--- gcc/match.pd.jj2020-05-06 11:18:35.0 +0200
>+++ gcc/match.pd   2020-05-07 15:52:41.450787729 +0200
>@@ -4787,10 +4787,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (cmp:c (minus@2 @0 @1) @0)
>   (if (single_use (@2)
>&& ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>-   && TYPE_UNSIGNED (TREE_TYPE (@0))
>-   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>+   && TYPE_UNSIGNED (TREE_TYPE (@0)))
>(cmp @1 @0
> 
>+/* Optimize A - B + -1 >= A into B >= A for unsigned comparisons.  */
>+(for cmp (ge lt)
>+ (simplify
>+  (cmp:c (plus (minus @0 @1) integer_minus_onep) @0)
>+   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>+  && TYPE_UNSIGNED (TREE_TYPE (@0)))
>+(cmp @1 @0
>+
>/* Testing for overflow is unnecessary if we already know the result. 
>*/
> /* A - B > A  */
> (for cmp (gt le)
>--- gcc/testsuite/gcc.dg/tree-ssa/pr94913.c.jj 2020-05-06
>15:20:08.306376994 +0200
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr94913.c2020-05-06
>15:19:45.120725533 +0200
>@@ -0,0 +1,33 @@
>+/* PR tree-optimization/94913 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump " (?:b_\[0-9]+\\\(D\\\) >=
>a|a_\[0-9]+\\\(D\\\) <= b)_\[0-9]+\\\(D\\\);" "optimized" } } */
>+/* { dg-final { scan-tree-dump " (?:c_\[0-9]+\\\(D\\\) >
>d|d_\[0-9]+\\\(D\\\) < c)_\[0-9]+\\\(D\\\);" "optimized" } } */
>+/* { dg-final { scan-tree-dump " (?:f_\[0-9]+\\\(D\\\) >=
>e|e_\[0-9]+\\\(D\\\) <= f)_\[0-9]+\\\(D\\\);" "optimized" } } */
>+/* { dg-final { scan-tree-dump " (?:g_\[0-9]+\\\(D\\\) >
>h|h_\[0-9]+\\\(D\\\) < g)_\[0-9]+\\\(D\\\);" "optimized" } } */
>+
>+int
>+foo (unsigned a, unsigned b)
>+{
>+  return (a - b - 1) >= a;
>+}
>+
>+int
>+bar (unsigned c, unsigned d)
>+{
>+  return (c - d - 1) < c;
>+}
>+
>+int
>+baz (unsigned e, unsigned f)
>+{
>+  unsigned t = e - f;
>+  return (t - 1) >= e;
>+}
>+
>+int
>+qux (unsigned g, unsigned h)
>+{
>+  unsigned t = g - h;
>+  return (t - 1) < g;
>+}
>
>
>   Jakub



Re: [PATCH v2] match.pd: Canonicalize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X) [PR94783]

2020-05-07 Thread Richard Biener
On May 7, 2020 4:34:59 PM GMT+02:00, Jakub Jelinek  wrote:
>On Thu, May 07, 2020 at 10:12:12AM +0200, Richard Biener wrote:
>> I think unconditionally using (abs @0) is simplifying things enough
>> (getting rid of one xor and one plus) to not worry about keeping
>> the (x >> (prec - 1))?
>
>Ok.
>
>> Do you really need the TYPE_OVERFLOW_UNDEFINED check?
>
>Probably not, if overflow isn't undefined, then ABS_EXPR will not be
>undefined on the type minimum either.
>
>So like this (including dropping :s from plus), or should there be
>single_use for it?  You've said that if the replacement is a simple
>stmt
>:s is ignored...
>
>2020-05-07  Jakub Jelinek  
>
>   PR tree-optimization/94783
>   * match.pd ((X + (X >> (prec - 1))) ^ (X >> (prec - 1)) to abs (X)):
>   New simplification.
>
>   * gcc.dg/tree-ssa/pr94783.c: New test.
>
>--- gcc/match.pd.jj2020-05-06 15:48:23.658858289 +0200
>+++ gcc/match.pd   2020-05-07 16:30:15.666817448 +0200
>@@ -120,6 +120,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
>(convert (absu:utype @0)
> 
>+#if GIMPLE
>+/* Optimize (X + (X >> (prec - 1))) ^ (X >> (prec - 1)) into abs (X). 
>*/
>+(simplify
>+ (bit_xor:c (plus:c @0 (rshift@2 @0 INTEGER_CST@1)) @2)
>+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>+  && !TYPE_UNSIGNED (TREE_TYPE (@0))
>+  && wi::to_widest (@1) == element_precision (TREE_TYPE (@0)) - 1
>+  && TREE_CODE (@2) == SSA_NAME)

The SSA_NAME check is superfluous now. 

Otherwise OK. 

Thanks, 
Richard. 

>+  (abs @0)))
>+#endif
> 
> /* Simplifications of operations with one constant operand and
>simplifications to constants or single values.  */
>--- gcc/testsuite/gcc.dg/tree-ssa/pr94783.c.jj 2020-05-06
>17:52:35.515323297 +0200
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr94783.c2020-05-06
>17:52:10.915693948 +0200
>@@ -0,0 +1,12 @@
>+/* PR tree-optimization/94783 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump "ABS_EXPR" "optimized" } } */
>+/* { dg-final { scan-tree-dump-not " >> 31" "optimized" } } */
>+
>+int
>+foo (int v)
>+{
>+  int mask = v >> (__SIZEOF_INT__ * __CHAR_BIT__ - 1);
>+  return (v + mask) ^ mask;
>+}
>
>
>   Jakub



RE: [PATCH] aarch64: prefer using csinv, csneg in zero extend contexts

2020-05-07 Thread Alex Coplan
> -Original Message-
> From: Richard Sandiford 
> Sent: 06 May 2020 11:28
> To: Alex Coplan 
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw ;
> Marcus Shawcroft ; Kyrylo Tkachov
> ; nd 
> Subject: Re: [PATCH] aarch64: prefer using csinv, csneg in zero extend
> contexts
>
> Alex Coplan  writes:
> >> -Original Message-
> >> From: Richard Sandiford 
> >> Sent: 30 April 2020 15:13
> >> To: Alex Coplan 
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> ;
> >> Marcus Shawcroft ; Kyrylo Tkachov
> >> ; nd 
> >> Subject: Re: [PATCH] aarch64: prefer using csinv, csneg in zero extend
> contexts
> >>
> >> Yeah, I was hoping for something like...
> >>
> >> > Indeed, clang generates a MVN + CSEL sequence where the CSEL
> operates on the
> >> > 64-bit registers:
> >> >
> >> > f:
> >> > mvn w8, w2
> >> > cmp w0, #0
> >> > csel x0, x8, x1, eq
> >> > ret
> >>
> >> ...this rather than the 4-insn (+ret) sequence that we currently
> >> generate.  So it would have been a define_insn_and_split that handles
> >> the zero case directly but splits into the "optimal" two-instruction
> >> sequence for registers.
> >>
> >> But I guess the underlying problem is instead that we don't have
> >> a pattern for (zero_extend:DI (not:SI ...)).  Adding that would
> >> definitely be a better fix.
> >
> > Yes. I sent a patch for this very fix which Kyrill is going to commit
> once stage
> > 1 opens: https://gcc.gnu.org/pipermail/gcc-patches/2020-
> April/544365.html
>
> Sorry, missed that.
>
> It looks like that patch hinders this one though.  Trying it with
> current master (where that patch is applied), I get:
>
> FAIL: gcc.target/aarch64/csinv-neg.c check-function-bodies inv_zero1
> FAIL: gcc.target/aarch64/csinv-neg.c check-function-bodies inv_zero2
>
> It looks like a costs issue:
>
> Trying 27 -> 18:
>27: r99:DI=zero_extend(~r101:SI)
>   REG_DEAD r101:SI
>18: x0:DI={(cc:CC==0)?r99:DI:0}
>   REG_DEAD cc:CC
>   REG_DEAD r99:DI
> Successfully matched this instruction:
> (set (reg/i:DI 0 x0)
> (if_then_else:DI (eq (reg:CC 66 cc)
> (const_int 0 [0]))
> (zero_extend:DI (not:SI (reg:SI 101)))
> (const_int 0 [0])))
> rejecting combination of insns 27 and 18
> original costs 4 + 4 = 8
> replacement cost 12
>
> I guess we'll need to teach aarch64_if_then_else_costs about the costs
> of the new insns.

Ah, thanks for catching this. I've attached an updated patch which fixes the
costs issue here. With the new patch, all the test cases in csinv-neg.c now
pass. In addition, I've done a bootstrap and regtest on aarch64-linux with no
new failures.

Do you think we need to add cases to aarch64_if_then_else_costs for the other
new insns, or is the attached patch OK for master?

Thanks,
Alex

---

gcc/ChangeLog:

2020-05-07  Alex Coplan  

* config/aarch64/aarch64.c (aarch64_if_then_else_costs): Add case to 
correctly
calculate cost for new pattern (*csinv3_uxtw_insn3).
* config/aarch64/aarch64.md (*csinv3_uxtw_insn1): New.
(*csinv3_uxtw_insn2): New.
(*csinv3_uxtw_insn3): New.
* config/aarch64/iterators.md (neg_not_cs): New.

gcc/testsuite/ChangeLog:

2020-05-07  Alex Coplan  

* gcc.target/aarch64/csinv-neg.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e92c7e69fcb..efb3da7a7fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11695,6 +11695,15 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, 
int *cost, bool speed)
  op1 = XEXP (op1, 0);
  op2 = XEXP (op2, 0);
}
+  else if (GET_CODE (op1) == ZERO_EXTEND && op2 == const0_rtx)
+   {
+ inner = XEXP (op1, 0);
+ if (GET_CODE (inner) == NEG || GET_CODE (inner) == NOT)
+ {
+   /* CSINV/NEG with zero extend + const 0 (*csinv3_uxtw_insn3).  */
+   op1 = XEXP (inner, 0);
+ }
+   }
 
   *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
   *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ff15505d455..b2cfd015530 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4391,6 +4391,44 @@
   [(set_attr "type" "csel")]
 )
 
+(define_insn "*csinv3_uxtw_insn1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (if_then_else:DI
+ (match_operand 1 "aarch64_comparison_operation" "")
+ (zero_extend:DI
+   (match_operand:SI 2 "register_operand" "r"))
+ (zero_extend:DI
+   (NEG_NOT:SI (match_operand:SI 3 "register_operand" "r")]
+  ""
+  "cs\\t%w0, %w2, %w3, %m1"
+  [(set_attr "type" "csel")]
+)
+
+(define_insn "*csinv3_uxtw_insn2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (if_then_else:DI
+ (match_operand 1 "aarch64_comparison_operation" "")
+ (zero_extend:DI
+   (N

Re: [PATCH] implement pre-c++20 contracts

2020-05-07 Thread Jeff Chapman via Gcc-patches
Hello,

On 12/10/19, Jason Merrill wrote:
> On 11/13/19, Jeff Chapman wrote:
>> Attached is a patch that implements pre-c++20 contracts. This comes
>> from a long running development branch which included ChangeLog entries
>> as we went, which are included in the patch itself. The repo and
>> initial wiki are located here:
>> https://gitlab.com/lock3/gcc-new/wikis/GCC-with-Contracts
>
> Thanks.  I've mostly been referring to the repo rather than the attached
> patch.  Below are a bunch of comments about the implementation, in no
> particular order.
>

I've attached a new squashed revision of the patch, and you can see the
changes I've made from your input on the contracts-jac-prep branch:
https://gitlab.com/lock3/gcc-new/-/tree/contracts-jac-prep . If there's
an easier format for you to review that I can produce please let me
know.

I'll address a few things inline below. Everything else should either be
handled or explained by Andrew's last email. If anything needs further
addressing or something hasn't been brought up yet please let me know :)


>> +handle_OPT_fcontract_build_level_ (const char *arg)
>> +{
>> +  if (contracts_p1332_default || contracts_p1332_review ||
>> contracts_p1429)
>> +{
>> +  error ("-fcontract-build-level= cannot be mixed with p1332/p1429");
>
> Hmm, P1429 includes the notion of build level, it's just checked after
> explicit semantics.  In general, P1429 seems like a compatible extension
> of the semantics previously in the working paper.
>
> P1332 could also be treated as compatible if we consider the P0542 build
> level to affect the default role as specified in P1429.  P1680 seems to
> suggest that this is what you had in mind.
>

These could possibly be made compatible, but in some cases the flags are
changing the same entries in the table. That would require deciding
whether flag ordering matters or whether certain flags can't change
values set by other flags.

I'm not sure it's a worthwhile change. While it increases the valid
space of command line invocations, it doesn't actually increase the
result space. I'd prefer an eventual solution that removed flags
entirely instead.


>> +  /* Check that assertions are null statements.  */
>> +  if (attribute_contract_assert_p (contract_attrs))
>> +if (token->type != CPP_SEMICOLON)
>> +  error_at (token->location, "assertions must be followed by
>> %<;%>");
>
> Better I think to handle this further down where [[fallthrough]] has the
> same requirement.
>

I'm wondering if it would be better to move [[fallthrough]] up, since
the later check is not always executed and in the case of [[assert]] we
actually need to error.

  [[fallthrough]] int x;

for instance just gets a generic 'attribute ignored' warning. I'm not
entirely happy with how we prevent assert/pre/post from appearing in
invalid locations in general which I'll try to improve. If you have
concrete suggestions please let me know.


> Why not leave the function the user declared as the unchecked function
> (just changing some linkage properties) and create a new function for
> the checked wrapper?
>

This revision of the patch does not include changes to the
checked/unchecked function split. We're exploring an alternative rewrite
that leaves the original function declaration alone and should address
or sidestep a number of these comments.

Specifically, we're exploring generating pre and post functions with
calls to them in the correct places (upon entering a guarded function,
wrapping the return value):

  int f(int n) [[ pre: n > 0 ]] [[ post r: r < 0 ]] { return -n; }

turns into

  void __pre_f(int n) { [[ assert: n > 0 ]]; }
  int __post_f(int r) { [[ assert: r < 0 ]]; return r; }
  int f(int n) {
__pre_f(n);
return __post_f(-n);
  }

with whatever hints we can give to optimize this as much as possible.


>> +/* Return the source text between two locations.  */
>> +
>> +static char *
>> +get_source (location_t start, location_t end)
>
> This seems like it belongs in libcpp.  It also needs to
>

This has been moved to input since it uses input functions, but needs
more work. Was there another comment you had that got cut off?


>> +  tree level_str = build_string_literal (strlen (level) + 1, level);
>> +  tree role_str = build_string_literal (strlen (role) + 1, role);
>
> Maybe combine these into a single string argument?
>

These are used separately in std::contract_violation and to my
understanding building them separately will collapse duplicate levels
and roles instead of duplicating them unless they both match -- is that
correct?


>> +  /* We never want to accidentally instantiate templates.  */
>> +  if (code == TEMPLATE_DECL)
>> +return *tp; /* FIXME? */
>
> This seems unlikely to have the desired effect; we should see template
> instantiations as FUNCTION_DECL or VAR_DECL.  I'm also not sure what the
> cp_tree_defined_p check is trying to do; surely using defined functions
> and variables can also lead to runtime code?
>

This is an

avoid infinite loops in rpo fre

2020-05-07 Thread Alexandre Oliva


gnat.dg/opt83.adb compiled with -O2+ would enter an infinite loop with
memory allocation within fre.  I don't think there is anything
Ada-specific in the bug, but the exact inlining and loop unrolling
circumstances needed to trigger the bug are quite fragile, so I didn't
try very hard to translate it to C.

The problem comes about while attempting to eliminate the last of the
following stmts, generated for 'R (0) := F;':

  A78b_144 = MEM  [(struct opt83__e &)_41][0]{lb: _46 
sz: 16}._tag;
  MEM  [(struct opt83__e &)_41][0]{lb: _46 sz: 16} = 
f;
  MEM  [(struct opt83__e &)_41][0]{lb: _46 sz: 
16}._tag = A78b_144;

valueize_refs_1 takes a sequence of vn_reference_op_s with _41 in it, and
when it gets to that op, vn_valueize = rpo_vn_valueize replaces _41 with
_47, defined in the previous block as:

  _47 = &(*_41)[0]{lb: _46 sz: 16};

_47 is the first argument passed to the function synthesized to copy F
to the first element of array R, after checking that their addresses
do not compare equal.

There is another earlier def in the Value Numbering set associated with
_41, namely:

  _164 = &MEM[(struct ALLOC *)_163].ARRAY;

_163 is the newly-allocated storage for the 0..4 array.  Unfortunately
the logic in rpo_vn_valueize selects the former, and then we add the
_47 definition in _41's place in the op sequence.  Problem is _41 is
part of the expression, and thus of the expansion, so eventually we
reach it and replace it again, and again, and at every cycle we add
more ops than we remove, so the sequence grows unbounded.


Limiting the selection of alternate defs for the value to those that
dominate the def we're replacing should be enough to avoid the
problem, since we'd only perform replacements "up" the CFG.  Changing
the BB context for the selection of the value equivalence to that of
the name we're replacing, rather than that of the expression in which
we're replacing it, seems to be close enough.  It does solve the
problem without any codegen changes in a GCC bootstrap, despite a few
differences in eliminate_avail.

Regstrapped on x86_64-linux-gnu.  Ok to install?

As I prepare to post this, it occurs to me that maybe, instead of using
vn_context_bb for a default NAME like before, we should abandon the
attempt to substitute it, lest we might run into the same kind of
infinite loop for e.g. _41(D).  WDYT?


for  gcc/ChangeLog

* tree-ssa-sccvn.c (rpo_vn_valueize): Take the BB context from
NAME.

for  gcc/testsuite/ChangeLog

* gnat.dg/opt83.adb: New.
---
 gcc/testsuite/gnat.dg/opt83.adb |   33 +
 gcc/tree-ssa-sccvn.c|7 ++-
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gnat.dg/opt83.adb

diff --git a/gcc/testsuite/gnat.dg/opt83.adb b/gcc/testsuite/gnat.dg/opt83.adb
new file mode 100644
index ..7418520
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt83.adb
@@ -0,0 +1,33 @@
+--  { dg-do compile }
+--  { dg-options "-O2" }
+
+--  rpo fre3 used to loop indefinitely replacing _2 with _8 and back,
+--  given MEM[(struct test__e &)_2][0]{lb: _7 sz: 16}._tag = A23s_29;
+--  and an earlier _8 = &*_2[0]{lb: _7 sz: 16}.
+
+procedure Opt83 is
+
+   type E is tagged record
+  I : Natural := 0;
+   end record;
+
+   type A is array (Natural range <>) of aliased E;
+
+   F : E;
+
+   R : access A;
+
+   procedure N is 
+   begin
+  if R = null then
+R := new A (0 .. 4);
+  end if;
+   end N;
+
+begin
+
+   N;
+
+   R (0) := F;
+
+end Opt83;
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 8a4af91..9008724 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -6790,9 +6790,14 @@ rpo_vn_valueize (tree name)
{
  if (TREE_CODE (tem) != SSA_NAME)
return tem;
+ basic_block bb = vn_context_bb;
+ /* Avoid replacing name with anything whose definition
+could refer back to name.  */
+ if (! SSA_NAME_IS_DEFAULT_DEF (name))
+   bb = gimple_bb (SSA_NAME_DEF_STMT (name));
  /* For all values we only valueize to an available leader
 which means we can use SSA name info without restriction.  */
- tem = rpo_avail->eliminate_avail (vn_context_bb, tem);
+ tem = rpo_avail->eliminate_avail (bb, tem);
  if (tem)
return tem;
}

-- 
Alexandre Oliva, freedom fighter    he/him    https://FSFLA.org/blogs/lxo/
Free Software Evangelist  Stallman was right, but he's left :(
GNU Toolchain Engineer   Live long and free, and prosper ethically


Re: [PATCH] OpenACC reference count consistency checking

2020-05-07 Thread Julian Brown
Sorry about the delay replying to this email!

On Thu, 30 Jan 2020 16:21:20 +0100
Thomas Schwinge  wrote:

> Hi Julian!
> 
> Notwithstanding the open question about how to implement this
> checking in libgomp in a non-intrusive (performance-wise) yet
> maintainable (avoid '#if 0') way, I have two more questions.
> 
> 
> Is there a specific reason why this checking isn't also enabled for
> libgomp OpenMP 'target' entry points?

Just that it was developed in the context of adding manual deep-copy
support to OpenACC -- OpenMP wasn't my focus at that point. So, I
didn't try adding checking for OpenMP also. It might be interesting to
see how that goes though, particularly with regards to dynamic data
lifetimes in OpenMP.

> Can you please explain (textually?) how this checking (design per your
> textual description below) is working in context of mixed OpenACC
> structured ("S") and dynamic ("D") reference counts?  For example:
> 
> // S: 0, D: 0
> 
> #pragma acc enter data copyin ([data]) // copyin; S: 0, D: 1
> 
> acc_copyin ([data]) // no-op; S: 0, D: 2

Unfortunately it's not quite that simple. The "refcount" fields (in
either splay tree keys or target_mem_descs) do not really represent
program-level reference counts, but rather references in the linked
splay tree structure within libgomp. That's correct: the refcounts are
used so as to know when data is still live, and when it can be freed.

Structured data mapping operations ("acc data", "acc parallel", etc.)
always create a target_mem_desc, with a list of target_var_descs that
describe data mapped in that structured block. That target_mem_desc
either "owns" a block of target memory corresponding to the structured
data block, or it doesn't.

We might have something like this (excuse ASCII art!):

   +===++=+
   | TARGET_MEM_DESC 1 |   ,--> | TARGET_VAR_DESC |
   +---+   |+-+
   | tgt_start...  |   || splay_tree_key  | --> ... 
   +---+   |+=+
   | target_var_desc 0 | --' 
   | target_var_desc 1 | ---.   +=+
   | target_var_desc 2 | -. `-> | TARGET_VAR_DESC |
   +===+  | +-+
  | | splay_tree_key  | --> ...  
  | +=+  
  |  
  | +=+  
  `---> | TARGET_VAR_DESC |  
+-+  
   +=+   .- | splay_tree_key  |
   | SPLAY_TREE_KEY  | <-'  +=+
   +-+   
   | target_mem_desc | -.   +===+
   +=+  '-> | TARGET_MEM_DESC 2 |
+---+
| tgt_start...  |
+---+
| target_var_desc   |
+===+

(Non-virtual/non-dynamic) reference counts correspond to the arrows
between blocks in the diagram (for the pointed-to block --
target_mem_desc or splay tree key).

For a structured data mapping, say "TARGET_MEM_DESC 1" is the descriptor
returned from gomp_map_vars.

Now, "TARGET_MEM_DESC 1" and "TARGET_MEM_DESC 2" can be the same block,
or different blocks. (Each of the TARGET_MEM_DESCs linked from splay
tree keys, linked from TARGET_VAR_DESCs, can be a mix of such
identical or different blocks for each of the splay tree keys linked
from TARGET_VAR_DESCs.) In the case where they're different blocks, and
TARGET_MEM_DESC 2 (etc.) owns its own mapped memory, TARGET_MEM_DESC 1
may have a NULL tgt_start -- thus, not own a target data block itself.

In the case of a dynamic mapping, this subtlety is especially
important. A target_mem_desc being returned from
gomp_map_vars{_internal} with a refcount of zero -- one which no splay
tree keys link back to, because it does not own its own block of target
memory -- is discarded before the function returns.

So, the first time a dynamic data mapping takes place for DATA, we have:

> // S: 0, D: 0
> 
> #pragma acc enter data copyin ([data]) // copyin; S: 1, D: 0

This is because the target_mem_desc created to describe on-target
memory for DATA will "own" that data: nothing has referred to it
beforehand. So there's a "real" link from the splay tree key for DATA's
host region to the target_mem_desc we just created. (Yes, the
splay tree key's reference counts look just like a structured data
mapping. That was a subject for another patch.)

> acc_copyin ([data]) // no-op; S: 2, D: 1

So now we have another dynamic mapping. This time, we already have a
target_mem_desc describing DATA on the target. The
gomp_map_vars_internal function will return NULL -- but before it does
that, it realises that it will "lose" references in doi

Re: [PATCH] libstdc++: Implement integer-class types as defined in [iterator.concept.winc]

2020-05-07 Thread Patrick Palka via Gcc-patches
On Mon, 2 Mar 2020, Patrick Palka wrote:

> On Mon, 24 Feb 2020, Patrick Palka wrote:
> 
> > On Mon, 24 Feb 2020, Patrick Palka wrote:
> > 
> > > This implements signed and unsigned integer-class types, whose width is 
> > > one bit
> > > larger than the widest native signed and unsigned integral type 
> > > respectively.
> > > In our case this is either __int128 and unsigned __int128, or long long 
> > > and
> > > unsigned long long.
> > > 
> > > Internally, the two integer-class types are represented as a largest 
> > > native
> > > unsigned integral type plus one extra bit.  The signed integer-class type 
> > > is
> > > represented in two's complement form with the extra bit acting as the 
> > > sign bit.
> > > 
> > > libstdc++-v3/ChangeLog:
> > > 
> > >   * include/bits/iterator_concepts.h (ranges::__detail::__max_diff_type):
> > >   Remove definition, replace with forward declaration of class
> > >   __max_diff_type.
> > >   (ranges::__detail::__max_size_type): Remove definition, replace with
> > >   forward declaration of class __max_size_type.
> > >   (__detail::__is_integer_like): Accept __int128 and unsigned __int128.
> > >   (__detail::__is_signed_integer_like): Accept __int128.
> > >   * include/bits/range_access.h (__detail::__max_size_type): New class.
> > >   (__detail::__max_diff_type): New class.
> > >   (__detail::__max_size_type::__max_size_type): Define this constructor
> > >   out-of-line to break the cycle.
> > >   (__detail::__to_unsigned_like): New function.
> > >   (numeric_limits<__detail::__max_size_type>): New explicit 
> > > specialization.
> > >   (numeric_limits<__detail::__max_diff_type>): New explicit 
> > > specialization.
> > >   * testsuite/std/ranges/iota/difference_type.cc: New test.
> > 
> > Here's v2 of the patch that splits out __max_size_type and
> > __max_diff_type into a dedicated header, along with other misc
> > improvements and fixes.
> > 
> > -- >8 --
> 
> Here's v3 of the patch.  Changes from v2:
> 
> * The arithmetic tests in difference_type.cc have been split out to a
> separate file.
> 
> * The arithmetic tests now run successfully in strict ANSI mode.  The
> issue was that __int128 does not model the integral concept in strict
> ANSI mode, which we use to make operations on this type behave as
> integer operations do.  But for that we need to always treat __int128 as
> an integer type in this API.  So a new concept __integralish which is
> always modelled by __int128 is introduced and used in the API instead.
> 
> * Comments have been added explaining why __int128 is always used as the
> underlying type even when the widest integer type in strict ANSI mode is
> long long.
> 
> * New tests, some minor code clean-ups, and added comments to the
> unsigned division and multiplication routines.
> 
> Tested on x86_64-pc-linux-gnu in both strict and GNU compilation modes,
> with and without -U__SIZEOF_INT128__.

Ping (now that stage 1 is open).  Here's the latest rebased version
of the patch:

-- >8 --

Subject: [PATCH] libstdc++: integer-class types as per [iterator.concept.winc]

This implements signed and unsigned integer-class types, whose width is
one bit larger than the widest supported signed and unsigned integral
type respectively.  In our case this is either __int128 and unsigned
__int128, or long long and unsigned long long.

Internally, the two integer-class types are represented as a largest
supported unsigned integral type plus one extra bit.  The signed
integer-class type is represented in two's complement form with the
extra bit acting as the sign bit.

libstdc++-v3/ChangeLog:

* include/Makefile.am (bits_headers): Add new header
<bits/max_size_type.h>.
* include/Makefile.in: Regenerate.
* include/bits/iterator_concepts.h
(ranges::__detail::__max_diff_type): Remove definition, replace
with forward declaration of class __max_diff_type.
(__detail::__max_size_type): Remove definition, replace with
forward declaration of class __max_size_type.
(__detail::__is_unsigned_int128, __is_signed_int128,
__is_int128): New concepts.
(__detail::__is_integer_like): Accept __int128 and unsigned
__int128.
(__detail::__is_signed_integer_like): Accept __int128.
* include/bits/max_size_type.h: New header.
* include/bits/range_access.h: Include <bits/max_size_type.h>.
(__detail::__to_unsigned_like): Two new overloads.
* testsuite/std/ranges/iota/difference_type.cc: New test.
* testsuite/std/ranges/iota/max_size_type.cc: New test.
---
 libstdc++-v3/include/Makefile.am  |   1 +
 libstdc++-v3/include/Makefile.in  |   1 +
 libstdc++-v3/include/bits/iterator_concepts.h |  24 +-
 libstdc++-v3/include/bits/max_size_type.h | 753 ++
 libstdc++-v3/include/bits/range_access.h  |  11 +
 .../std/ranges/iota/difference_type.cc|  57 ++
 .../std/ranges/iota/max_size_type.cc  | 376 +
 7 files changed, 1219 insertio

[PATCH] tree-pretty-print: Handle boolean types

2020-05-07 Thread Richard Sandiford
AVX512-style masks and SVE-style predicates can be difficult
to debug in gimple dumps, since the types are printed like this:

  vector(4) <unnamed type> foo;

Some important details are hidden by that <unnamed type>,
such as the number of bits in an element and whether the type
is signed or unsigned.

This patch uses an ad-hoc syntax for printing unnamed
boolean types.  Normal frontend ones should be handled
by the earlier TYPE_NAME code.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2020-05-07  Richard Sandiford  

gcc/
* tree-pretty-print.c (dump_generic_node): Handle BOOLEAN_TYPEs.
---
 gcc/tree-pretty-print.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 885ca8cd329..f04fd65091a 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1761,6 +1761,14 @@ dump_generic_node (pretty_printer *pp, tree node, int 
spc, dump_flags_t flags,
pp_decimal_int (pp, TYPE_PRECISION (node));
pp_greater (pp);
  }
+   else if (TREE_CODE (node) == BOOLEAN_TYPE)
+ {
+   pp_string (pp, (TYPE_UNSIGNED (node)
+   ? "

Re: [PATCH v2] match.pd: Simplify unsigned A - B - 1 >= A to B >= A [PR94913]

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 17:39 +0200, Richard Biener wrote:
> On May 7, 2020 4:27:26 PM GMT+02:00, Jakub Jelinek  wrote:
> > On Thu, May 07, 2020 at 09:59:57AM +0200, Richard Biener wrote:
> > > Maybe write A - B + -1 >= A to actually match what you match below
> > ...
> > > on the plus :c is not needed, canonicalization will put the constant
> > > literal second
> > > 
> > > The previous pattern has a single_use check on the minus, since
> > > the result is always "simple" (a single stmt) the :s have no
> > > effect (hmm, I guess a genmatch warning for this case might be nice).
> > > 
> > > And yes, if the TYPE_OVERFLOW_WRAPS checks are unnecessary remove
> > > them please, we'll hopefully resist all attempts to ubsan
> > > unsigned overflow ...
> > 
> > So like this if it passes bootstrap/regtest?
> 
> Yes. Not sure what Jeff is suggesting.
Sorry.  Just noting a potential problem area.  If it works, I've got no
objections.

jeff
> 



[PATCH] c++: explain fn template argument type/value mismatch failures

2020-05-07 Thread Patrick Palka via Gcc-patches
In fn_type_unification, we are passing NULL_TREE as the 'in_decl'
parameter to coerce_template_parms, and this is causing template
type/value mismatch error messages to get suppressed regardless of the
value of 'complain'.

This means that when substitution into a function template fails due to
a type/value mismatch between a template parameter and the provided
template argument, we just say "template argument deduction/substitution
failed:" without a followup explanation of the failure.

Fix this by passing 'fn' instead of NULL_TREE to coerce_template_parms.
Passes 'make check-c++', does this look OK to commit after a full
bootstrap and regtest?

gcc/cp/ChangeLog:

* pt.c (fn_type_unification): Pass 'fn' instead of NULL_TREE as
the 'in_decl' parameter to coerce_template_parms.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-ts4.C: Expect a "type/value mismatch"
diagnostic.
* g++.dg/cpp2a/concepts-ts6.C: Likewise.
* g++.dg/template/error56.C: Likewise.
* g++.dg/template/error59.C: New test.

libstdc++-v3/ChangeLog:

* testsuite/20_util/pair/astuple/get_neg.cc: Prune "type/value
mismatch" messages.
* testsuite/20_util/tuple/element_access/get_neg.cc: Likewise.
---
 gcc/cp/pt.c   |  2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-ts4.C |  2 ++
 gcc/testsuite/g++.dg/cpp2a/concepts-ts6.C |  4 +++-
 gcc/testsuite/g++.dg/template/error56.C   |  2 ++
 gcc/testsuite/g++.dg/template/error59.C   | 11 +++
 .../testsuite/20_util/pair/astuple/get_neg.cc |  1 +
 .../testsuite/20_util/tuple/element_access/get_neg.cc |  1 +
 7 files changed, 21 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/error59.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index c6091127225..52bb6f34d6a 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -21088,7 +21088,7 @@ fn_type_unification (tree fn,
   /* Adjust any explicit template arguments before entering the
 substitution context.  */
   explicit_targs
-   = (coerce_template_parms (tparms, explicit_targs, NULL_TREE,
+   = (coerce_template_parms (tparms, explicit_targs, fn,
  complain|tf_partial,
  /*require_all_args=*/false,
  /*use_default_args=*/false));
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ts4.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ts4.C
index aa96621d9cf..2b0fd1b8deb 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-ts4.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ts4.C
@@ -31,4 +31,6 @@ void driver()
   fn<0>(); // OK
   fn<-1>(); // { dg-error "" }
   fn(); // { dg-error "no matching function" }
+  // { dg-error "type/value mismatch at argument 1" "" { target *-*-* } .-1 }
+  // { dg-message "expected a constant of type .int., got .int." "" { target 
*-*-* } .-2 }
 }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-ts6.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-ts6.C
index bf665aa6308..e3dff54c83d 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-ts6.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ts6.C
@@ -25,6 +25,8 @@ void driver1() {
 
   f();
   f(); // { dg-error "no matching function for call" }
+  // { dg-error "type/value mismatch at argument 1" "" { target *-*-* } .-1 }
+  // { dg-message "expected a class template, got .int." "" { target *-*-* } 
.-2 }
 
   S2 s2a;
   S2 s2b;
@@ -69,4 +71,4 @@ void driver2()
   S6 s6a;
   S6 s6c; // { dg-error "template constraint failure" }
   S6 s6b; // { dg-error "wrong number of template arguments" 
}
-}
\ No newline at end of file
+}
diff --git a/gcc/testsuite/g++.dg/template/error56.C 
b/gcc/testsuite/g++.dg/template/error56.C
index 3eda04c3225..e85471a50b0 100644
--- a/gcc/testsuite/g++.dg/template/error56.C
+++ b/gcc/testsuite/g++.dg/template/error56.C
@@ -9,4 +9,6 @@ struct A
 int main()
 {
   A().f<1>();  // { dg-error "f<1>" }
+  // { dg-error "type/value mismatch at argument 1" "" { target *-*-* } .-1 }
+  // { dg-message "expected a type, got .1." "" { target *-*-* } .-2 }
 }
diff --git a/gcc/testsuite/g++.dg/template/error59.C 
b/gcc/testsuite/g++.dg/template/error59.C
new file mode 100644
index 000..f81a28c2f1a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/error59.C
@@ -0,0 +1,11 @@
+template struct S { };
+
+template class TT>
+void foo();
+
+void bar()
+{
+  foo(); // { dg-error "no matching function" }
+  // { dg-error "type/value mismatch at argument 1" "" { target *-*-* } .-1 }
+  // { dg-message "expected a template of type .template class TT., got 
.template struct S." "" { target *-*-* } .-2 }
+}
diff --git a/libstdc++-v3/testsuite/20_util/pair/astuple/get_neg.cc 
b/libstdc++-v3/testsuite/20_util/pair/astuple/get_neg.cc
index bcf3940e16d..1f76aef6d73 100644
--- a/libstdc++-v3/testsuite/20_util/pair/astuple/get_neg.cc
+++ b/libstdc++-v3/testsuite/20_util/pair/astuple/get

Re: [PATCH PR94026] combine missed opportunity to simplify comparisons with zero

2020-05-07 Thread Segher Boessenkool
Hi!

On Wed, May 06, 2020 at 08:57:52AM +, Yangfei (Felix) wrote:
> > This looks promising.  I'll try it out, see what it does on other targets.  
> > (It will
> > have to wait for GCC 11 stage 1, of course).
> 
> I see GCC11 stage 1 opens for commits now.
> I have rebased the patch on the latest repo.  Attached please find the v2 
> patch.
> Bootstrapped and tested on x86-64-linux-gnu and aarch64-linux-gnu.
> Is this good to go?

I'll try it out.  Takes a day or so...  Stay tuned.

> > p.s.  Please use a correct mime type?  application/octet-stream isn't
> > something I can reply to.  Just text/plain is fine :-)
> 
> I am using plain text now, hope that works for you.  :-)

It is still application/octet-stream.


Segher


Re: [PATCH] tree-pretty-print: Handle boolean types

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 17:40 +0100, Richard Sandiford wrote:
> AVX512-style masks and SVE-style predicates can be difficult
> to debug in gimple dumps, since the types are printed like this:
> 
>   vector(4) <unnamed type> foo;
> 
> Some important details are hidden by that <unnamed type>,
> such as the number of bits in an element and whether the type
> is signed or unsigned.
> 
> This patch uses an ad-hoc syntax for printing unnamed
> boolean types.  Normal frontend ones should be handled
> by the earlier TYPE_NAME code.
> 
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> 2020-05-07  Richard Sandiford  
> 
> gcc/
>   * tree-pretty-print.c (dump_generic_node): Handle BOOLEAN_TYPEs.
OK
jeff
> 



Re: [PATCH] tree-ssa-structalias.c: Fix comments

2020-05-07 Thread Jeff Law via Gcc-patches
On Wed, 2020-05-06 at 13:57 +0200, Richard Biener via Gcc-patches wrote:
> On Wed, May 6, 2020 at 1:07 PM Erick Ochoa
>  wrote:
> > This patch fixes some quotations inside comments. The change in syntax
> > highlighting was bothering me. I also found a typo.
> 
> OK
I don't think Erick has commit privs, so I went ahead and committed the change.

> 
> > ChangeLog:
> > 
> > 2020-05-06  Erick Ochoa 
> > 
> > * gcc/tree-ssa-struct-alias.c: Fix comments
> > 
> > 
> > 
[ snip ]



Re: [PATCH] c++: Fix spelling of non-static

2020-05-07 Thread Marek Polacek via Gcc-patches
On Thu, Mar 19, 2020 at 12:51:42PM -0400, Marek Polacek via Gcc-patches wrote:
> On Thu, Mar 19, 2020 at 10:45:01AM -0600, Martin Sebor via Gcc-patches wrote:
> > On 3/19/20 9:48 AM, Marek Polacek via Gcc-patches wrote:
> > > I was looking at DR 296 and noticed that we say "nonstatic" instead of
> > > "non-static", which is the version the standard uses.  So this patch
> > > fixes the spelling throughout the front end.  Did not check e.g.
> > > non-dependent or any other.
> > > 
> > > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> > 
> > If this is the spelling we want to standardize on, would you mind
> > adding a check to -Wformat-diag as well so these misspellings get
> > diagnosed in diagnostic messages?  We might also want to add
> > a mention of it to the Spelling section of the GCC Coding
> > Conventions.
> 
> Good point, I can definitely add it.  For now I'm putting this aside though.

Actually, I wasn't sure where to add such a diagnostic, check_plain doesn't seem
to already check for similar typos, so I'll just go ahead with my patch as-is.

> > Alternatively, please try to remember to CC me when you commit
> > the patch in stage1 and I'll add that myself.
> > 
> > Thanks
> > Martin
> > 
> > PS The hyphenated form makes sense to me when applied to keywords
> > or standard terms like non-inline or non-static.  I'm not sure it's
> > necessary or even widespread in other already established terms
> > like nonnegative, and it's even explicitly discouraged in the GCC
> > Coding Conventions for nonzero.
> > 
> > > 
> > >   * decl.c (grok_op_properties): Fix spelling of non-static.
> > >   * typeck.c (build_class_member_access_expr): Likewise.
> > > 
> > >   * g++.dg/other/operator1.C: Adjust expected message.
> > >   * g++.dg/overload/operator2.C: Likewise.
> > >   * g++.dg/template/error30.C: Likewise.
> > >   * g++.old-deja/g++.jason/operator.C: Likewise.
> > > ---
> > >   gcc/cp/call.c   |  2 +-
> > >   gcc/cp/class.c  |  8 
> > >   gcc/cp/cxx-pretty-print.c   |  2 +-
> > >   gcc/cp/decl.c   |  2 +-
> > >   gcc/cp/init.c   | 10 +-
> > >   gcc/cp/search.c |  6 +++---
> > >   gcc/cp/typeck.c |  2 +-
> > >   gcc/testsuite/g++.dg/other/operator1.C  |  2 +-
> > >   gcc/testsuite/g++.dg/overload/operator2.C   |  4 ++--
> > >   gcc/testsuite/g++.dg/template/error30.C |  2 +-
> > >   gcc/testsuite/g++.old-deja/g++.jason/operator.C |  4 ++--
> > >   11 files changed, 22 insertions(+), 22 deletions(-)
> > > 
> > > diff --git a/gcc/cp/call.c b/gcc/cp/call.c
> > > index 1715acc0ec3..db396f428a4 100644
> > > --- a/gcc/cp/call.c
> > > +++ b/gcc/cp/call.c
> > > @@ -8671,7 +8671,7 @@ build_over_call (struct z_candidate *cand, int 
> > > flags, tsubst_flags_t complain)
> > > (DECL_CONTEXT (fn), BINFO_TYPE (cand->conversion_path
> > >   flags |= LOOKUP_NONVIRTUAL;
> > > -  /* [class.mfct.nonstatic]: If a nonstatic member function of a 
> > > class
> > > +  /* [class.mfct.non-static]: If a non-static member function of a 
> > > class
> > >X is called for an object that is not of type X, or of a type
> > >derived from X, the behavior is undefined.
> > > diff --git a/gcc/cp/class.c b/gcc/cp/class.c
> > > index 5340799fdd3..fb2ef202629 100644
> > > --- a/gcc/cp/class.c
> > > +++ b/gcc/cp/class.c
> > > @@ -3661,7 +3661,7 @@ check_field_decls (tree t, tree *access_decls,
> > >   {
> > > /* ARM $12.6.2: [A member initializer list] (or, for an
> > >aggregate, initialization by a brace-enclosed list) is 
> > > the
> > > -  only way to initialize nonstatic const and reference
> > > +  only way to initialize non-static const and reference
> > >members.  */
> > > TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = 1;
> > > TYPE_HAS_COMPLEX_MOVE_ASSIGN (t) = 1;
> > > @@ -3784,7 +3784,7 @@ check_field_decls (tree t, tree *access_decls,
> > >   {
> > > /* ARM $12.6.2: [A member initializer list] (or, for an
> > >aggregate, initialization by a brace-enclosed list) is 
> > > the
> > > -  only way to initialize nonstatic const and reference
> > > +  only way to initialize non-static const and reference
> > >members.  */
> > > TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = 1;
> > > TYPE_HAS_COMPLEX_MOVE_ASSIGN (t) = 1;
> > > @@ -3799,7 +3799,7 @@ check_field_decls (tree t, tree *access_decls,
> > >   | CLASSTYPE_READONLY_FIELDS_NEED_INIT (type));
> > >   }
> > > -  /* Core issue 80: A nonstatic data member is required to have a
> > > +  /* Core issue 80: A non-static data member is required to have

Re: gcc.dg testsuite glitches

2020-05-07 Thread Jeff Law via Gcc-patches
On Tue, 2020-04-28 at 10:36 -0400, Nathan Sidwell wrote:
> On 4/28/20 5:12 AM, Manfred Schwarb wrote:
> > Hi,
> > 
> > first, I do not have commit rights, so please somebody check and commit,
> > I guess this goes under the obvious and trivial rules.
> > 
> > There are several malformed dejagnu directives in the gcc.dg testsuite.
> > Below I fixed some of them following these criteria:
> > - fix mis-typed directives
> > - require that the outermost braces are space padded
> > - fix imbalanced braces
> > 
> > Several of these changes are no-ops, as nonsensical directives act as "dg-do
> > compile",
> > but they should be fixed nevertheless, IMO.
> 
> thanks, these look good, but gonna wait until stage 1.
I've committed and pushed these to the trunk.

jeff



Re: gcc.dg testsuite glitches

2020-05-07 Thread Nathan Sidwell

On 5/7/20 1:34 PM, Jeff Law wrote:

On Tue, 2020-04-28 at 10:36 -0400, Nathan Sidwell wrote:

On 4/28/20 5:12 AM, Manfred Schwarb wrote:

Hi,

first, I do not have commit rights, so please somebody check and commit,
I guess this goes under the obvious and trivial rules.

There are several malformed dejagnu directives in the gcc.dg testsuite.
Below I fixed some of them following these criteria:
- fix mis-typed directives
- require that the outermost braces are space padded
- fix imbalanced braces

Several of these changes are no-ops, as nonsensical directives act as "dg-do
compile",
but they should be fixed nevertheless, IMO.


thanks, these look good, but gonna wait until stage 1.

I've committed and pushed these to the trunk.


thanks, I'd forgotten

--
Nathan Sidwell


Re: [PATCH v3] [Stage1] Refactor tree-ssa-operands.c

2020-05-07 Thread Jeff Law via Gcc-patches
On Mon, 2020-05-04 at 19:00 -0300, Giuliano Belinassi via Gcc-patches wrote:
> Hi,
> 
> This patch refactors tree-ssa-operands.c by wrapping the global
> variables into a class, and also removes unused code.
> 
> The difference between this version and v2 is:
>   * Disable the copy of operands_scanner
>   * remove void from empty arguments functions.
> 
> 
> gcc/ChangeLog:
> 2020-05-04  Giuliano Belinassi  
> 
>   * tree-ssa-operands.c (operands_scanner): New class.
>   (operands_bitmap_obstack): Remove.
>   (n_initialized): Remove.
>   (build_uses): Move to operands_scanner class.
>   (build_vuse): Same as above.
>   (build_vdef): Same as above.
>   (verify_ssa_operands): Same as above.
>   (finalize_ssa_uses): Same as above.
>   (cleanup_build_arrays): Same as above.
>   (finalize_ssa_stmt_operands): Same as above.
>   (start_ssa_stmt_operands): Same as above.
>   (append_use): Same as above.
>   (append_vdef): Same as above.
>   (add_virtual_operand): Same as above.
>   (add_stmt_operand): Same as above.
>   (get_mem_ref_operands): Same as above.
>   (get_tmr_operands): Same as above.
>   (maybe_add_call_vops): Same as above.
>   (get_asm_stmt_operands): Same as above.
>   (get_expr_operands): Same as above.
>   (parse_ssa_operands): Same as above.
>   (finalize_ssa_defs): Same as above.
>   (build_ssa_operands): Same as above, plus create a C-like wrapper.
>   (update_stmt_operands): Create an instance of operands_scanner.
Like Richi, I want to say thanks for taking care of this and feel free to repeat
elsewhere :-)  There's a ton of this kind of cleanup we ought to be doing.

I've committed the patch and pushed  to the trunk.

Jeff



Re: [PATCH] tree-pretty-print: Handle boolean types

2020-05-07 Thread Richard Biener via Gcc-patches
On Thu, May 7, 2020 at 7:24 PM Richard Sandiford
 wrote:
>
> AVX512-style masks and SVE-style predicates can be difficult
> to debug in gimple dumps, since the types are printed like this:
>
>   vector(4) <unnamed type> foo;
>
> Some important details are hidden by that <unnamed type>,
> such as the number of bits in an element and whether the type
> is signed or unsigned.
>
> This patch uses an ad-hoc syntax for printing unnamed
> boolean types.  Normal frontend ones should be handled
> by the earlier TYPE_NAME code.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.

> Richard
>
>
> 2020-05-07  Richard Sandiford  
>
> gcc/
> * tree-pretty-print.c (dump_generic_node): Handle BOOLEAN_TYPEs.
> ---
>  gcc/tree-pretty-print.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
> index 885ca8cd329..f04fd65091a 100644
> --- a/gcc/tree-pretty-print.c
> +++ b/gcc/tree-pretty-print.c
> @@ -1761,6 +1761,14 @@ dump_generic_node (pretty_printer *pp, tree node, int 
> spc, dump_flags_t flags,
> pp_decimal_int (pp, TYPE_PRECISION (node));
> pp_greater (pp);
>   }
> +   else if (TREE_CODE (node) == BOOLEAN_TYPE)
> + {
> +   pp_string (pp, (TYPE_UNSIGNED (node)
> +   ? "<unsigned-boolean:"
> +   : "<signed-boolean:"));
> +   pp_decimal_int (pp, TYPE_PRECISION (node));
> +   pp_greater (pp);
> + }
> else if (TREE_CODE (node) == VOID_TYPE)
>   pp_string (pp, "void");
> else


Re: gcc.dg testsuite glitches

2020-05-07 Thread Jeff Law via Gcc-patches
On Thu, 2020-05-07 at 13:44 -0400, Nathan Sidwell wrote:
> On 5/7/20 1:34 PM, Jeff Law wrote:
> > On Tue, 2020-04-28 at 10:36 -0400, Nathan Sidwell wrote:
> > > On 4/28/20 5:12 AM, Manfred Schwarb wrote:
> > > > Hi,
> > > > 
> > > > first, I do not have commit rights, so please somebody check and commit,
> > > > I guess this goes under the obvious and trivial rules.
> > > > 
> > > > There are several malformed dejagnu directives in the gcc.dg testsuite.
> > > > Below I fixed some of them following these criteria:
> > > > - fix mis-typed directives
> > > > - require that the outermost braces are space padded
> > > > - fix imbalanced braces
> > > > 
> > > > Several of these changes are no-ops, as nonsensical directives act as
> > > > "dg-do
> > > > compile",
> > > > but they should be fixed nevertheless, IMO.
> > > 
> > > thanks, these look good, but gonna wait until stage 1.
> > I've committed and pushed these to the trunk.
> 
> thanks, I'd forgotten
NP.  I'm just walking through everything gcc-11 related that I'd saved away :-) 

jeff



Re: [PATCH, vect] Check alignment for no peeling gaps handling

2020-05-07 Thread Jeff Law via Gcc-patches
On Tue, 2020-04-28 at 09:30 +0200, Richard Biener via Gcc-patches wrote:
> On Fri, Apr 10, 2020 at 11:28 AM Kewen.Lin  wrote:
> > Hi,
> > 
> > This is one fix following Richi's comments here:
> > https://gcc.gnu.org/pipermail/gcc-patches/2020-March/542232.html
> > 
> > I noticed the current half vector support for no peeling gaps
> > handled some cases which never check the half size vector
> > support.  By further investigation, those cases are safe
> > to play without peeling gaps due to ideal alignment.  It
> > > means they don't require half vector handling, so we should
> > > avoid using half vectors for them.
> > 
> > The fix is to add alignment check as a part of conditions for
> > half vector support avoiding redundant half vector codes.
> > 
> > Bootstrapped/regtested on powerpc64le-linux-gnu P8, while
> > aarch64-linux-gnu testing is ongoing.
> > 
> > Is it ok for trunk if all testings are fine?
> 
> OK for stage1 (it's just a missed optimization).
> 
> Thanks,
> Richard.
> 
> > BR,
> > Kewen
> > 
> > 
> > gcc/ChangeLog
> > 
> > 2020-MM-DD  Kewen Lin  
> > 
> > * gcc/tree-vect-stmts.c (vectorizable_load): Check alignment to 
> > avoid
> > redundant half vector handlings for no peeling gaps.
Committed to the trunk.

jeff



Re: set_rtx_cost used wrongly, should be set_src_cost

2020-05-07 Thread Jeff Law via Gcc-patches
On Tue, 2020-04-21 at 13:41 +0100, Richard Sandiford wrote:
> Alan Modra via Gcc-patches  writes:
> > I believe set_rtx_cost is meant to handle a SET, not a PLUS as is
> > passed in these two locations.  Using the proper function for a PLUS
> > doesn't make a huge difference: the only arg change to rtx_cost of any
> > consequence is outer_code of SET rather than INSN.  A mode of
> > word_mode rather than VOIDmode makes no difference at all since the
> > mode is taken from the PLUS.  An opno of 1 rather than 4 also doesn't
> > change anything since the only backend that does anything with opno
> > (besides pass it back to a recursive rtx_cost call) is nios2, and
> > there "opno == 0" is the only use of opno.
> > 
> > Bootstrapped and regression tested powerpc64le-linux and x86_64-linux.
> > OK for next stage1?
> 
> Yes, thanks.
> 
> Richard
> 
> > * tree-ssa-reassoc.c (optimize_range_tests_to_bit_test): Replace
> > set_rtx_cost with set_src_cost.
> > * tree-switch-conversion.c (bit_test_cluster::emit): Likewise.
Pushed to the trunk.

jeff
> > 



Re: [PATCH] tree-optimization/57359 - rewrite SM code

2020-05-07 Thread H.J. Lu via Gcc-patches
On Wed, May 6, 2020 at 6:26 AM Richard Biener  wrote:
>
> On Tue, 5 May 2020, Richard Biener wrote:
>
> >
> > This rewrites store-motion to process candidates where we can
> > ensure order preserving separately and with no need to disambiguate
> > against all stores.  Those candidates we cannot handle this way
> > are validated to be independent on all stores (w/o TBAA) and then
> > processed as "unordered" (all conditionally executed stores are so
> > as well).
> >
> > This will necessarily cause
> >   FAIL: gcc.dg/graphite/pr80906.c scan-tree-dump graphite "isl AST to 
> > Gimple succeeded"
> > because the SM previously performed is not valid for exactly the PR57359
> > reason, we still perform SM of qc for the innermost loop but that's not 
> > enough.
> >
> > There is still room for improvements because we still check some constraints
> > for the order preserving cases that are only necessary in the current
> > strict way for the unordered ones.  Leaving that for the future.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, a final
> > SPEC 2006 evaluation is running.
>
> A first complete 3-run looked too good so I repeated the off-noise
> ones which eliminated most of them as noise ... there's two
> consistent parts namely a ~1% regression on 401.bzip2 and a
> ~1% progression on 464.h264ref.  Given earlier much larger (~5-10%)
> "consistent" noise on other tests I declare it a wash ...
>
> Means, I'll push the patch to master once 10.1 is released (unless
> there are comments on the patch itself).

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94988

-- 
H.J.


Re: [PATCH] c++: Fix spelling of non-static

2020-05-07 Thread Martin Sebor via Gcc-patches

On 5/7/20 11:24 AM, Marek Polacek wrote:

On Thu, Mar 19, 2020 at 12:51:42PM -0400, Marek Polacek via Gcc-patches wrote:

On Thu, Mar 19, 2020 at 10:45:01AM -0600, Martin Sebor via Gcc-patches wrote:

On 3/19/20 9:48 AM, Marek Polacek via Gcc-patches wrote:

I was looking at DR 296 and noticed that we say "nonstatic" instead of
"non-static", which is the version the standard uses.  So this patch
fixes the spelling throughout the front end.  Did not check e.g.
non-dependent or any other.

Bootstrapped/regtested on x86_64-linux, ok for trunk?


If this is the spelling we want to standardize on, would you mind
adding a check to -Wformat-diag as well so these misspellings get
diagnosed in diagnostic messages?  We might also want to add
a mention of it to the Spelling section of the GCC Coding
Conventions.


Good point, I can definitely add it.  For now I'm putting this aside though.


Actually, I wasn't sure where to add such a diagnostic, check_plain doesn't seem
to already check for similar typos, so I'll just go ahead with my patch as-is.


Thanks for remembering this!  I think the word should be added to
the badwords array (above "non-static").  But I can do that if you
prefer.  Looks like I'll be doing some work in this space anyway.

Martin




Alternatively, please try to remember to CC me when you commit
the patch in stage1 and I'll add that myself.

Thanks
Martin

PS The hyphenated form makes sense to me when applied to keywords
or standard terms like non-inline or non-static.  I'm not sure it's
necessary or even widespread in other already established terms
like nonnegative, and it's even explicitly discouraged in the GCC
Coding Conventions for nonzero.



* decl.c (grok_op_properties): Fix spelling of non-static.
* typeck.c (build_class_member_access_expr): Likewise.

* g++.dg/other/operator1.C: Adjust expected message.
* g++.dg/overload/operator2.C: Likewise.
* g++.dg/template/error30.C: Likewise.
* g++.old-deja/g++.jason/operator.C: Likewise.
---
   gcc/cp/call.c   |  2 +-
   gcc/cp/class.c  |  8 
   gcc/cp/cxx-pretty-print.c   |  2 +-
   gcc/cp/decl.c   |  2 +-
   gcc/cp/init.c   | 10 +-
   gcc/cp/search.c |  6 +++---
   gcc/cp/typeck.c |  2 +-
   gcc/testsuite/g++.dg/other/operator1.C  |  2 +-
   gcc/testsuite/g++.dg/overload/operator2.C   |  4 ++--
   gcc/testsuite/g++.dg/template/error30.C |  2 +-
   gcc/testsuite/g++.old-deja/g++.jason/operator.C |  4 ++--
   11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 1715acc0ec3..db396f428a4 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -8671,7 +8671,7 @@ build_over_call (struct z_candidate *cand, int flags, 
tsubst_flags_t complain)
  (DECL_CONTEXT (fn), BINFO_TYPE (cand->conversion_path
flags |= LOOKUP_NONVIRTUAL;
-  /* [class.mfct.nonstatic]: If a nonstatic member function of a class
+  /* [class.mfct.non-static]: If a non-static member function of a class
 X is called for an object that is not of type X, or of a type
 derived from X, the behavior is undefined.
diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 5340799fdd3..fb2ef202629 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -3661,7 +3661,7 @@ check_field_decls (tree t, tree *access_decls,
{
  /* ARM $12.6.2: [A member initializer list] (or, for an
 aggregate, initialization by a brace-enclosed list) is the
-only way to initialize nonstatic const and reference
+only way to initialize non-static const and reference
 members.  */
  TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = 1;
  TYPE_HAS_COMPLEX_MOVE_ASSIGN (t) = 1;
@@ -3784,7 +3784,7 @@ check_field_decls (tree t, tree *access_decls,
{
  /* ARM $12.6.2: [A member initializer list] (or, for an
 aggregate, initialization by a brace-enclosed list) is the
-only way to initialize nonstatic const and reference
+only way to initialize non-static const and reference
 members.  */
  TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = 1;
  TYPE_HAS_COMPLEX_MOVE_ASSIGN (t) = 1;
@@ -3799,7 +3799,7 @@ check_field_decls (tree t, tree *access_decls,
| CLASSTYPE_READONLY_FIELDS_NEED_INIT (type));
}
-  /* Core issue 80: A nonstatic data member is required to have a
+  /* Core issue 80: A non-static data member is required to have a
 different name from the class iff the class has a
 user-declared constructor.  */
 if (constructor_name_p (DECL_NAME (field), t)
@@ -8104,7 +8104,7 @@ resolve_address_of_overloaded_funct

Re: [PATCH] c++: Fix spelling of non-static

2020-05-07 Thread Marek Polacek via Gcc-patches
On Thu, May 07, 2020 at 12:34:44PM -0600, Martin Sebor wrote:
> On 5/7/20 11:24 AM, Marek Polacek wrote:
> > On Thu, Mar 19, 2020 at 12:51:42PM -0400, Marek Polacek via Gcc-patches 
> > wrote:
> > > On Thu, Mar 19, 2020 at 10:45:01AM -0600, Martin Sebor via Gcc-patches 
> > > wrote:
> > > > On 3/19/20 9:48 AM, Marek Polacek via Gcc-patches wrote:
> > > > > I was looking at DR 296 and noticed that we say "nonstatic" instead of
> > > > > "non-static", which is the version the standard uses.  So this patch
> > > > > fixes the spelling throughout the front end.  Did not check e.g.
> > > > > non-dependent or any other.
> > > > > 
> > > > > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> > > > 
> > > > If this is the spelling we want to standardize on, would you mind
> > > > adding a check to -Wformat-diag as well so these misspellings get
> > > > diagnosed in diagnostic messages?  We might also want to add
> > > > a mention of it to the Spelling section of the GCC Coding
> > > > Conventions.
> > > 
> > > Good point, I can definitely add it.  For now I'm putting this aside 
> > > though.
> > 
> > Actually, I wasn't sure where to add such a diagnostic, check_plain doesn't 
> > seem
> > to already check for similar typos, so I'll just go ahead with my patch 
> > as-is.
> 
> Thanks for remembering this!  I think the word should be added to
> the badwords array (above "non-static").  But I can do that if you
> prefer.  Looks like I'll be doing some work in this space anyway.

Oof, how did I miss that?  Does this patch look OK to you then?
Not sure if it deserves a test, I would think not.

* c-format.c (badwords): Add "nonstatic".
---
 gcc/c-family/c-format.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 33a5b6d3965..77d24ad94e4 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -2997,6 +2997,7 @@ static const struct
NAME ("decl", "declaration"),
NAME ("enumeral", "enumerated"),
NAME ("floating point", "floating-point"),
+   NAME ("nonstatic", "non-static"),
NAME ("non-zero", "nonzero"),
NAME ("reg", "register"),
NAME ("stmt", "statement"),

base-commit: 5d5dcc65aae1024da31e0e9cae6a8966461037e8
-- 
Marek Polacek • Red Hat, Inc. • 300 A St, Boston, MA



Re: [PATCH] c++: Fix spelling of non-static

2020-05-07 Thread Martin Sebor via Gcc-patches

On 5/7/20 12:44 PM, Marek Polacek wrote:

On Thu, May 07, 2020 at 12:34:44PM -0600, Martin Sebor wrote:

On 5/7/20 11:24 AM, Marek Polacek wrote:

On Thu, Mar 19, 2020 at 12:51:42PM -0400, Marek Polacek via Gcc-patches wrote:

On Thu, Mar 19, 2020 at 10:45:01AM -0600, Martin Sebor via Gcc-patches wrote:

On 3/19/20 9:48 AM, Marek Polacek via Gcc-patches wrote:

I was looking at DR 296 and noticed that we say "nonstatic" instead of
"non-static", which is the version the standard uses.  So this patch
fixes the spelling throughout the front end.  Did not check e.g.
non-dependent or any other.

Bootstrapped/regtested on x86_64-linux, ok for trunk?


If this is the spelling we want to standardize on, would you mind
adding a check to -Wformat-diag as well so these misspellings get
diagnosed in diagnostic messages?  We might also want to add
a mention of it to the Spelling section of the GCC Coding
Conventions.


Good point, I can definitely add it.  For now I'm putting this aside though.


Actually, I wasn't sure where to add such a diagnostic, check_plain doesn't seem
to already check for similar typos, so I'll just go ahead with my patch as-is.


Thanks for remembering this!  I think the word should be added to
the badwords array (above "non-static").  But I can do that if you
prefer.  Looks like I'll be doing some work in this space anyway.


Oof, how did I miss that?  Does this patch look OK to you then?
Not sure if it deserves a test, I would think not.


Looks good to me. (Despite the contrast with nonzero ;)

Thanks again!

Martin



* c-format.c (badwords): Add "nonstatic".
---
  gcc/c-family/c-format.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 33a5b6d3965..77d24ad94e4 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -2997,6 +2997,7 @@ static const struct
 NAME ("decl", "declaration"),
 NAME ("enumeral", "enumerated"),
 NAME ("floating point", "floating-point"),
+   NAME ("nonstatic", "non-static"),
 NAME ("non-zero", "nonzero"),
 NAME ("reg", "register"),
 NAME ("stmt", "statement"),

base-commit: 5d5dcc65aae1024da31e0e9cae6a8966461037e8





Re: [PATCH 4/4] Use const for template argument.

2020-05-07 Thread Jonathan Wakely via Gcc-patches

On 06/05/20 11:01 +0100, Jonathan Wakely wrote:

On 04/02/20 14:55 +0100, Martin Liska wrote:

diff --git a/libstdc++-v3/include/parallel/multiway_merge.h 
b/libstdc++-v3/include/parallel/multiway_merge.h
index 983c7b2bd9a..97a9ce0feb7 100644
--- a/libstdc++-v3/include/parallel/multiway_merge.h
+++ b/libstdc++-v3/include/parallel/multiway_merge.h
@@ -118,7 +118,7 @@ namespace __gnu_parallel
  *  @return @c true if less. */
 friend bool
 operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
-   _GuardedIterator<_RAIter, _Compare>& __bi2)
+   _GuardedIterator<_RAIter, const _Compare>& __bi2)
 {
  if (__bi1._M_current == __bi1._M_end)   // __bi1 is sup
return __bi2._M_current == __bi2._M_end;  // __bi2 is not sup
@@ -188,7 +188,7 @@ namespace __gnu_parallel
  *  @return @c true if less. */
 friend bool
 operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
-   _UnguardedIterator<_RAIter, _Compare>& __bi2)
+   _UnguardedIterator<_RAIter, const _Compare>& __bi2)
 {
  // Normal compare.
  return (__bi1.__comp)(*__bi1, *__bi2);



This is completely bogus, please revert.

The cppcheck warning is saying that it could be:

   const _UnguardedIterator<_RAIter, _Compare>&

which is completely different from:

   _UnguardedIterator<_RAIter, const _Compare>&

Also both parameters of operator< should have been changed, not just
one, and operator<= should have the same change.

But cppcheck is completely wrong anyway. The operator* member of
_GuardedIterator and _UnguardedIterator is not const, so trying to
dereference *__b1 and *__b2 would fail.

Nack nack nack.


Here's a correct fix for the cppcheck complaint.

Tested powerpc64le-linux, 'make check check-parallel', committed to
master.


commit 4cbc9d8b346b932f34828a51e8822881413951b7
Author: Jonathan Wakely 
Date:   Thu May 7 21:43:49 2020 +0100

libstdc++: Make relational operators work with const guarded iterators (PR 92472)

This is a correct fix for the incorrect cppcheck suggestion to make
these parameters const. In order to do that, the dereference operators need
to be const. The conversions to the underlying iterator can be const
too.

PR c/92472
* include/parallel/multiway_merge.h (_GuardedIterator::operator*)
(_GuardedIterator::operator _RAIter, _UnguardedIterator::operator*)
(_UnguardedIterator::operator _RAIter): Add const qualifier.
(operator<(_GuardedIterator&, _GuardedIterator&)
(operator<=(_GuardedIterator&, _GuardedIterator&)
(operator<(_UnguardedIterator&, _UnguardedIterator&)
(operator<=(_UnguardedIterator&, _UnguardedIterator&): Change
parameters to const references.

diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h
index 983c7b2bd9a..52a8b2ca9e7 100644
--- a/libstdc++-v3/include/parallel/multiway_merge.h
+++ b/libstdc++-v3/include/parallel/multiway_merge.h
@@ -104,12 +104,12 @@ namespace __gnu_parallel
   /** @brief Dereference operator.
   *  @return Referenced element. */
   typename std::iterator_traits<_RAIter>::value_type&
-  operator*()
+  operator*() const
   { return *_M_current; }
 
   /** @brief Convert to wrapped iterator.
   *  @return Wrapped iterator. */
-  operator _RAIter()
+  operator _RAIter() const
   { return _M_current; }
 
   /** @brief Compare two elements referenced by guarded iterators.
@@ -117,8 +117,8 @@ namespace __gnu_parallel
*  @param __bi2 Second iterator.
*  @return @c true if less. */
   friend bool
-  operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
-		_GuardedIterator<_RAIter, _Compare>& __bi2)
+  operator<(const _GuardedIterator<_RAIter, _Compare>& __bi1,
+		const _GuardedIterator<_RAIter, _Compare>& __bi2)
   {
 	if (__bi1._M_current == __bi1._M_end)   // __bi1 is sup
 	  return __bi2._M_current == __bi2._M_end;  // __bi2 is not sup
@@ -132,8 +132,8 @@ namespace __gnu_parallel
*  @param __bi2 Second iterator.
*  @return @c True if less equal. */
   friend bool
-  operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
-		 _GuardedIterator<_RAIter, _Compare>& __bi2)
+  operator<=(const _GuardedIterator<_RAIter, _Compare>& __bi1,
+		 const _GuardedIterator<_RAIter, _Compare>& __bi2)
   {
 	if (__bi2._M_current == __bi2._M_end)   // __bi1 is sup
 	  return __bi1._M_current != __bi1._M_end;  // __bi2 is not sup
@@ -174,12 +174,12 @@ namespace __gnu_parallel
   /** @brief Dereference operator.
   *  @return Referenced element. */
   typename std::iterator_traits<_RAIter>::value_type&
-  operator*()
+  operator*() const
   { return *_M_current; }
 
   /** @brief Convert to wrapped iterator.
   *  @return Wrapped iterator. */
-  operator _RAIter()
+  operator _R

[committed] libstdc++: Fix some C++20 algorithms to work in parallel mode

2020-05-07 Thread Jonathan Wakely via Gcc-patches
Some new algorithms need to use _GLIBCXX_STD_A to refer to the "normal"
version of the algorithm, to work around the namespace dance done for
parallel mode.

PR libstdc++/94971 (partial)
* include/bits/ranges_algo.h (ranges::__sample_fn): Qualify
std::sample using macro to work in parallel mode.
(__sort_fn): Likewise for std::sort.
(ranges::__nth_element_fn): Likewise for std::nth_element.
* include/bits/stl_algobase.h (lexicographical_compare_three_way):
Likewise for std::__min_cmp.
* include/parallel/algobase.h (lexicographical_compare_three_way):
Add to namespace std::__parallel.

Tested powerpc64le-linux, committed to master.

Probably not worth backporting. As noted in the bugzilla PR, parallel
mode remains pretty broken for C++20, and has issues even for C++11.
I'd like to deprecate it in favour of the C++17 parallel algos,
probably if/when we change the default to -std=gnu++17.


commit 9c24e97a97aaad4ad0500170cbae4f387d82ddd6
Author: Jonathan Wakely 
Date:   Thu May 7 21:43:49 2020 +0100

libstdc++: Fix some C++20 algorithms to work in parallel mode

Some new algorithms need to use _GLIBCXX_STD_A to refer to the "normal"
version of the algorithm, to workaround the namespace dance done for
parallel mode.

PR libstdc++/94971 (partial)
* include/bits/ranges_algo.h (ranges::__sample_fn): Qualify
std::sample using macro to work in parallel mode.
(__sort_fn): Likewise for std::sort.
(ranges::__nth_element_fn): Likewise for std::nth_element.
* include/bits/stl_algobase.h (lexicographical_compare_three_way):
Likewise for std::__min_cmp.
* include/parallel/algobase.h (lexicographical_compare_three_way):
Add to namespace std::__parallel.

diff --git a/libstdc++-v3/include/bits/ranges_algo.h 
b/libstdc++-v3/include/bits/ranges_algo.h
index aa07cb97ea6..c038a505afa 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -1758,8 +1758,9 @@ namespace ranges
// FIXME: Forwarding to std::sample here requires computing __lasti
// which may take linear time.
auto __lasti = ranges::next(__first, __last);
-   return std::sample(std::move(__first), std::move(__lasti),
-  std::move(__out), __n, std::forward<_Gen>(__g));
+   return _GLIBCXX_STD_A::
+ sample(std::move(__first), std::move(__lasti), std::move(__out),
+__n, std::forward<_Gen>(__g));
  }
else
  {
@@ -2018,8 +2019,8 @@ namespace ranges
 _Comp __comp = {}, _Proj __proj = {}) const
   {
auto __lasti = ranges::next(__first, __last);
-   std::sort(std::move(__first), __lasti,
- __detail::__make_comp_proj(__comp, __proj));
+   _GLIBCXX_STD_A::sort(std::move(__first), __lasti,
+__detail::__make_comp_proj(__comp, __proj));
return __lasti;
   }
 
@@ -2262,8 +2263,9 @@ namespace ranges
 _Comp __comp = {}, _Proj __proj = {}) const
   {
auto __lasti = ranges::next(__first, __last);
-   std::nth_element(std::move(__first), std::move(__nth), __lasti,
-__detail::__make_comp_proj(__comp, __proj));
+   _GLIBCXX_STD_A::nth_element(std::move(__first), std::move(__nth),
+   __lasti,
+   __detail::__make_comp_proj(__comp, __proj));
return __lasti;
   }
 
diff --git a/libstdc++-v3/include/bits/stl_algobase.h 
b/libstdc++-v3/include/bits/stl_algobase.h
index 089ec2903f6..0a0e29923b8 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -1706,8 +1706,8 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
  if constexpr (__is_byte_iter<_InputIter1>)
if constexpr (__is_byte_iter<_InputIter2>)
  {
-   const auto [__len, __lencmp]
- = std::__min_cmp(__last1 - __first1, __last2 - __first2);
+   const auto [__len, __lencmp] = _GLIBCXX_STD_A::
+ __min_cmp(__last1 - __first1, __last2 - __first2);
if (__len)
  {
const auto __c
@@ -1737,9 +1737,9 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
  _InputIter2 __first2,
  _InputIter2 __last2)
 {
-  return std::lexicographical_compare_three_way(__first1, __last1,
-   __first2, __last2,
-   compare_three_way{});
+  return _GLIBCXX_STD_A::
+   lexicographical_compare_three_way(__first1, __last1, __first2, __last2,
+ compare_three_way{});
 }
 #endif // three_way_comparison

Re: [PATCH] Improve std::fill for vector

2020-05-07 Thread Jonathan Wakely via Gcc-patches

On 06/05/20 20:35 +0100, Jonathan Wakely wrote:

On 06/05/20 20:46 +0200, François Dumont via Libstdc++ wrote:

Hi

I am not clear about current stage so I am proposing this trivial 
patch to find out if we are back in stage 1.


The current status is always shown on the front page of gcc.gnu.org
(although currently the link to the GCC 11 status is broken, because
the list archives got renumbered for some reason, it should be
https://gcc.gnu.org/pipermail/gcc/2020-April/000505.html for GCC 11).

This patch extends the overload so that it is used even when
_GLIBCXX_DEBUG mode is activated.


    * include/bits/stl_algobase.h (struct _Bit_iterator): 
New declaration.
    (std::__fill_a1(_Bit_iterator, _Bit_iterator, const 
bool&)): Likewise.

    * include/bits/stl_bvector.h (__fill_bvector): Move outside
    _GLIBCXX_STD_C namespace.
    (fill(_Bit_iterator, _Bit_iterator, const bool&)): 
Likewise and rename

    into...
    (__fill_a1): ...this.
    * testsuite/25_algorithms/fill/bvector/1.cc: New.

Tested under Linux x86_64 normal and debug modes.

Ok to commit ?


OK, thanks.


I've just fixed the indentation in libstdc++-v3/ChangeLog to use
leading tabs not spaces (at 91d505491c7deda61de04dd64da008e0205abf74).




[committed] Drop remnants of H8/300 COFF support

2020-05-07 Thread Jeff Law via Gcc-patches


We dropped support for COFF from the H8 port over 10 years ago.  These tidbits
were missed.

Committing to the trunk,

Jeff
commit 7764bebb9f772337bcb065aa290ca355083e31ce
Author: Jeff Law 
Date:   Thu Apr 23 13:10:48 2020 -0600

Drop remnants of COFF support

diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index d2fea04d8e4..2c51c7c2955 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -102,9 +102,6 @@ static tree h8300_handle_tiny_data_attribute (tree *, tree, 
tree, int, bool *);
 static void h8300_print_operand_address (FILE *, machine_mode, rtx);
 static void h8300_print_operand (FILE *, rtx, int);
 static bool h8300_print_operand_punct_valid_p (unsigned char code);
-#ifndef OBJECT_FORMAT_ELF
-static void h8300_asm_named_section (const char *, unsigned int, tree);
-#endif
 static int h8300_register_move_cost (machine_mode, reg_class_t, reg_class_t);
 static int h8300_and_costs (rtx);
 static int h8300_shift_costs (rtx);
@@ -323,14 +320,6 @@ h8300_option_override (void)
   static const char *const h8_pop_ops[2]  = { "pop"  , "pop.l"  };
   static const char *const h8_mov_ops[2]  = { "mov.w", "mov.l"  };
 
-#ifndef OBJECT_FORMAT_ELF
-  if (TARGET_H8300SX)
-{
-  error ("%<-msx%> is not supported in coff");
-  target_flags |= MASK_H8300S;
-}
-#endif
-
   if (TARGET_H8300)
 {
   cpu_type = (int) CPU_H8300;
@@ -5457,16 +5446,6 @@ h8300_reorg (void)
 shorten_branches (get_insns ());
 }
 
-#ifndef OBJECT_FORMAT_ELF
-static void
-h8300_asm_named_section (const char *name, unsigned int flags ATTRIBUTE_UNUSED,
-tree decl)
-{
-  /* ??? Perhaps we should be using default_coff_asm_named_section.  */
-  fprintf (asm_out_file, "\t.section %s\n", name);
-}
-#endif /* ! OBJECT_FORMAT_ELF */
-
 /* Nonzero if X is a constant address suitable as an 8-bit absolute,
which is a special case of the 'R' operand.  */
 


[committed] Dropping more COFF support

2020-05-07 Thread Jeff Law via Gcc-patches

I keep finding these tidbits...  In the case of these two macros, they are always
provided by elf.h and/or linux.h.

Committing to the trunk.

Jeff
commit b9cb3bb9ad0dd1bc780a857b96e7bd9c82ba58e6
Author: Jeff Law 
Date:   Thu May 7 17:25:30 2020 -0400

Drop more COFF support from H8 port

* config/h8300/h8300.h (LINK_SPEC): Remove.
(USER_LABEL_PREFIX): Likewise.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c2382f7f2d1..036d386d040 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,8 @@
 2020-05-07 Jeff Law  
 
+   * config/h8300/h8300.h (LINK_SPEC): Remove.
+   (USER_LABEL_PREFIX): Likewise.
+
* config/h8300/h8300.c (h8300_asm_named_section): Remove.
(h8300_option_override): Remove remnants of COFF support.
 
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index 32e5f7dc5cf..50dd7e7766c 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -76,8 +76,6 @@ extern const char * const *h8_reg_names;
 }  \
   while (0)
 
-#define LINK_SPEC "%{mh:%{mn:-m h8300hn}} %{mh:%{!mn:-m h8300h}} %{ms:%{mn:-m h8300sn}} %{ms:%{!mn:-m h8300s}}"
-
 #define LIB_SPEC "%{mrelax:-relax} %{g:-lg} %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}"
 
 /* Macros used in the machine description to test the flags.  */
@@ -659,10 +657,6 @@ struct cum_arg
 #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
ASM_OUTPUT_LABEL (FILE, NAME)
 
-/* The prefix to add to user-visible assembler symbols.  */
-
-#define USER_LABEL_PREFIX "_"
-
 /* This is how to store into the string LABEL
the symbol_ref name of an internal numbered label where
PREFIX is the class of label and NUM is the number within the class.


[committed] Remove original H8/300 support

2020-05-07 Thread Jeff Law via Gcc-patches


OK.  So a more substantial patch this time.

The original 8 bit H8/300 part is horrifically old.  They probably haven't been
made in 20 years or longer.  While the tester builds multilibs for that part, we
don't test them.

Dropping support for the original H8/300 part ultimately allows for some useful
simplifications in the remaining code and reduces the amount of work necessary
for the cc0->CC_REG transition.  We still support the H8/300H and newer parts and
default to H8/300H code generation.  This patch does _some_ simplification, but
there's more to follow...

This doesn't drop the "normal" multilibs, but we might in the near future.  The
"normal" multilibs allow building H8/300H code that runs with 16bit pointers. 
There's just not much to be gained from a simplification standpoint in dropping
"normal" support.

I've verified that this patch doesn't alter the code generated for the chip
variants we still support -- at least across libgcc and newlib.

Committing to the trunk.

Jeff







[committed] Simplifications in H8 patterns, use md include facilities a bit

2020-05-07 Thread Jeff Law via Gcc-patches
This has a few pattern cleanups that are made possible now that the original
H8/300 support is gone.  It also moves all the peepholes into their own file. 
That's a trend that will continue with follow-up patches.  Doing that allows me
to disable large chunks of code during the conversion with minimal effort.

Committing to the trunk.

jeff
More cleanups.  Merging patterns with iterators, split out peepholes, etc.

* config/h8300/h8300.md (adds/subs splitters): Merge into single
splitter.
(negation expanders and patterns): Simplify and combine using
iterators.
(one_cmpl expanders and patterns): Likewise.
(tablejump, indirect_jump patterns): Likewise.
(shift and rotate expanders and patterns): Likewise.
(absolute value expander and pattern): Drop expander, rename pattern
to just "abssf2"
(peephole2 patterns): Move into...
* config/h8300/peepholes.md: New file.


diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index f87b0f7fd2f..4fc55b675e2 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -698,13 +698,13 @@
(set_attr "cc" "set_zn")])
 
 (define_split
-  [(set (match_operand:HI 0 "register_operand" "")
-   (plus:HI (match_dup 0)
-(match_operand:HI 1 "two_insn_adds_subs_operand" "")))]
+  [(set (match_operand:HSI 0 "register_operand" "")
+   (plus:HSI (match_dup 0)
+(match_operand:HSI 1 "two_insn_adds_subs_operand" "")))]
   ""
   [(const_int 0)]
   {
-split_adds_subs (HImode, operands);
+split_adds_subs (<MODE>mode, operands);
 DONE;
   })
 
@@ -722,17 +722,6 @@
(set (attr "cc")
(symbol_ref "compute_plussi_cc (operands)"))])
 
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-   (plus:SI (match_dup 0)
-(match_operand:SI 1 "two_insn_adds_subs_operand" "")))]
-  ""
-  [(const_int 0)]
-  {
-split_adds_subs (SImode, operands);
-DONE;
-  })
-
 ;; --
 ;; SUBTRACT INSTRUCTIONS
 ;; --
@@ -1256,47 +1245,29 @@
   [(set (match_operand:QHSIF 0 "register_operand" "")
(neg:QHSIF (match_operand:QHSIF 1 "register_operand" "")))]
   ""
-  { })
-
-(define_insn "*negqi2"
-  [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
-   (neg:QI (match_operand:QI 1 "h8300_dst_operand" "0")))]
-  ""
-  "neg %X0"
-  [(set_attr "length_table" "unary")
-   (set_attr "cc" "set_zn")])
+  "")
 
-(define_expand "neg2_h8300"
-  [(set (match_dup 2)
-   (not:HSI (match_operand:HSI 1 "register_operand" "")))
-   (set (match_dup 2) (plus:HSI (match_dup 2) (const_int 1)))
-   (set (match_operand:HSI 0 "register_operand" "")
-   (match_dup 2))]
+(define_insn "*neg<mode>2"
+  [(set (match_operand:QHSI 0 "h8300_dst_operand" "=rQ")
+   (neg:QHSI (match_operand:QHSI 1 "h8300_dst_operand" "0")))]
   ""
   {
-operands[2] = gen_reg_rtx (mode);
-  })
-
-(define_insn "*neghi2_h8300hs"
-  [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
-   (neg:HI (match_operand:HI 1 "h8300_dst_operand" "0")))]
-  "h8300_operands_match_p (operands)"
-  "neg.w   %T0"
+if (<MODE>mode == E_QImode)
+  return "neg  %X0";
+if (<MODE>mode == E_HImode)
+  return "neg.w%T0";
+if (<MODE>mode == E_SImode)
+  return "neg.l%S0";
+gcc_unreachable ();
+  }
   [(set_attr "length_table" "unary")
(set_attr "cc" "set_zn")])
 
-(define_insn "*negsi2_h8300hs"
-  [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
-   (neg:SI (match_operand:SI 1 "h8300_dst_operand" "0")))]
-  "h8300_operands_match_p (operands)"
-  "neg.l   %S0"
-  [(set_attr "length_table" "unary")
-   (set_attr "cc" "set_zn")])
 
 (define_insn "*negsf2_h8300hs"
   [(set (match_operand:SF 0 "register_operand" "=r")
(neg:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_H8300H || TARGET_H8300S"
+  ""
   "xor.w\\t#32768,%e0"
   [(set_attr "length" "4")])
 
@@ -1305,13 +1276,7 @@
 ;; ABSOLUTE VALUE INSTRUCTIONS
 ;; --
 
-(define_expand "abssf2"
-  [(set (match_operand:SF 0 "register_operand" "")
-   (abs:SF (match_operand:SF 1 "register_operand" "")))]
-  ""
-  "")
-
-(define_insn "*abssf2_h8300hs"
+(define_insn "abssf2"
   [(set (match_operand:SF 0 "register_operand" "=r")
(abs:SF (match_operand:SF 1 "register_operand" "0")))]
   ""
@@ -1322,35 +1287,22 @@
 ;; NOT INSTRUCTIONS
 ;; --
 
-(define_expand "one_cmpl2"
-  [(set (match_operand:QHSI 0 "register_operand" "")
-   (not:QHSI (match_operand:QHSI 1 "register_operand" "")))]
-  ""
-  "")
-
-(define_insn "*one_cmplqi2"
-  [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
-   (not:QI (match_operand:QI 1 "h8300_dst_operand" "0")))]
+(define_insn "one_cmpl2"
+ 

[PATCH] i386: Define __ILP32__ and _ILP32 for all 32-bit targets

2020-05-07 Thread Gerald Pfeifer
A user reported that gcc -m32 on x86-64 does not define __ILP32__ 
and I found the same on i686 (with gcc -x c -dM -E /dev/null).

The code has

  if (TARGET_X32)
{
  cpp_define (parse_in, "_ILP32");
  cpp_define (parse_in, "__ILP32__");
}

so -mx32 does define __ILP32__.  

This patch does the same for all "regular" 32-bit x86 targets.
Tested on i386-unknown-freebsd11.3 so far.

Okay?

Gerald


* config/i386/i386-c.c (ix86_target_macros): Define _ILP32 and
__ILP32__ for 32-bit targets.
---
 gcc/config/i386/i386-c.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index b46ebb289d2..891b2c68372 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -701,6 +701,8 @@ ix86_target_macros (void)
   cpp_assert (parse_in, "cpu=i386");
   cpp_assert (parse_in, "machine=i386");
   builtin_define_std ("i386");
+  cpp_define (parse_in, "_ILP32");
+  cpp_define (parse_in, "__ILP32__");
 }
 
   if (!TARGET_80387)
-- 
2.26.2


Re: [C++ Patch] PR 90915 [9/10 Regression] ICE in has_attribute, at c-family/c-attribs.c:4221

2020-05-07 Thread Marek Polacek via Gcc-patches
On Wed, Jan 29, 2020 at 10:06:51PM +0100, Paolo Carlini wrote:
> Hi,
> 
> On 29/01/20 19:00, Jason Merrill wrote:
> > On 1/29/20 4:31 AM, Paolo Carlini wrote:
> > > Hi,
> > > 
> > > in this regression we issue a diagnostic about an incomplete type
> > > (only a warning by default) and then we crash when we try to query
> > > has_attribute on a member of the type because in such cases the
> > > member remains an IDENTIFIER_NODE which of course doesn't have a
> > > TREE_TYPE neither a DECL_ATTRIBUTES... Simply recognizing
> > > IDENTIFIER_NODEs and returning false works fine, not sure if we want
> > > to do something more sophisticated. Tested x86_64-linux.
> > 
> > Why are we getting to has_attribute at all for a type-dependent argument?
> 
> Because the implementation of __builtin_has_attribute, largely shared with
> the C front-end, doesn't know about templates at all? :-/
> 
> Not sure it's the best time to complete it, but shouldn't be too difficult.

This ICEs even with a more reasonable test like

template<typename T>
void foo ()
{
  static_assert(!__builtin_has_attribute(T::a, aligned));
}

The problem here is that __builtin_has_attribute doesn't handle type-dependent
arguments at all.  To handle type-dependent arguments we'd have to introduce
a new template code, like STATIC_ASSERT or ADDRESSOF_EXPR (or a new generic
template code for built-ins?), but that's always a pain.

Or, meanwhile, we could just sorry.  Martin, what do you think?

--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -8682,7 +8682,12 @@ cp_parser_has_attribute_expression (cp_parser *parser)
   location_t atloc = cp_lexer_peek_token (parser->lexer)->location;
   if (tree attr = cp_parser_gnu_attribute_list (parser, /*exactly_one=*/true))
 {
-  if (oper != error_mark_node)
+  if (oper == error_mark_node)
+   /* Nothing.  */;
+  else if (type_dependent_expression_p (oper))
+   sorry_at (atloc, "%<__builtin_has_attribute%> with dependent argument "
+ "not supported yet");
+  else
{
  /* Fold constant expressions used in attributes first.  */
  cp_check_const_attributes (attr);


Marek



Re: [C++ Patch] PR 90915 [9/10 Regression] ICE in has_attribute, at c-family/c-attribs.c:4221

2020-05-07 Thread Martin Sebor via Gcc-patches

On 5/7/20 5:03 PM, Marek Polacek via Gcc-patches wrote:

On Wed, Jan 29, 2020 at 10:06:51PM +0100, Paolo Carlini wrote:

Hi,

On 29/01/20 19:00, Jason Merrill wrote:

On 1/29/20 4:31 AM, Paolo Carlini wrote:

Hi,

in this regression we issue a diagnostic about an incomplete type
(only a warning by default) and then we crash when we try to query
has_attribute on a member of the type because in such cases the
member remains an IDENTIFIER_NODE which of course doesn't have a
TREE_TYPE neither a DECL_ATTRIBUTES... Simply recognizing
IDENTIFIER_NODEs and returning false works fine, not sure if we want
to do something more sophisticated. Tested x86_64-linux.


Why are we getting to has_attribute at all for a type-dependent argument?


Because the implementation of __builtin_has_attribute, largely shared with
the C front-end, doesn't know about templates at all? :-/

Not sure it's the best time to complete it, but shouldn't be too difficult.


This ICEs even with a more reasonable test like

template<typename T>
void foo ()
{
   static_assert(!__builtin_has_attribute(T::a, aligned));
}

The problem here is that __builtin_has_attribute doesn't handle type-dependent
arguments at all.  To handle type-dependent arguments we'd have to introduce
a new template code, like STATIC_ASSERT or ADDRESSOF_EXPR (or a new generic
template code for built-ins?), but that's always a pain.

Or, meanwhile, we could just sorry.  Martin, what do you think?


I never did implement the template handling and I didn't think to put
in a stopgap like the one below.  It makes sense until I get around to
implementing it, hopefully for GCC 11.

Thanks!

Martin



--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -8682,7 +8682,12 @@ cp_parser_has_attribute_expression (cp_parser *parser)
location_t atloc = cp_lexer_peek_token (parser->lexer)->location;
if (tree attr = cp_parser_gnu_attribute_list (parser, /*exactly_one=*/true))
  {
-  if (oper != error_mark_node)
+  if (oper == error_mark_node)
+   /* Nothing.  */;
+  else if (type_dependent_expression_p (oper))
+   sorry_at (atloc, "%<__builtin_has_attribute%> with dependent argument "
+ "not supported yet");
+  else
 {
   /* Fold constant expressions used in attributes first.  */
   cp_check_const_attributes (attr);


Marek





Re: [C++ Patch] PR 90915 [9/10 Regression] ICE in has_attribute, at c-family/c-attribs.c:4221

2020-05-07 Thread Marek Polacek via Gcc-patches
On Thu, May 07, 2020 at 06:09:30PM -0600, Martin Sebor wrote:
> On 5/7/20 5:03 PM, Marek Polacek via Gcc-patches wrote:
> > On Wed, Jan 29, 2020 at 10:06:51PM +0100, Paolo Carlini wrote:
> > > Hi,
> > > 
> > > On 29/01/20 19:00, Jason Merrill wrote:
> > > > On 1/29/20 4:31 AM, Paolo Carlini wrote:
> > > > > Hi,
> > > > > 
> > > > > in this regression we issue a diagnostic about an incomplete type
> > > > > (only a warning by default) and then we crash when we try to query
> > > > > has_attribute on a member of the type because in such cases the
> > > > > member remains an IDENTIFIER_NODE which of course doesn't have a
> > > > > TREE_TYPE neither a DECL_ATTRIBUTES... Simply recognizing
> > > > > IDENTIFIER_NODEs and returning false works fine, not sure if we want
> > > > > to do something more sophisticated. Tested x86_64-linux.
> > > > 
> > > > Why are we getting to has_attribute at all for a type-dependent 
> > > > argument?
> > > 
> > > Because the implementation of __builtin_has_attribute, largely shared with
> > > the C front-end, doesn't know about templates at all? :-/
> > > 
> > > Not sure it's the best time to complete it, but shouldn't be too 
> > > difficult.
> > 
> > This ICEs even with a more reasonable test like
> > 
> > template<typename T>
> > void foo ()
> > {
> >static_assert(!__builtin_has_attribute(T::a, aligned));
> > }
> > 
> > The problem here is that __builtin_has_attribute doesn't handle 
> > type-dependent
> > arguments at all.  To handle type-dependent arguments we'd have to introduce
> > a new template code, like STATIC_ASSERT or ADDRESSOF_EXPR (or a new generic
> > template code for built-ins?), but that's always a pain.
> > 
> > Or, meanwhile, we could just sorry.  Martin, what do you think?
> 
> I never did implement the template handling and I didn't think to put
> in a stopgap like the one below.  It makes sense until I get around to
> implementing it, hopefully for GCC 11.

Ah, and we have PR92104 to track that.   Here's a complete patch then:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

>From 7ed334b7998314bab12fe4741bc311a47457ea3a Mon Sep 17 00:00:00 2001
From: Marek Polacek 
Date: Thu, 7 May 2020 21:10:42 -0400
Subject: [PATCH] c++: Sorry about type-dependent arg for
 __builtin_has_attribute [PR90915]

Until 92104 is fixed, let's sorry rather than crash.

PR c++/90915
* parser.c (cp_parser_has_attribute_expression): Sorry on a
type-dependent argument.

* g++.dg/ext/builtin-has-attribute.C: New test.
---
 gcc/cp/parser.c  | 7 ++-
 gcc/testsuite/g++.dg/ext/builtin-has-attribute.C | 8 
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/ext/builtin-has-attribute.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d67fa3b13d1..f586c89b109 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -8682,7 +8682,12 @@ cp_parser_has_attribute_expression (cp_parser *parser)
   location_t atloc = cp_lexer_peek_token (parser->lexer)->location;
   if (tree attr = cp_parser_gnu_attribute_list (parser, /*exactly_one=*/true))
 {
-  if (oper != error_mark_node)
+  if (oper == error_mark_node)
+   /* Nothing.  */;
+  else if (type_dependent_expression_p (oper))
+   sorry_at (atloc, "%<__builtin_has_attribute%> with dependent argument "
+ "not supported yet");
+  else
{
  /* Fold constant expressions used in attributes first.  */
  cp_check_const_attributes (attr);
diff --git a/gcc/testsuite/g++.dg/ext/builtin-has-attribute.C 
b/gcc/testsuite/g++.dg/ext/builtin-has-attribute.C
new file mode 100644
index 000..3438dd59ba3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/builtin-has-attribute.C
@@ -0,0 +1,8 @@
+// PR c++/90915
+// { dg-do compile { target c++11 } }
+
+template<typename T>
+void foo ()
+{
+  static_assert(!__builtin_has_attribute(T::a, aligned), ""); // { dg-message "sorry, unimplemented: .__builtin_has_attribute. with dependent argument not supported yet" }
+}

base-commit: 74d58ad2c208c9c445bb3e8288db08e092a66316
-- 
Marek Polacek • Red Hat, Inc. • 300 A St, Boston, MA



[PATCH] rs6000: powerpc_future_ok and powerpc_future_hw

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Dejagnu targets for these.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Patch shepherded by Bill Schmidt on behalf of Kelvin Nilsen.

Thanks!
Bill

2020-03-04  Kelvin Nilsen  

* gcc.target/powerpc/dg-future-0.c: New.
* gcc.target/powerpc/dg-future-1.c: New.
* lib/target-supports.exp (check_powerpc_future_hw_available):
Replace -mfuture with -mcpu=future.
(check_effective_target_powerpc_future_ok): Likewise.
(is-effective-target): Add powerpc_future_hw.
---
 .../gcc.target/powerpc/dg-future-0.c  | 14 ++
 .../gcc.target/powerpc/dg-future-1.c  | 19 +++
 gcc/testsuite/lib/target-supports.exp |  5 +++--
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dg-future-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dg-future-1.c

diff --git a/gcc/testsuite/gcc.target/powerpc/dg-future-0.c 
b/gcc/testsuite/gcc.target/powerpc/dg-future-0.c
new file mode 100644
index 000..abfda82a684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dg-future-0.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-options "-mdejagnu-cpu=future" } */
+
+/* This tests that powerpc_future_ok works.  */
+
+extern void abort (void);
+
+int futurity (void) {
+  long int e = -1;
+  asm ("pli %0,%1": "+r" (e) : "n" (0x12345));
+  return (e == 0x12345);
+}
+
+/* { dg-final { scan-assembler {\mpli\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dg-future-1.c 
b/gcc/testsuite/gcc.target/powerpc/dg-future-1.c
new file mode 100644
index 000..6087fe7c894
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dg-future-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_future_hw } */
+/* { dg-options "-mdejagnu-cpu=future" } */
+
+/* This tests that powerpc_future_hw works.  */
+
+extern void abort (void);
+
+int futurity (void) {
+  long int e = -1;
+  asm ("pli %0,%1": "+r" (e) : "n" (0x12345));
+  return (e == 0x12345);
+}
+
+int main (int argc, char *argv [])
+{
+  if (!futurity ())
+abort ();
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 13761491e63..40978e580a3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2204,7 +2204,7 @@ proc check_powerpc_future_hw_available { } {
asm ("pli %0,%1" : "+r" (e) : "n" (0x12345));
return (e == 0x12345);
}
-   } "-mfuture"
+   } "-mcpu=future"
 }]
 }
 
@@ -5816,7 +5816,7 @@ proc check_effective_target_powerpc_future_ok { } {
asm ("pli %0,%1" : "=r" (e) : "n" (0x12345));
return e;
}
-   } "-mfuture"]
+   } "-mcpu=future"]
 } else {
return 0
 }
@@ -7763,6 +7763,7 @@ proc is-effective-target { arg } {
  "named_sections" { set selected [check_named_sections_available] }
  "gc_sections"{ set selected [check_gc_sections_available] }
  "cxa_atexit" { set selected [check_cxa_atexit_available] }
+ "powerpc_future_hw" { set selected [check_powerpc_future_hw_available] }
  default  { error "unknown effective target keyword `$arg'" }
}
 }
-- 
2.17.1



[PATCH] rs6000: Add vector count under mask

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for new vclzdm and vctzdm vector instructions that
count leading and trailing zeros under control of a mask.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_clzm): New macro.
(vec_ctzm): Likewise.
* config/rs6000/altivec.md (UNSPEC_VCLZDM): New constant.
(UNSPEC_VCTZDM): Likewise.
(vclzdm): New insn.
(vctzdm): Likewise.
* config/rs6000/rs6000-builtin.def (BU_FUTURE_V_0): New macro.
(BU_FUTURE_V_1): Likewise.
(BU_FUTURE_V_2): Likewise.
(BU_FUTURE_V_3): Likewise.
(__builtin_altivec_vclzdm): New builtin definition.
(__builtin_altivec_vctzdm): Likewise.
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Cause
_ARCH_PWR_FUTURE macro to be defined if OPTION_MASK_FUTURE flag is
set.
* config/rs6000/rs6000-call.c (builtin_function_type): Set return
value and parameter types to be unsigned for VCLZDM and VCTZDM.
* config/rs6000/rs6000.c (rs6000_builtin_mask_calculate): Add
support for TARGET_FUTURE flag.
* config/rs6000/rs6000.h (RS6000_BTM_FUTURE): New macro constant.
* doc/extend.texi (PowerPC Altivec Built-in Functions Available
for a Future Architecture): New subsubsection.

[gcc/testsuite]

2020-05-07  Kelvin Nilsen  

* gcc.target/powerpc/vec-clzm-0.c: New test.
* gcc.target/powerpc/vec-clzm-1.c: New test.
* gcc.target/powerpc/vec-ctzm-0.c: New test.
* gcc.target/powerpc/vec-ctzm-1.c: New test.
---
 gcc/config/rs6000/altivec.h   |  7 +++
 gcc/config/rs6000/altivec.md  | 21 
 gcc/config/rs6000/rs6000-builtin.def  | 40 ++
 gcc/config/rs6000/rs6000-c.c  |  2 +
 gcc/config/rs6000/rs6000-call.c   |  2 +
 gcc/config/rs6000/rs6000.c|  3 +-
 gcc/config/rs6000/rs6000.h|  2 +
 gcc/doc/extend.texi   | 27 ++
 gcc/testsuite/gcc.target/powerpc/vec-clzm-0.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-clzm-1.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-ctzm-0.c | 54 +++
 gcc/testsuite/gcc.target/powerpc/vec-ctzm-1.c | 53 ++
 12 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clzm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-clzm-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ctzm-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-ctzm-1.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 5f1f5924488..e1e75ad0f1e 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -686,4 +686,11 @@ __altivec_scalar_pred(vec_any_nle,
to #define vec_step to __builtin_vec_step.  */
 #define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0)
 
+#ifdef _ARCH_PWR_FUTURE
+/* May modify these macro definitions if future capabilities overload
+   with support for different vector argument and result types.  */
+#define vec_clzm(a, b) __builtin_altivec_vclzdm (a, b)
+#define vec_ctzm(a, b) __builtin_altivec_vctzdm (a, b)
+#endif
+
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 6b1d987913c..5ef4889ba55 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -160,6 +160,8 @@ (define_c_enum "unspec"
UNSPEC_BCD_OVERFLOW
UNSPEC_VRLMI
UNSPEC_VRLNM
+   UNSPEC_VCLZDM
+   UNSPEC_VCTZDM
 ])
 
 (define_c_enum "unspecv"
@@ -4096,6 +4098,25 @@ (define_insn "*bcd_test2"
   "bcd. %0,%1,%2,%3"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "vclzdm"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VCLZDM))]
+   "TARGET_FUTURE"
+   "vclzdm %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+(define_insn "vctzdm"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VCTZDM))]
+   "TARGET_FUTURE"
+   "vctzdm %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+
 (define_expand "bcd_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
   (compare:CCFP
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 54f750c8384..9293e7cf4fb 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -933,6 +933,42 @@
 | RS6000_BTC_BINARY),  

[PATCH] rs6000: Add pdep/pext

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for the vpdepd and vpextd instructions which perform
vector parallel bit deposit and vector parallel bit extract.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_pdep): New macro implementing new
built-in function.
(vec_pext): Likewise.
* config/rs6000/altivec.md (UNSPEC_VPDEPD): New constant.
(UNSPEC_VPEXTD): Likewise.
(vpdepd): New insn.
(vpextd): Likewise.
* config/rs6000/rs6000-builtin.def (__builtin_altivec_vpdepd): New
built-in function.
(__builtin_altivec_vpextd): Likewise.
* config/rs6000/rs6000-call.c (builtin_function_type): Add
handling for FUTURE_BUILTIN_VPDEPD and FUTURE_BUILTIN_VPEXTD
cases.
* doc/extend.texi (PowerPC Altivec Built-in Functions Available
for a Future Architecture): Add description of vec_pdep and
vec_pext built-in functions.

2020-05-07  Kelvin Nilsen  

* gcc.target/powerpc/vec-pdep-0.c: New.
* gcc.target/powerpc/vec-pdep-1.c: New.
* gcc.target/powerpc/vec-pext-0.c: New.
* gcc.target/powerpc/vec-pext-1.c: New.
---
 gcc/config/rs6000/altivec.h   |  3 +
 gcc/config/rs6000/altivec.md  | 20 ++
 gcc/config/rs6000/rs6000-builtin.def  |  2 +
 gcc/config/rs6000/rs6000-call.c   |  2 +
 gcc/doc/extend.texi   | 15 +
 gcc/testsuite/gcc.target/powerpc/vec-pdep-0.c | 61 +++
 gcc/testsuite/gcc.target/powerpc/vec-pdep-1.c | 53 
 gcc/testsuite/gcc.target/powerpc/vec-pext-0.c | 53 
 gcc/testsuite/gcc.target/powerpc/vec-pext-1.c | 52 
 9 files changed, 261 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pdep-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pdep-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pext-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-pext-1.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index e1e75ad0f1e..12dfcd8d2bf 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -691,6 +691,9 @@ __altivec_scalar_pred(vec_any_nle,
with support for different vector argument and result types.  */
 #define vec_clzm(a, b) __builtin_altivec_vclzdm (a, b)
 #define vec_ctzm(a, b) __builtin_altivec_vctzdm (a, b)
+#define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
+#define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
+
 #endif
 
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 5ef4889ba55..33ba57855bc 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -162,6 +162,8 @@ (define_c_enum "unspec"
UNSPEC_VRLNM
UNSPEC_VCLZDM
UNSPEC_VCTZDM
+   UNSPEC_VPDEPD
+   UNSPEC_VPEXTD
 ])
 
 (define_c_enum "unspecv"
@@ -4116,6 +4118,24 @@ (define_insn "vctzdm"
"vctzdm %0,%1,%2"
[(set_attr "type" "vecsimple")])
 
+(define_insn "vpdepd"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VPDEPD))]
+   "TARGET_FUTURE"
+   "vpdepd %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
+(define_insn "vpextd"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+   (unspec:V2DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+ (match_operand:V2DI 2 "altivec_register_operand" "v")]
+UNSPEC_VPEXTD))]
+   "TARGET_FUTURE"
+   "vpextd %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
 
 (define_expand "bcd_"
   [(parallel [(set (reg:CCFP CR6_REGNO)
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 9293e7cf4fb..776fc542ebf 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2518,6 +2518,8 @@ BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set")
 /* Future architecture vector built-ins.  */
 BU_FUTURE_V_2 (VCLZDM, "vclzdm", CONST, vclzdm)
 BU_FUTURE_V_2 (VCTZDM, "vctzdm", CONST, vctzdm)
+BU_FUTURE_V_2 (VPDEPD, "vpdepd", CONST, vpdepd)
+BU_FUTURE_V_2 (VPEXTD, "vpextd", CONST, vpextd)
 
 /* 1 argument crypto functions.  */
 BU_CRYPTO_1 (VSBOX,"vsbox",  CONST, crypto_vsbox_v2di)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 2a4ce5bd340..ab6ba576605 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12928,6 +12928,8 @@ builtin_function_type (machine_mode mode_ret, 
machine_mode mode_arg0,
 case P8V_BUILTIN_ORC_V1TI_UNS:
 case FUTURE_BUILTIN_VCLZDM:
 case FUTURE_BUILTIN_VCTZDM:
+case FUTURE_BUILTIN_VPD

[PATCH] rs6000: Add vgnb

2020-05-07 Thread Bill Schmidt via Gcc-patches
From: Kelvin Nilsen 

Add support for the vgnb instruction, which gathers every Nth bit
per vector element.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  Is this okay for master?

Thanks,
Bill

[gcc]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* config/rs6000/altivec.h (vec_gnb): New #define.
* config/rs6000/altivec.md (UNSPEC_VGNB): New constant.
(vgnb): New insn.
* config/rs6000/rs6000-builtin.def (BU_FUTURE_OVERLOAD_1): New
#define.
(BU_FUTURE_OVERLOAD_2): Likewise.
(BU_FUTURE_OVERLOAD_3): Likewise.
(__builtin_altivec_gnb): New built-in function.
(__builtin_vec_gnb): New overloaded built-in function.
* config/rs6000/rs6000-call.c (altivec_overloaded_builtins):
Define overloaded forms of __builtin_vec_gnb.
(rs6000_expand_binop_builtin): Add error checking for 2nd argument
of __builtin_vec_gnb.
(builtin_function_type): Mark return value and arguments unsigned
for FUTURE_BUILTIN_VGNB.
* doc/extend.texi (PowerPC AltiVec Built-in Functions Available
for a Future Architecture): Add description of vec_gnb built-in
function.

[gcc/testsuite]

2020-05-07  Kelvin Nilsen  
Bill Schmidt  

* gcc.target/powerpc/vec-gnb-0.c: New test.
* gcc.target/powerpc/vec-gnb-1.c: New test.
* gcc.target/powerpc/vec-gnb-10.c: New test.
* gcc.target/powerpc/vec-gnb-2.c: New test.
* gcc.target/powerpc/vec-gnb-3.c: New test.
* gcc.target/powerpc/vec-gnb-4.c: New test.
* gcc.target/powerpc/vec-gnb-5.c: New test.
* gcc.target/powerpc/vec-gnb-6.c: New test.
* gcc.target/powerpc/vec-gnb-7.c: New test.
* gcc.target/powerpc/vec-gnb-8.c: New test.
* gcc.target/powerpc/vec-gnb-9.c: New test.
---
 gcc/config/rs6000/altivec.h   |  2 +
 gcc/config/rs6000/altivec.md  | 10 +++
 gcc/config/rs6000/rs6000-builtin.def  | 29 +++
 gcc/config/rs6000/rs6000-call.c   | 26 +++
 gcc/doc/extend.texi   | 18 -
 gcc/testsuite/gcc.target/powerpc/vec-gnb-0.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-1.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c | 72 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-2.c  | 73 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c  | 72 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c  | 71 ++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c  | 75 +++
 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c  | 74 ++
 16 files changed, 884 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-0.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-10.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-5.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-6.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-7.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-8.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-gnb-9.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 12dfcd8d2bf..b6ecad6911d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -694,6 +694,8 @@ __altivec_scalar_pred(vec_any_nle,
 #define vec_pdep(a, b) __builtin_altivec_vpdepd (a, b)
 #define vec_pext(a, b) __builtin_altivec_vpextd (a, b)
 
+/* Overloaded built-in functions for future architecture.  */
+#define vec_gnb(a, b)  __builtin_vec_gnb (a, b)
 #endif
 
 #endif /* _ALTIVEC_H */
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 33ba57855bc..7cebb58331e 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -162,6 +162,7 @@ (define_c_enum "unspec"
UNSPEC_VRLNM
UNSPEC_VCLZDM
UNSPEC_VCTZDM
+   UNSPEC_VGNB
UNSPEC_VPDEPD
UNSPEC_VPEXTD
 ])
@@ -4136,6 +4137,15 @@ (define_insn "vpextd"
"vpextd %0,%1,%2"
[(set_attr "type" "vecsimple")])
 
+(define_insn "vgnb"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v")
+   (match_operand:QI 2 "u3bit_cint_operand" "n")]
+ UNSPEC_VGNB))]
+   "TARGET_FUTURE"
+   "vgnb %0,%1,%2"
+   [(set_attr "type" "vecsimple")])
+
 
 (define_expand "bcd_"

[PATCH PR94991] aarch64: ICE: Segmentation fault with option -mgeneral-regs-only

2020-05-07 Thread Yangfei (Felix)
Hi,

  Witnessed another ICE with option -mgeneral-regs-only. 
  I have created a bug for that: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94991 

  For the given testcase, the scalar floating-point move expand pattern
  executes FAIL because TARGET_FLOAT is false with option
  -mgeneral-regs-only.  But a move expand pattern cannot fail.  It would
  be better to replace the FAIL with code that bitcasts to the equivalent
  integer mode, using gen_lowpart.

  Bootstrapped and tested on aarch64-linux-gnu.  Comments?

Thanks,
Felix


pr94991-v1.diff
Description: pr94991-v1.diff


Re: [PATCH] i386: Define __ILP32__ and _ILP32 for all 32-bit targets

2020-05-07 Thread Uros Bizjak via Gcc-patches
On Fri, May 8, 2020 at 12:58 AM Gerald Pfeifer  wrote:
>
> A user reported that gcc -m32 on x86-64 does not define __ILP32__
> and I found the same on i686 (with gcc -x c -dM -E /dev/null).
>
> The code has
>
>   if (TARGET_X32)
> {
>   cpp_define (parse_in, "_ILP32");
>   cpp_define (parse_in, "__ILP32__");
> }
>
> so -mx32 does define __ILP32__.
>
> This patch does the same for all "regular" 32-bit x86 targets.
> Tested on i386-unknown-freebsd11.3 so far.
>
> Okay?

OK.

Thanks,
Uros.

> Gerald
>
>
> * config/i386/i386-c.c (ix86_target_macros): Define _ILP32 and
> __ILP32__ for 32-bit targets.
> ---
>  gcc/config/i386/i386-c.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index b46ebb289d2..891b2c68372 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -701,6 +701,8 @@ ix86_target_macros (void)
>cpp_assert (parse_in, "cpu=i386");
>cpp_assert (parse_in, "machine=i386");
>builtin_define_std ("i386");
> +  cpp_define (parse_in, "_ILP32");
> +  cpp_define (parse_in, "__ILP32__");
>  }
>
>if (!TARGET_80387)
> --
> 2.26.2


Re: avoid infinite loops in rpo fre

2020-05-07 Thread Richard Biener via Gcc-patches
On Thu, May 7, 2020 at 6:27 PM Alexandre Oliva  wrote:
>
>
> gnat.dg/opt83.adb compiled with -O2+ would enter an infinite loop with
> memory allocation within fre.  I don't think there is anything
> Ada-specific in the bug, but the exact inlining and loop unrolling
> circumstances needed to trigger the bug are quite fragile, so I didn't
> try very hard to translate it to C.
>
> The problem comes about while attempting to eliminate the last of the
> following stmts, generated for 'R (0) := F;':
>
>   A78b_144 = MEM  [(struct opt83__e &)_41][0]{lb: 
> _46 sz: 16}._tag;
>   MEM  [(struct opt83__e &)_41][0]{lb: _46 sz: 16} 
> = f;
>   MEM  [(struct opt83__e &)_41][0]{lb: _46 sz: 
> 16}._tag = A78b_144;
>
> valueize_refs_1 takes a sequence of vn_reference_op_s with _41 in it, and
> when it gets to that op, vn_valueize = rpo_vn_valueize replaces _41 with
> _47, defined in the previous block as:
>
>   _47 = &(*_41)[0]{lb: _46 sz: 16};
>
> _47 is the first argument passed to the function synthesized to copy F
> to the first element of array R, after checking that their addresses
> do not compare equal.
>
> There is another earlier def in the Value Numbering set associated with
> _41, namely:
>
>   _164 = &MEM[(struct ALLOC *)_163].ARRAY;
>
> _163 is the newly-allocated storage for the 0..4 array.  Unfortunately
> the logic in rpo_vn_valueize selects the former, and then we add the
> _47 definition in _41's place in the op sequence.  Problem is _41 is
> part of the expression, and thus of the expansion, so eventually we
> reach it and replace it again, and again, and at every cycle we add
> more ops than we remove, so the sequence grows unbounded.

So value-numbering value-numbered _41 and _47 the same which
looks sensible only if _46 is zero.  But at the _47 definition we should
not have recorded _47 as another available name for _41 so we should
not have valueized to _47.

I'll try to debug this myself, the proposed patch looks wrong to me.

Richard.

>
> Limiting the selection of alternate defs for the value to those that
> dominate the def we're replacing should be enough to avoid the
> problem, since we'd only perform replacements "up" the CFG.  Changing
> the BB context for the selection of the value equivalence to that of
> the name we're replacing, rather than that of the expression in which
> we're replacing it, seems to be close enough.  It does solve the
> problem without any codegen changes in a GCC bootstrap, despite a few
> differences in eliminate_avail.
>
> Regstrapped on x86_64-linux-gnu.  Ok to install?
>
> As I prepare to post this, it occurs to me that maybe, instead of using
> vn_context_bb for a default NAME like before, we should abandon the
> attempt to substitute it, lest we might run into the same kind of
> infinite loop for e.g. _41(D).  WDYT?
>
>
> for  gcc/ChangeLog
>
> * tree-ssa-sccvn.c (rpo_vn_valueize): Take the BB context from
> NAME.
>
> for  gcc/testsuite/ChangeLog
>
> * gnat.dg/opt83.adb: New.
> ---
>  gcc/testsuite/gnat.dg/opt83.adb |   33 +
>  gcc/tree-ssa-sccvn.c|7 ++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gnat.dg/opt83.adb
>
> diff --git a/gcc/testsuite/gnat.dg/opt83.adb b/gcc/testsuite/gnat.dg/opt83.adb
> new file mode 100644
> index ..7418520
> --- /dev/null
> +++ b/gcc/testsuite/gnat.dg/opt83.adb
> @@ -0,0 +1,33 @@
> +--  { dg-do compile }
> +--  { dg-options "-O2" }
> +
> +--  rpo fre3 used to loop indefinitely replacing _2 with _8 and back,
> +--  given MEM[(struct test__e &)_2][0]{lb: _7 sz: 16}._tag = A23s_29;
> +--  and an earlier _8 = &*_2[0]{lb: _7 sz: 16}.
> +
> +procedure Opt83 is
> +
> +   type E is tagged record
> +  I : Natural := 0;
> +   end record;
> +
> +   type A is array (Natural range <>) of aliased E;
> +
> +   F : E;
> +
> +   R : access A;
> +
> +   procedure N is
> +   begin
> +  if R = null then
> +R := new A (0 .. 4);
> +  end if;
> +   end N;
> +
> +begin
> +
> +   N;
> +
> +   R (0) := F;
> +
> +end Opt83;
> diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
> index 8a4af91..9008724 100644
> --- a/gcc/tree-ssa-sccvn.c
> +++ b/gcc/tree-ssa-sccvn.c
> @@ -6790,9 +6790,14 @@ rpo_vn_valueize (tree name)
> {
>   if (TREE_CODE (tem) != SSA_NAME)
> return tem;
> + basic_block bb = vn_context_bb;
> + /* Avoid replacing name with anything whose definition
> +could refer back to name.  */
> + if (! SSA_NAME_IS_DEFAULT_DEF (name))
> +   bb = gimple_bb (SSA_NAME_DEF_STMT (name));
>   /* For all values we only valueize to an available leader
>  which means we can use SSA name info without restriction.  */
> - tem = rpo_avail->eliminate_avail (vn_context_bb, tem);
> + tem = rpo_avail->eliminate_avail (bb, tem);
>   if (tem)
>