Re: [committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 09:29, Jonathan Wakely  wrote:

>
>
> On Tue, 20 Jun 2023 at 01:54, Patrick Palka  wrote:
>
>> On Fri, 9 Jun 2023, Jonathan Wakely via Libstdc++ wrote:
>>
>> > Tested powerpc64le-linux. Pushed to trunk.
>> >
>> > This makes sense to backport after some soak time on trunk.
>> >
>> > -- >8 --
>> >
>> > As reported in PR libstdc++/110167, std::to_array compiles extremely
>> > slowly for very large arrays. It needs to instantiate a very large
>> > specialization of std::index_sequence and then create a very large
>> > aggregate initializer from the pack expansion. For trivial types we can
>> > simply default-initialize the std::array and then use memcpy to copy the
>> > values. For non-trivial types we need to use the existing
>> > implementation, despite the compilation cost.
>> >
>> > As also noted in the PR, using a generic lambda instead of the
>> > __to_array helper compiles faster since gcc-13. It also produces
>> > slightly smaller code at -O1, due to additional inlining. The code at
>> > -Os, -O2 and -O3 seems to be the same. This new implementation requires
>> > __cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
>> > since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).
>> >
>> > libstdc++-v3/ChangeLog:
>> >
>> >   PR libstdc++/110167
>> >   * include/std/array (to_array): Initialize arrays of trivial
>> >   types using memcpy. For non-trivial types, use lambda
>> >   expressions instead of a separate helper function.
>> >   (__to_array): Remove.
>> >   * testsuite/23_containers/array/creation/110167.cc: New test.
>> > ---
>> >  libstdc++-v3/include/std/array| 53 +--
>> >  .../23_containers/array/creation/110167.cc| 14 +
>> >  2 files changed, 51 insertions(+), 16 deletions(-)
>> >  create mode 100644
>> libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
>> >
>> > diff --git a/libstdc++-v3/include/std/array
>> b/libstdc++-v3/include/std/array
>> > index 70280c1beeb..b791d86ddb2 100644
>> > --- a/libstdc++-v3/include/std/array
>> > +++ b/libstdc++-v3/include/std/array
>> > @@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >return std::move(std::get<_Int>(__arr));
>> >  }
>> >
>> > -#if __cplusplus > 201703L
>> > +#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
>> >  #define __cpp_lib_to_array 201907L
>> > -
>> > -  template
>> > -constexpr array, sizeof...(_Idx)>
>> > -__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
>> > -{
>> > -  if constexpr (_Move)
>> > - return {{std::move(__a[_Idx])...}};
>> > -  else
>> > - return {{__a[_Idx]...}};
>> > -}
>> > -
>> >template
>> >  [[nodiscard]]
>> >  constexpr array, _Nm>
>> > @@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >static_assert(!is_array_v<_Tp>);
>> >static_assert(is_constructible_v<_Tp, _Tp&>);
>> >if constexpr (is_constructible_v<_Tp, _Tp&>)
>> > - return __to_array(__a, make_index_sequence<_Nm>{});
>> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
>> > + {
>> > +   if constexpr (is_trivial_v<_Tp> && _Nm != 0)
>>
>> redundant _Nm != 0 test?
>>
>
> Ah yes, I added it below to ensure we don't use memcpy with a null
> __arr.data() and forgot to remove it here.
>
>
>>
>> > + {
>> > +   array, _Nm> __arr;
>> > +   if (!__is_constant_evaluated() && _Nm != 0)
>> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
>> > +   else
>> > + for (size_t __i = 0; __i < _Nm; ++__i)
>> > +   __arr._M_elems[__i] = __a[__i];
>> > +   return __arr;
>> > + }
>> > +   else
>> > + return [&__a](index_sequence<_Idx...>) {
>> > +   return array, _Nm>{{ __a[_Idx]... }};
>> > + }(make_index_sequence<_Nm>{});
>> > + }
>> > +  else
>> > + __builtin_unreachable(); // FIXME: see PR c++/91388
>> >  }
>> >
>> >template
>> > @@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> >static_assert(!is_array_v<_Tp>);
>> >static_assert(is_move_constructible_v<_Tp>);
>> >if constexpr (is_move_constructible_v<_Tp>)
>> > - return __to_array<1>(__a, make_index_sequence<_Nm>{});
>> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
>> > + {
>> > +   if constexpr (is_trivial_v<_Tp>)
>> > + {
>> > +   array, _Nm> __arr;
>> > +   if (!__is_constant_evaluated() && _Nm != 0)
>> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
>> > +   else
>> > + for (size_t __i = 0; __i < _Nm; ++__i)
>> > +   __arr._M_elems[__i] = std::move(__a[__i]);
>>
>> IIUC this std::move is unnecessary for trivial arrays?
>>
>
> Good point, thanks.
>
> That makes the lvalue and rvalue overloads identical for trivial types. It
> seems a shame to duplicate the code, so the rvalue one could do:
>
>  

Re: [committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-20 Thread Jonathan Wakely via Gcc-patches
On Tue, 20 Jun 2023 at 01:54, Patrick Palka  wrote:

> On Fri, 9 Jun 2023, Jonathan Wakely via Libstdc++ wrote:
>
> > Tested powerpc64le-linux. Pushed to trunk.
> >
> > This makes sense to backport after some soak time on trunk.
> >
> > -- >8 --
> >
> > As reported in PR libstdc++/110167, std::to_array compiles extremely
> > slowly for very large arrays. It needs to instantiate a very large
> > specialization of std::index_sequence and then create a very large
> > aggregate initializer from the pack expansion. For trivial types we can
> > simply default-initialize the std::array and then use memcpy to copy the
> > values. For non-trivial types we need to use the existing
> > implementation, despite the compilation cost.
> >
> > As also noted in the PR, using a generic lambda instead of the
> > __to_array helper compiles faster since gcc-13. It also produces
> > slightly smaller code at -O1, due to additional inlining. The code at
> > -Os, -O2 and -O3 seems to be the same. This new implementation requires
> > __cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
> > since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).
> >
> > libstdc++-v3/ChangeLog:
> >
> >   PR libstdc++/110167
> >   * include/std/array (to_array): Initialize arrays of trivial
> >   types using memcpy. For non-trivial types, use lambda
> >   expressions instead of a separate helper function.
> >   (__to_array): Remove.
> >   * testsuite/23_containers/array/creation/110167.cc: New test.
> > ---
> >  libstdc++-v3/include/std/array| 53 +--
> >  .../23_containers/array/creation/110167.cc| 14 +
> >  2 files changed, 51 insertions(+), 16 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
> >
> > diff --git a/libstdc++-v3/include/std/array
> b/libstdc++-v3/include/std/array
> > index 70280c1beeb..b791d86ddb2 100644
> > --- a/libstdc++-v3/include/std/array
> > +++ b/libstdc++-v3/include/std/array
> > @@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >return std::move(std::get<_Int>(__arr));
> >  }
> >
> > -#if __cplusplus > 201703L
> > +#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
> >  #define __cpp_lib_to_array 201907L
> > -
> > -  template
> > -constexpr array, sizeof...(_Idx)>
> > -__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
> > -{
> > -  if constexpr (_Move)
> > - return {{std::move(__a[_Idx])...}};
> > -  else
> > - return {{__a[_Idx]...}};
> > -}
> > -
> >template
> >  [[nodiscard]]
> >  constexpr array, _Nm>
> > @@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >static_assert(!is_array_v<_Tp>);
> >static_assert(is_constructible_v<_Tp, _Tp&>);
> >if constexpr (is_constructible_v<_Tp, _Tp&>)
> > - return __to_array(__a, make_index_sequence<_Nm>{});
> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
> > + {
> > +   if constexpr (is_trivial_v<_Tp> && _Nm != 0)
>
> redundant _Nm != 0 test?
>

Ah yes, I added it below to ensure we don't use memcpy with a null
__arr.data() and forgot to remove it here.


>
> > + {
> > +   array, _Nm> __arr;
> > +   if (!__is_constant_evaluated() && _Nm != 0)
> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> > +   else
> > + for (size_t __i = 0; __i < _Nm; ++__i)
> > +   __arr._M_elems[__i] = __a[__i];
> > +   return __arr;
> > + }
> > +   else
> > + return [&__a](index_sequence<_Idx...>) {
> > +   return array, _Nm>{{ __a[_Idx]... }};
> > + }(make_index_sequence<_Nm>{});
> > + }
> > +  else
> > + __builtin_unreachable(); // FIXME: see PR c++/91388
> >  }
> >
> >template
> > @@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >static_assert(!is_array_v<_Tp>);
> >static_assert(is_move_constructible_v<_Tp>);
> >if constexpr (is_move_constructible_v<_Tp>)
> > - return __to_array<1>(__a, make_index_sequence<_Nm>{});
> > -  __builtin_unreachable(); // FIXME: see PR c++/91388
> > + {
> > +   if constexpr (is_trivial_v<_Tp>)
> > + {
> > +   array, _Nm> __arr;
> > +   if (!__is_constant_evaluated() && _Nm != 0)
> > + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> > +   else
> > + for (size_t __i = 0; __i < _Nm; ++__i)
> > +   __arr._M_elems[__i] = std::move(__a[__i]);
>
> IIUC this std::move is unnecessary for trivial arrays?
>

Good point, thanks.

That makes the lvalue and rvalue overloads identical for trivial types. It
seems a shame to duplicate the code, so the rvalue one could do:

  if constexpr (is_trivial_v<_Tp>)
   return std::to_array<_Tp, _Num>(__a);
 else

But that would imply an extra function call at -O0, and repeating overload
resolution. Since the duplicated 

Re: [committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-19 Thread Patrick Palka via Gcc-patches
On Fri, 9 Jun 2023, Jonathan Wakely via Libstdc++ wrote:

> Tested powerpc64le-linux. Pushed to trunk.
> 
> This makes sense to backport after some soak time on trunk.
> 
> -- >8 --
> 
> As reported in PR libstdc++/110167, std::to_array compiles extremely
> slowly for very large arrays. It needs to instantiate a very large
> specialization of std::index_sequence and then create a very large
> aggregate initializer from the pack expansion. For trivial types we can
> simply default-initialize the std::array and then use memcpy to copy the
> values. For non-trivial types we need to use the existing
> implementation, despite the compilation cost.
> 
> As also noted in the PR, using a generic lambda instead of the
> __to_array helper compiles faster since gcc-13. It also produces
> slightly smaller code at -O1, due to additional inlining. The code at
> -Os, -O2 and -O3 seems to be the same. This new implementation requires
> __cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
> since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).
> 
> libstdc++-v3/ChangeLog:
> 
>   PR libstdc++/110167
>   * include/std/array (to_array): Initialize arrays of trivial
>   types using memcpy. For non-trivial types, use lambda
>   expressions instead of a separate helper function.
>   (__to_array): Remove.
>   * testsuite/23_containers/array/creation/110167.cc: New test.
> ---
>  libstdc++-v3/include/std/array| 53 +--
>  .../23_containers/array/creation/110167.cc| 14 +
>  2 files changed, 51 insertions(+), 16 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
> 
> diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
> index 70280c1beeb..b791d86ddb2 100644
> --- a/libstdc++-v3/include/std/array
> +++ b/libstdc++-v3/include/std/array
> @@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>return std::move(std::get<_Int>(__arr));
>  }
>  
> -#if __cplusplus > 201703L
> +#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
>  #define __cpp_lib_to_array 201907L
> -
> -  template
> -constexpr array, sizeof...(_Idx)>
> -__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
> -{
> -  if constexpr (_Move)
> - return {{std::move(__a[_Idx])...}};
> -  else
> - return {{__a[_Idx]...}};
> -}
> -
>template
>  [[nodiscard]]
>  constexpr array, _Nm>
> @@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>static_assert(!is_array_v<_Tp>);
>static_assert(is_constructible_v<_Tp, _Tp&>);
>if constexpr (is_constructible_v<_Tp, _Tp&>)
> - return __to_array(__a, make_index_sequence<_Nm>{});
> -  __builtin_unreachable(); // FIXME: see PR c++/91388
> + {
> +   if constexpr (is_trivial_v<_Tp> && _Nm != 0)

redundant _Nm != 0 test?

> + {
> +   array, _Nm> __arr;
> +   if (!__is_constant_evaluated() && _Nm != 0)
> + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> +   else
> + for (size_t __i = 0; __i < _Nm; ++__i)
> +   __arr._M_elems[__i] = __a[__i];
> +   return __arr;
> + }
> +   else
> + return [&__a](index_sequence<_Idx...>) {
> +   return array, _Nm>{{ __a[_Idx]... }};
> + }(make_index_sequence<_Nm>{});
> + }
> +  else
> + __builtin_unreachable(); // FIXME: see PR c++/91388
>  }
>  
>template
> @@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>static_assert(!is_array_v<_Tp>);
>static_assert(is_move_constructible_v<_Tp>);
>if constexpr (is_move_constructible_v<_Tp>)
> - return __to_array<1>(__a, make_index_sequence<_Nm>{});
> -  __builtin_unreachable(); // FIXME: see PR c++/91388
> + {
> +   if constexpr (is_trivial_v<_Tp>)
> + {
> +   array, _Nm> __arr;
> +   if (!__is_constant_evaluated() && _Nm != 0)
> + __builtin_memcpy(__arr.data(), __a, sizeof(__a));
> +   else
> + for (size_t __i = 0; __i < _Nm; ++__i)
> +   __arr._M_elems[__i] = std::move(__a[__i]);

IIUC this std::move is unnecessary for trivial arrays?

> +   return __arr;
> + }
> +   else
> + return [&__a](index_sequence<_Idx...>) {
> +   return array, _Nm>{{ std::move(__a[_Idx])... }};
> + }(make_index_sequence<_Nm>{});
> + }
> +  else
> + __builtin_unreachable(); // FIXME: see PR c++/91388
>  }
>  #endif // C++20
>  
> diff --git a/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc 
> b/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
> new file mode 100644
> index 000..c2aecc911bd
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
> @@ -0,0 +1,14 @@
> +// { dg-options "-std=gnu++20" }
> +// { dg-do compile { target c++20 } }
> +
> +// PR 

[committed] libstdc++: Optimize std::to_array for trivial types [PR110167]

2023-06-09 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

This makes sense to backport after some soak time on trunk.

-- >8 --

As reported in PR libstdc++/110167, std::to_array compiles extremely
slowly for very large arrays. It needs to instantiate a very large
specialization of std::index_sequence and then create a very large
aggregate initializer from the pack expansion. For trivial types we can
simply default-initialize the std::array and then use memcpy to copy the
values. For non-trivial types we need to use the existing
implementation, despite the compilation cost.

As also noted in the PR, using a generic lambda instead of the
__to_array helper compiles faster since gcc-13. It also produces
slightly smaller code at -O1, due to additional inlining. The code at
-Os, -O2 and -O3 seems to be the same. This new implementation requires
__cpp_generic_lambdas >= 201707L (i.e. P0428R2) but that is supported
since Clang 10 and since Intel icc 2021.5.0 (and since GCC 10.1).

libstdc++-v3/ChangeLog:

PR libstdc++/110167
* include/std/array (to_array): Initialize arrays of trivial
types using memcpy. For non-trivial types, use lambda
expressions instead of a separate helper function.
(__to_array): Remove.
* testsuite/23_containers/array/creation/110167.cc: New test.
---
 libstdc++-v3/include/std/array| 53 +--
 .../23_containers/array/creation/110167.cc| 14 +
 2 files changed, 51 insertions(+), 16 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/23_containers/array/creation/110167.cc

diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 70280c1beeb..b791d86ddb2 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -414,19 +414,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return std::move(std::get<_Int>(__arr));
 }
 
-#if __cplusplus > 201703L
+#if __cplusplus >= 202002L && __cpp_generic_lambdas >= 201707L
 #define __cpp_lib_to_array 201907L
-
-  template
-constexpr array, sizeof...(_Idx)>
-__to_array(_Tp (&__a)[sizeof...(_Idx)], index_sequence<_Idx...>)
-{
-  if constexpr (_Move)
-   return {{std::move(__a[_Idx])...}};
-  else
-   return {{__a[_Idx]...}};
-}
-
   template
 [[nodiscard]]
 constexpr array, _Nm>
@@ -436,8 +425,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static_assert(!is_array_v<_Tp>);
   static_assert(is_constructible_v<_Tp, _Tp&>);
   if constexpr (is_constructible_v<_Tp, _Tp&>)
-   return __to_array(__a, make_index_sequence<_Nm>{});
-  __builtin_unreachable(); // FIXME: see PR c++/91388
+   {
+ if constexpr (is_trivial_v<_Tp> && _Nm != 0)
+   {
+ array, _Nm> __arr;
+ if (!__is_constant_evaluated() && _Nm != 0)
+   __builtin_memcpy(__arr.data(), __a, sizeof(__a));
+ else
+   for (size_t __i = 0; __i < _Nm; ++__i)
+ __arr._M_elems[__i] = __a[__i];
+ return __arr;
+   }
+ else
+   return [&__a](index_sequence<_Idx...>) {
+ return array, _Nm>{{ __a[_Idx]... }};
+   }(make_index_sequence<_Nm>{});
+   }
+  else
+   __builtin_unreachable(); // FIXME: see PR c++/91388
 }
 
   template
@@ -449,8 +454,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static_assert(!is_array_v<_Tp>);
   static_assert(is_move_constructible_v<_Tp>);
   if constexpr (is_move_constructible_v<_Tp>)
-   return __to_array<1>(__a, make_index_sequence<_Nm>{});
-  __builtin_unreachable(); // FIXME: see PR c++/91388
+   {
+ if constexpr (is_trivial_v<_Tp>)
+   {
+ array, _Nm> __arr;
+ if (!__is_constant_evaluated() && _Nm != 0)
+   __builtin_memcpy(__arr.data(), __a, sizeof(__a));
+ else
+   for (size_t __i = 0; __i < _Nm; ++__i)
+ __arr._M_elems[__i] = std::move(__a[__i]);
+ return __arr;
+   }
+ else
+   return [&__a](index_sequence<_Idx...>) {
+ return array, _Nm>{{ std::move(__a[_Idx])... }};
+   }(make_index_sequence<_Nm>{});
+   }
+  else
+   __builtin_unreachable(); // FIXME: see PR c++/91388
 }
 #endif // C++20
 
diff --git a/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc 
b/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
new file mode 100644
index 000..c2aecc911bd
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/array/creation/110167.cc
@@ -0,0 +1,14 @@
+// { dg-options "-std=gnu++20" }
+// { dg-do compile { target c++20 } }
+
+// PR libstdc++/110167 - excessive compile time when optimizing std::to_array
+
+#include 
+
+constexpr int N = 512 * 512;
+
+std::array
+make_std_array(int ()[N])
+{
+  return std::to_array(a);
+}
-- 
2.40.1