Re: [PATCH] libstdc++: Avoid MMX return types from __builtin_shufflevector

2024-05-28 Thread Jonathan Wakely
On Wed, 15 May 2024 at 20:50, Matthias Kretz wrote:
>
> Tested on aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu,
> x86_64-linux-gnu (-m64, -m32, -mx32), and arm-linux-gnueabi
>
> OK for trunk?

OK

> And when backporting, should I squash it with the commit that
> introduced the regression?

I don't mind about that. If you cherry-pick them next to each other
and push them at the same time, nobody's going to end up using the
broken commit before the fix. It's fine to squash it if you prefer to
though.

OK for backports either way.

>
>  8< ---
>
> This resolves a regression on i686 that was introduced with
> r15-429-gfb1649f8b4ad50.
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/114958
> * include/experimental/bits/simd.h (__as_vector): Don't use
> vector_size(8) on __i386__.
> (__vec_shuffle): Never return MMX vectors, widen to 16 bytes
> instead.
> (concat): Fix padding calculation to pick up widening logic from
> __as_vector.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h | 39 +--
>  1 file changed, 28 insertions(+), 11 deletions(-)
>
>
> --
> ──
>  Dr. Matthias Kretz   https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
>  stdₓ::simd
> ──


Re: [PATCH] libstdc++: Avoid MMX return types from __builtin_shufflevector

2024-05-27 Thread Matthias Kretz
ping

On Wednesday, 15 May 2024 21:49:56 GMT+2 Matthias Kretz wrote:
> Tested on aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu,
> x86_64-linux-gnu (-m64, -m32, -mx32), and arm-linux-gnueabi
> 
> OK for trunk? And when backporting, should I squash it with the commit that
> introduced the regression?
> 
>  8< ---
> 
> This resolves a regression on i686 that was introduced with
> r15-429-gfb1649f8b4ad50.
> 
> Signed-off-by: Matthias Kretz 
> 
> libstdc++-v3/ChangeLog:
> 
>   PR libstdc++/114958
>   * include/experimental/bits/simd.h (__as_vector): Don't use
>   vector_size(8) on __i386__.
>   (__vec_shuffle): Never return MMX vectors, widen to 16 bytes
>   instead.
>   (concat): Fix padding calculation to pick up widening logic from
>   __as_vector.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h | 39 +--
>  1 file changed, 28 insertions(+), 11 deletions(-)
> 
> 
> --
> ──
>  Dr. Matthias Kretz   https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
>  stdₓ::simd
> ──


-- 
──┬
 Dr. Matthias Kretz   │ SDE — Software Development for Experiments
 Senior Software Engineer,│  +49 6159 713084
 SIMD Expert, │  m.kr...@gsi.de floss.social/@mkretz
 ISO C++ Numerics Chair   │  mattkretz.github.io
──┴

GSI Helmholtzzentrum für Schwerionenforschung GmbH
Planckstraße 1, 64291 Darmstadt, Germany, www.gsi.de

Commercial Register / Handelsregister: Amtsgericht Darmstadt, HRB 1528
Managing Directors / Geschäftsführung:
Professor Dr. Paolo Giubellino, Jörg Blaurock
Chairman of the GSI Supervisory Board / Vorsitzender des GSI-Aufsichtsrats:
Ministerialdirigent Dr. Volkmar Dietz


signature.asc
Description: This is a digitally signed message part.


[PATCH] libstdc++: Avoid MMX return types from __builtin_shufflevector

2024-05-15 Thread Matthias Kretz
Tested on aarch64-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, 
x86_64-linux-gnu (-m64, -m32, -mx32), and arm-linux-gnueabi

OK for trunk? And when backporting, should I squash it with the commit that 
introduced the regression?

 8< ---

This resolves a regression on i686 that was introduced with
r15-429-gfb1649f8b4ad50.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114958
* include/experimental/bits/simd.h (__as_vector): Don't use
vector_size(8) on __i386__.
(__vec_shuffle): Never return MMX vectors, widen to 16 bytes
instead.
(concat): Fix padding calculation to pick up widening logic from
__as_vector.
---
 libstdc++-v3/include/experimental/bits/simd.h | 39 +--
 1 file changed, 28 insertions(+), 11 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 6a6fd4f109d..7c524625719 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1665,7 +1665,12 @@ __as_vector(_V __x)
 	  {
 	static_assert(is_simd<_V>::value);
 	using _Tp = typename _V::value_type;
+#ifdef __i386__
+	constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
+	using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
+#else
 	using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+#endif
 	return _RV{__data(__x)};
 	  }
   }
@@ -2081,11 +2086,14 @@ __not(_Tp __a) noexcept
 // }}}
 // __vec_shuffle{{{
 template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
-  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
   __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
   {
 constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
 constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
+using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
 #if __has_builtin(__builtin_shufflevector)
 #ifdef __clang__
 // Clang requires _T0 == _T1
@@ -2105,14 +2113,23 @@ __not(_Tp __a) noexcept
 	 });
 else
 #endif
-  return __builtin_shufflevector(__x, __y, [=] {
-	   constexpr int __j = __idx_perm(_Is);
-	   static_assert(__j < _N0 + _N1);
-	   return __j;
-	 }()...);
+  {
+	const auto __r = __builtin_shufflevector(__x, __y, [=] {
+			   constexpr int __j = __idx_perm(_Is);
+			   static_assert(__j < _N0 + _N1);
+			   return __j;
+			 }()...);
+#ifdef __i386__
+	if constexpr (sizeof(__r) == sizeof(_RV))
+	  return __r;
+	else
+	  return _RV {__r[_Is]...};
+#else
+	return __r;
+#endif
+  }
 #else
-using _Tp = __remove_cvref_t<decltype(__x[0])>;
-return __vector_type_t<_Tp, sizeof...(_Is)> {
+return _RV {
   [=]() -> _Tp {
 	constexpr int __j = __idx_perm(_Is);
 	static_assert(__j < _N0 + _N1);
@@ -4393,9 +4410,9 @@ for (unsigned __j = 0; __j < __i; ++__j)
 		__vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
 			  [](int __i) {
 constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
-constexpr int __padding0
-  = sizeof(__vector_type_t<_Tp, __sizes[0]>) / sizeof(_Tp)
-  - __sizes[0];
+constexpr int __vsizes[2]
+  = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
+constexpr int __padding0 = __vsizes[0] - __sizes[0];
 return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
 			  })};
   }