On Sun, Aug 3, 2025 at 11:16 PM Luc Grosheintz <[email protected]>
wrote:
> In both fully static and dynamic extents the comparison
> static_extent(i) == dynamic_extent
> is known at compile time. As a result, extents::extent doesn't
> need to perform the check at runtime.
>
> An illustrative example is:
>
> using E = std::extents<int, 3, 5, 7, 11, 13, 17>;
> int required_span_size(const typename Layout::mapping<E>& m)
> { return m.required_span_size(); }
>
> Prior to this commit the generated code (on -O2) is:
>
> 2a0: b9 01 00 00 00 mov ecx,0x1
> 2a5: 31 d2 xor edx,edx
> 2a7: 66 66 2e 0f 1f 84 00 data16 cs nop WORD PTR [rax+rax*1+0x0]
> 2ae: 00 00 00 00
> 2b2: 66 66 2e 0f 1f 84 00 data16 cs nop WORD PTR [rax+rax*1+0x0]
> 2b9: 00 00 00 00
> 2bd: 0f 1f 00 nop DWORD PTR [rax]
> 2c0: 48 8b 04 d5 00 00 00 mov rax,QWORD PTR [rdx*8+0x0]
> 2c7: 00
> 2c8: 48 83 f8 ff cmp rax,0xffffffffffffffff
> 2cc: 0f 84 00 00 00 00 je 2d2
> <required_span_size_6d_static+0x32>
> 2d2: 83 e8 01 sub eax,0x1
> 2d5: 0f af 04 97 imul eax,DWORD PTR [rdi+rdx*4]
> 2d9: 48 83 c2 01 add rdx,0x1
> 2dd: 01 c1 add ecx,eax
> 2df: 48 83 fa 06 cmp rdx,0x6
> 2e3: 75 db jne 2c0
> <required_span_size_6d_static+0x20>
> 2e5: 89 c8 mov eax,ecx
> 2e7: c3 ret
>
> which is a scalar loop, and notably includes the check
>
> 308: 48 83 f8 ff cmp rax,0xffffffffffffffff
>
> to assert that the static extent is indeed not -1. Note, that on -O3 the
> optimizer eliminates the comparison; and generates a sequence of scalar
> operations: lea, shl, add and mov. The aim of this commit is to
> eliminate this comparison also for -O2. With the optimization applied we
> get:
>
> 2e0: f3 0f 6f 0f movdqu xmm1,XMMWORD PTR [rdi]
> 2e4: 66 0f 6f 15 00 00 00 movdqa xmm2,XMMWORD PTR [rip+0x0]
> 2eb: 00
> 2ec: 8b 57 10 mov edx,DWORD PTR [rdi+0x10]
> 2ef: 66 0f 6f c1 movdqa xmm0,xmm1
> 2f3: 66 0f 73 d1 20 psrlq xmm1,0x20
> 2f8: 66 0f f4 c2 pmuludq xmm0,xmm2
> 2fc: 66 0f 73 d2 20 psrlq xmm2,0x20
> 301: 8d 14 52 lea edx,[rdx+rdx*2]
> 304: 66 0f f4 ca pmuludq xmm1,xmm2
> 308: 66 0f 70 c0 08 pshufd xmm0,xmm0,0x8
> 30d: 66 0f 70 c9 08 pshufd xmm1,xmm1,0x8
> 312: 66 0f 62 c1 punpckldq xmm0,xmm1
> 316: 66 0f 6f c8 movdqa xmm1,xmm0
> 31a: 66 0f 73 d9 08 psrldq xmm1,0x8
> 31f: 66 0f fe c1 paddd xmm0,xmm1
> 323: 66 0f 6f c8 movdqa xmm1,xmm0
> 327: 66 0f 73 d9 04 psrldq xmm1,0x4
> 32c: 66 0f fe c1 paddd xmm0,xmm1
> 330: 66 0f 7e c0 movd eax,xmm0
> 334: 8d 54 90 01 lea edx,[rax+rdx*4+0x1]
> 338: 8b 47 14 mov eax,DWORD PTR [rdi+0x14]
> 33b: c1 e0 04 shl eax,0x4
> 33e: 01 d0 add eax,edx
> 340: c3 ret
>
> Which shows eliminating the trivial comparison, unlocks a new set of
> optimizations, i.e. SIMD-vectorization. In particular, the loop has been
> vectorized by loading the first four constants from aligned memory; the
> first four strides from non-aligned memory, then computes the product
> and reduction. It interleaves the above with computing 1 + 12*S[4] +
> 16*S[5] (as scalar operations) and then finishes the reduction.
>
> A similar effect can be observed for fully dynamic extents.
>
> libstdc++-v3/ChangeLog:
>
> * include/std/mdspan (__mdspan::__all_static): New function.
> (__mdspan::_StaticExtents::_S_is_dyn): Inline and eliminate.
> (__mdspan::_ExtentsStorage::_S_is_dynamic): New method.
> (__mdspan::_ExtentsStorage::_M_extent): Use _S_is_dynamic.
>
> Signed-off-by: Luc Grosheintz <[email protected]>
> ---
>
Again applying locally small changes.
> libstdc++-v3/include/std/mdspan | 35 +++++++++++++++++++++++----------
> 1 file changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/mdspan
> b/libstdc++-v3/include/std/mdspan
> index ccf1028466f..ee0c2b65c7f 100644
> --- a/libstdc++-v3/include/std/mdspan
> +++ b/libstdc++-v3/include/std/mdspan
> @@ -49,6 +49,15 @@ namespace std _GLIBCXX_VISIBILITY(default)
> _GLIBCXX_BEGIN_NAMESPACE_VERSION
> namespace __mdspan
> {
> + template<array _Extents>
> + consteval bool
> + __all_static(size_t __begin = 0, size_t __end = _Extents.size())
>
I pass span here.
> + {
> + for(size_t __i = __begin; __i < __end; ++__i)
> + if (_Extents[__i] == dynamic_extent)
> + return false;
> + return true;
> + }
>
> template<array _Extents>
> consteval bool
> @@ -64,10 +73,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> class _StaticExtents
> {
> public:
> - static consteval bool
> - _S_is_dyn(size_t __ext) noexcept
> - { return __ext == dynamic_extent; }
> -
> static constexpr size_t _S_rank = _Extents.size();
>
> // For __r in [0, _S_rank], _S_dynamic_index(__r) is the number
> @@ -87,7 +92,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> for (size_t __i = 0; __i < _S_rank; ++__i)
> {
> __ret[__i] = __dyn;
> - __dyn += _S_is_dyn(_Extents[__i]);
> + __dyn += _Extents[__i] == dynamic_extent;
>
Added parenthesis here:
__dyn += (_Extents[__i] == dynamic_extent);
> }
> __ret[_S_rank] = __dyn;
> return __ret;
> @@ -105,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> {
> array<size_t, _S_rank_dynamic> __ret;
> for (size_t __i = 0, __r = 0; __i < _S_rank; ++__i)
> - if (_S_is_dyn(_Extents[__i]))
> + if (_Extents[__i] == dynamic_extent)
> __ret[__r++] = __i;
> return __ret;
> }();
> @@ -150,6 +155,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> using _S_base::_S_dynamic_index_inv;
> using _S_base::_S_static_extent;
>
> + static constexpr bool
> + _S_is_dynamic(size_t __r) noexcept
> + {
> + if constexpr (__all_static<_Extents>())
> + return false;
> + else if constexpr (__all_dynamic<_Extents>())
> + return true;
> + else
> + return _Extents[__r] == dynamic_extent;
> + }
>
And this is now:
static constexpr bool
_S_is_dynamic(size_t __r) noexcept
{
if constexpr (__all_static(_Extents))
return false;
else if constexpr (__all_dynamic(_Extents))
return true;
else
return _Extents[__r] == dynamic_extent;
}
> +
> template<typename _OIndexType>
> static constexpr _IndexType
> _S_int_cast(const _OIndexType& __other) noexcept
> @@ -158,11 +174,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> constexpr _IndexType
> _M_extent(size_t __r) const noexcept
> {
> - auto __se = _Extents[__r];
> - if (__se == dynamic_extent)
> + if (_S_is_dynamic(__r))
> return _M_dyn_exts[_S_dynamic_index(__r)];
> else
> - return __se;
> + return _S_static_extent(__r);
> }
>
> template<size_t _OtherRank, typename _GetOtherExtent>
> @@ -171,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> {
> if constexpr (_OtherRank == _S_rank)
> for (size_t __i = 0; __i < _S_rank; ++__i)
> - if (_Extents[__i] != dynamic_extent
> + if (!_S_is_dynamic(__i)
> && !cmp_equal(_Extents[__i],
> _S_int_cast(__get_extent(__i))))
> return false;
> return true;
> --
> 2.50.0
>
>