On Sun, Jul 27, 2025 at 2:57 PM Luc Grosheintz <luc.groshei...@gmail.com> wrote:
> In mdspan related code involving static extents, often the IndexType is > part of the template parameters, even though it's not needed. > > This commit extracts the parts of _ExtentsStorage not related to > IndexType into a separate class _StaticExtents. > > It also prefers passing the array of static extents, instead of the > whole extents object where possible. > > These changes don't effect the compilation time, i.e. the difference was > (much) less than 10%. However, the size of the object file is noticeably > impacted. > > Consider an extreme example that computes required_span_size and stride > for all combinations of: > > - all three layouts, > - three choices of static extents Indices... > - eight choices of IndexType. > > In this case the size of the object file reduced by 45% (22.3kB -> > 12.1kB) when compiling with -O2. Using objdump -h we see that the size > of .text is unchanged, but .rodata decreases. This is plausible, > because: > > - the deduplicated code is short and is always inlined, > - the static storage for _FwdProd, _RevProd, the NTTP _Extents and > _StaticExtents::_S_dynamic_index is not deduplicated before this > commit. > > libstdc++-v3/ChangeLog: > > * include/std/mdspan (__mdspan::_StaticExtents): New class. > (__mdspan::_ExtentsStorage): Inherit non-IndexType related > methods from _StaticExtents. > (__mdspan::_ExtentsStorage::_S_static_extents): Return reference > to NTTP _Extents. > (__mdspan::__static_extents): Ditto. > (__mdspan::__static_prod): Use NTTP for static extents. > (__mdspan::_FwdProd): Ditto. > (__mdspan::_RevProd): Ditto. > (__mdspan::__contains_zero): New overload for const array&. > > Signed-off-by: Luc Grosheintz <luc.groshei...@gmail.com> > --- > I have given you the same suggestion while reviewing my previous commit, so this looks good to me. Would it be much of the issue to put this comment before the changes introducing the caching of indices when doing new revision. This is not strictly necessary, but this ones is not tradeoff between conde size and performance. libstdc++-v3/include/std/mdspan | 70 +++++++++++++++++++++------------ > 1 file changed, 45 insertions(+), 25 deletions(-) > > diff --git a/libstdc++-v3/include/std/mdspan > b/libstdc++-v3/include/std/mdspan > index 5b47c95d2c6..2cf572f1410 100644 > --- a/libstdc++-v3/include/std/mdspan > +++ b/libstdc++-v3/include/std/mdspan > @@ -49,19 +49,14 @@ namespace std _GLIBCXX_VISIBILITY(default) > _GLIBCXX_BEGIN_NAMESPACE_VERSION > namespace __mdspan > { > - template<typename _IndexType, array _Extents> > - class _ExtentsStorage > + template<array _Extents> > + class _StaticExtents > { > public: > static consteval bool > _S_is_dyn(size_t __ext) noexcept > { return __ext == dynamic_extent; } > > - template<typename _OIndexType> > - static constexpr _IndexType > - _S_int_cast(const _OIndexType& __other) noexcept > - { return _IndexType(__other); } > - > static constexpr size_t _S_rank = _Extents.size(); > > // For __r in [0, _S_rank], _S_dynamic_index[__r] is the number > @@ -99,6 +94,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > static constexpr size_t > _S_static_extent(size_t __r) noexcept > { return _Extents[__r]; } > + }; > + > + template<typename _IndexType, array _Extents> > + class _ExtentsStorage : public _StaticExtents<_Extents> > + { > + private: > + using _S_base = _StaticExtents<_Extents>; > + > + public: > + using _S_base::_S_rank; > + using _S_base::_S_rank_dynamic; > + using _S_base::_S_dynamic_index; > + using _S_base::_S_dynamic_index_inv; > + > + template<typename _OIndexType> > + static constexpr _IndexType > + _S_int_cast(const _OIndexType& __other) noexcept > + { return _IndexType(__other); } > > constexpr _IndexType > _M_extent(size_t __r) const noexcept > @@ -157,9 +170,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > { return __exts[__i]; }); > } > > - static constexpr span<const size_t> > - _S_static_extents(size_t __begin, size_t __end) noexcept > - { return {_Extents.data() + __begin, _Extents.data() + __end}; } > + static constexpr const array<size_t, _S_rank>& > + _S_static_extents() noexcept > + { return _Extents; } > > constexpr span<const _IndexType> > _M_dynamic_extents(size_t __begin, size_t __end) const noexcept > @@ -184,27 +197,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > __valid_static_extent = _Extent == dynamic_extent > || _Extent <= numeric_limits<_IndexType>::max(); > > - template<typename _Extents> > + template<array _Extents> > consteval size_t > __static_prod(size_t __begin, size_t __end) > { > size_t __prod = 1; > for(size_t __i = __begin; __i < __end; ++__i) > { > - auto __ext = _Extents::static_extent(__i); > + auto __ext = _Extents[__i]; > __prod *= __ext == dynamic_extent ? size_t(1) : __ext; > } > return __prod; > } > > // Pre-compute: \prod_{i = 0}^r _Extents[i] > - template<typename _Extents> > + template<array _Extents> > struct _FwdProd > { > - constexpr static std::array<size_t, _Extents::rank() + 1> _S_value > = > + constexpr static std::array<size_t, _Extents.size() + 1> _S_value = > [] consteval > { > - constexpr size_t __rank = _Extents::rank(); > + constexpr size_t __rank = _Extents.size(); > std::array<size_t, __rank + 1> __ret; > for(size_t __r = 0; __r < __rank + 1; ++__r) > __ret[__r] = __static_prod<_Extents>(0, __r); > @@ -213,13 +226,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > }; > > // Pre-compute: \prod_{i = r+1}^n _Extents[i] > - template<typename _Extents> > + template<array _Extents> > struct _RevProd > { > - constexpr static std::array<size_t, _Extents::rank()> _S_value = > + constexpr static std::array<size_t, _Extents.size()> _S_value = > [] consteval > { > - constexpr size_t __rank = _Extents::rank(); > + constexpr size_t __rank = _Extents.size(); > std::array<size_t, __rank> __ret; > for(size_t __r = 0; __r < __rank; ++__r) > __ret[__r] = __static_prod<_Extents>(__r + 1, __rank); > @@ -228,10 +241,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > }; > > template<typename _Extents> > - constexpr span<const size_t> > - __static_extents(size_t __begin = 0, size_t __end = > _Extents::rank()) > + constexpr const array<size_t, _Extents::rank()>& > + __static_extents() > noexcept > - { return _Extents::_S_storage::_S_static_extents(__begin, __end); } > + { return _Extents::_S_storage::_S_static_extents(); } > > template<typename _Extents> > constexpr span<const typename _Extents::index_type> > @@ -358,8 +371,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > } > > private: > - friend span<const size_t> > - __mdspan::__static_extents<extents>(size_t, size_t); > + friend const array<size_t, rank()>& > + __mdspan::__static_extents<extents>(); > > friend span<const index_type> > __mdspan::__dynamic_extents<extents>(const extents&, size_t, > size_t); > @@ -384,6 +397,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > return false; > } > > + template<typename _Tp, size_t _Nm> > Add [[__gnu__::__always_inline__]], as we do not want this to be emitted even in debug mode. We may want to do the same for __empty, __size, __fwd_prod and __rev__prod, that uses _Extents as template parameter. > + consteval bool > + __contains_zero(const array<_Tp, _Nm>& __exts) noexcept > + { return __contains_zero(span<const _Tp, _Nm>{__exts}); } > + > template<typename _Extents> > constexpr bool > __empty(const _Extents& __exts) noexcept > @@ -412,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > constexpr typename _Extents::index_type > __fwd_prod(const _Extents& __exts, size_t __r) noexcept > { > - size_t __sta_prod = _FwdProd<_Extents>::_S_value[__r]; > + constexpr auto __sta_exts = __static_extents<_Extents>(); > + size_t __sta_prod = _FwdProd<__sta_exts>::_S_value[__r]; > return __extents_prod(__exts, __sta_prod, 0, __r); > } > > @@ -420,8 +439,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > constexpr typename _Extents::index_type > __rev_prod(const _Extents& __exts, size_t __r) noexcept > { > + constexpr auto __sta_exts = __static_extents<_Extents>(); > constexpr size_t __rank = _Extents::rank(); > - size_t __sta_prod = _RevProd<_Extents>::_S_value[__r]; > + size_t __sta_prod = _RevProd<__sta_exts>::_S_value[__r]; > return __extents_prod(__exts, __sta_prod, __r + 1, __rank); > } > > -- > 2.50.0 > >