On 7/28/25 08:13, Tomasz Kaminski wrote:
On Sun, Jul 27, 2025 at 2:57 PM Luc Grosheintz <luc.groshei...@gmail.com>
wrote:
In mdspan related code involving static extents, often the IndexType is
part of the template parameters, even though it's not needed.
This commit extracts the parts of _ExtentsStorage not related to
IndexType into a separate class _StaticExtents.
It also prefers passing the array of static extents, instead of the
whole extents object where possible.
These changes don't effect the compilation time, i.e. the difference was
(much) less than 10%. However, the size of the object file is noticeably
impacted.
Consider an extreme example that computes required_span_size and stride
for all combinations of:
- all three layouts,
- three choices of static extents Indices...
- eight choices of IndexType.
In this case the size of the object file reduced by 45% (22.3kB ->
12.1kB) when compiling with -O2. Using objdump -h we see that the size
of .text is unchanged, but .rodata decreases. This is plausible,
because:
- the deduplicated code is short and is always inlined,
- the static storage for _FwdProd, _RevProd, the NTTP _Extents and
_StaticExtents::_S_dynamic_index is not deduplicated before this
commit.
libstdc++-v3/ChangeLog:
* include/std/mdspan (__mdspan::_StaticExtents): New class.
(__mdspan::_ExtentsStorage): Inherit non-IndexType related
methods from _StaticExtents.
(__mdspan::_ExtentsStorage::_S_static_extents): Return reference
to NTTP _Extents.
(__mdspan::__static_extents): Ditto.
(__mdspan::__static_prod): Use NTTP for static extents.
(__mdspan::_FwdProd): Ditto.
(__mdspan::_RevProd): Ditto.
(__mdspan::__contains_zero): New overload for const array&.
Signed-off-by: Luc Grosheintz <luc.groshei...@gmail.com>
---
I have given you the same suggestion while reviewing my previous commit,
so this looks good to me. Would it be much of the issue to put this comment
before the changes introducing the caching of indices when doing new
revision.
This is not strictly necessary, but this ones is not tradeoff between conde
size and
performance.
I'd prefer not to, the diffs don't commute and require some temporary minor
inconvenience to eliminate one case of __static_extents(__begin, __end).
Maybe a better excuse: this way around we can focus on code in .text first
and then check that .rodata and object file size shrink down to the expected
size. Somehow, this order of the git messages feels more interesting than the
reverse. In the reverse we first check object size; then completely rewrite
how we use static variables; making the previous commit message instantly
outdated.
libstdc++-v3/include/std/mdspan | 70 +++++++++++++++++++++------------
1 file changed, 45 insertions(+), 25 deletions(-)
diff --git a/libstdc++-v3/include/std/mdspan
b/libstdc++-v3/include/std/mdspan
index 5b47c95d2c6..2cf572f1410 100644
--- a/libstdc++-v3/include/std/mdspan
+++ b/libstdc++-v3/include/std/mdspan
@@ -49,19 +49,14 @@ namespace std _GLIBCXX_VISIBILITY(default)
_GLIBCXX_BEGIN_NAMESPACE_VERSION
namespace __mdspan
{
- template<typename _IndexType, array _Extents>
- class _ExtentsStorage
+ template<array _Extents>
+ class _StaticExtents
{
public:
static consteval bool
_S_is_dyn(size_t __ext) noexcept
{ return __ext == dynamic_extent; }
- template<typename _OIndexType>
- static constexpr _IndexType
- _S_int_cast(const _OIndexType& __other) noexcept
- { return _IndexType(__other); }
-
static constexpr size_t _S_rank = _Extents.size();
// For __r in [0, _S_rank], _S_dynamic_index[__r] is the number
@@ -99,6 +94,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
static constexpr size_t
_S_static_extent(size_t __r) noexcept
{ return _Extents[__r]; }
+ };
+
+ template<typename _IndexType, array _Extents>
+ class _ExtentsStorage : public _StaticExtents<_Extents>
+ {
+ private:
+ using _S_base = _StaticExtents<_Extents>;
+
+ public:
+ using _S_base::_S_rank;
+ using _S_base::_S_rank_dynamic;
+ using _S_base::_S_dynamic_index;
+ using _S_base::_S_dynamic_index_inv;
+
+ template<typename _OIndexType>
+ static constexpr _IndexType
+ _S_int_cast(const _OIndexType& __other) noexcept
+ { return _IndexType(__other); }
constexpr _IndexType
_M_extent(size_t __r) const noexcept
@@ -157,9 +170,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return __exts[__i]; });
}
- static constexpr span<const size_t>
- _S_static_extents(size_t __begin, size_t __end) noexcept
- { return {_Extents.data() + __begin, _Extents.data() + __end}; }
+ static constexpr const array<size_t, _S_rank>&
+ _S_static_extents() noexcept
+ { return _Extents; }
constexpr span<const _IndexType>
_M_dynamic_extents(size_t __begin, size_t __end) const noexcept
@@ -184,27 +197,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__valid_static_extent = _Extent == dynamic_extent
|| _Extent <= numeric_limits<_IndexType>::max();
- template<typename _Extents>
+ template<array _Extents>
consteval size_t
__static_prod(size_t __begin, size_t __end)
{
size_t __prod = 1;
for(size_t __i = __begin; __i < __end; ++__i)
{
- auto __ext = _Extents::static_extent(__i);
+ auto __ext = _Extents[__i];
__prod *= __ext == dynamic_extent ? size_t(1) : __ext;
}
return __prod;
}
// Pre-compute: \prod_{i = 0}^r _Extents[i]
- template<typename _Extents>
+ template<array _Extents>
struct _FwdProd
{
- constexpr static std::array<size_t, _Extents::rank() + 1> _S_value
=
+ constexpr static std::array<size_t, _Extents.size() + 1> _S_value =
[] consteval
{
- constexpr size_t __rank = _Extents::rank();
+ constexpr size_t __rank = _Extents.size();
std::array<size_t, __rank + 1> __ret;
for(size_t __r = 0; __r < __rank + 1; ++__r)
__ret[__r] = __static_prod<_Extents>(0, __r);
@@ -213,13 +226,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
};
// Pre-compute: \prod_{i = r+1}^n _Extents[i]
- template<typename _Extents>
+ template<array _Extents>
struct _RevProd
{
- constexpr static std::array<size_t, _Extents::rank()> _S_value =
+ constexpr static std::array<size_t, _Extents.size()> _S_value =
[] consteval
{
- constexpr size_t __rank = _Extents::rank();
+ constexpr size_t __rank = _Extents.size();
std::array<size_t, __rank> __ret;
for(size_t __r = 0; __r < __rank; ++__r)
__ret[__r] = __static_prod<_Extents>(__r + 1, __rank);
@@ -228,10 +241,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
};
template<typename _Extents>
- constexpr span<const size_t>
- __static_extents(size_t __begin = 0, size_t __end =
_Extents::rank())
+ constexpr const array<size_t, _Extents::rank()>&
+ __static_extents()
noexcept
- { return _Extents::_S_storage::_S_static_extents(__begin, __end); }
+ { return _Extents::_S_storage::_S_static_extents(); }
template<typename _Extents>
constexpr span<const typename _Extents::index_type>
@@ -358,8 +371,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
private:
- friend span<const size_t>
- __mdspan::__static_extents<extents>(size_t, size_t);
+ friend const array<size_t, rank()>&
+ __mdspan::__static_extents<extents>();
friend span<const index_type>
__mdspan::__dynamic_extents<extents>(const extents&, size_t,
size_t);
@@ -384,6 +397,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
+ template<typename _Tp, size_t _Nm>
Add [[__gnu__::__always_inline__]], as we do not want this to be emitted
even in debug mode.
We may want to do the same for __empty, __size, __fwd_prod and __rev__prod,
that uses
_Extents as template parameter.
+ consteval bool
+ __contains_zero(const array<_Tp, _Nm>& __exts) noexcept
+ { return __contains_zero(span<const _Tp, _Nm>{__exts}); }
+
template<typename _Extents>
constexpr bool
__empty(const _Extents& __exts) noexcept
@@ -412,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr typename _Extents::index_type
__fwd_prod(const _Extents& __exts, size_t __r) noexcept
{
- size_t __sta_prod = _FwdProd<_Extents>::_S_value[__r];
+ constexpr auto __sta_exts = __static_extents<_Extents>();
+ size_t __sta_prod = _FwdProd<__sta_exts>::_S_value[__r];
return __extents_prod(__exts, __sta_prod, 0, __r);
}
@@ -420,8 +439,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr typename _Extents::index_type
__rev_prod(const _Extents& __exts, size_t __r) noexcept
{
+ constexpr auto __sta_exts = __static_extents<_Extents>();
constexpr size_t __rank = _Extents::rank();
- size_t __sta_prod = _RevProd<_Extents>::_S_value[__r];
+ size_t __sta_prod = _RevProd<__sta_exts>::_S_value[__r];
return __extents_prod(__exts, __sta_prod, __r + 1, __rank);
}
--
2.50.0