https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66059
--- Comment #7 from Jonathan Wakely <redi at gcc dot gnu.org> --- (In reply to Daniel Frey from comment #3) > A better O(log N) library-only solution than the linked one is available at > https://github.com/taocpp/sequences/blob/master/include/tao/seq/ > make_integer_sequence.hpp The version I came up with is very close to Xeo's at stackoverflow. I tried something more like yours and it used a LOT more memory. Here's what I tested: #include <stddef.h> namespace std { template<size_t... _Indexes> struct _Index_tuple { }; #if DUP template<typename _ITup, int _Odd> struct _Itup_dup; template<size_t... _Ind> struct _Itup_dup<_Index_tuple<_Ind...>, 0> { static constexpr size_t _Nm = sizeof...(_Ind); using __type = _Index_tuple<_Ind..., _Nm + _Ind...>; }; template<size_t... _Ind> struct _Itup_dup<_Index_tuple<_Ind...>, 1> { static constexpr size_t _Nm = sizeof...(_Ind); using __type = _Index_tuple<_Ind..., _Nm + _Ind..., 2 * _Nm>; }; // Builds an _Index_tuple<0, 1, 2, ..., _Num-1>. template<size_t _Num> struct _Build_index_tuple { using __type = typename _Itup_dup< typename _Build_index_tuple<_Num / 2>::__type, _Num % 2>::__type; }; #else template<typename _ITup, typename _Jtup> struct _Itup_cat; template<size_t... _Ind1, size_t... _Ind2> struct _Itup_cat<_Index_tuple<_Ind1...>, _Index_tuple<_Ind2...>> { using __type = _Index_tuple<_Ind1..., (_Ind2 + sizeof...(_Ind1))...>; }; template<size_t _Num> struct _Build_index_tuple : _Itup_cat<typename _Build_index_tuple<_Num / 2>::__type, typename _Build_index_tuple<_Num - _Num / 2>::__type> { }; #endif template<> struct _Build_index_tuple<1> { typedef _Index_tuple<0> __type; }; template<> struct _Build_index_tuple<0> { typedef _Index_tuple<> __type; }; } template<typename T, typename U> struct is_same { static const bool value = false; }; template<typename T> struct is_same<T, T> { static const bool value = true; }; int main() { constexpr auto N = 1024; using T = std::_Build_index_tuple<N>::__type; static_assert(sizeof(T), ""); static_assert( is_same<std::_Build_index_tuple<10>::__type, std::_Index_tuple<0, 1, 2, 3, 4, 5, 6, 7, 8, 9>>::value, ""); static_assert( is_same<std::_Build_index_tuple<9>::__type, std::_Index_tuple<0, 1, 2, 3, 4, 5, 6, 7, 8>>::value, ""); } tmp$ g++11 it.cc -ftime-report Execution times (seconds) phase setup : 0.01 (11%) usr 0.00 ( 0%) sys 0.02 (20%) wall 1396 kB (15%) ggc phase parsing : 0.07 (78%) usr 0.01 (100%) sys 0.08 (80%) wall 7235 kB (80%) ggc phase opt and generate : 0.01 (11%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 421 kB ( 5%) ggc |name lookup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 (10%) wall 111 kB ( 1%) ggc |overload resolution : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 (10%) wall 66 kB ( 1%) ggc garbage collection : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 (10%) wall 0 kB ( 0%) ggc template instantiation : 0.07 (78%) usr 0.01 (100%) sys 0.07 (70%) wall 6763 kB (75%) ggc initialize rtl : 0.01 (11%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 8 kB ( 0%) ggc TOTAL : 0.09 0.01 0.10 9066 kB Extra diagnostic checks enabled; compiler may run slowly. Configure with --enable-checking=release to disable checks. tmp$ g++11 it.cc -ftime-report -DDUP Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.01 (33%) sys 0.02 ( 2%) wall 1396 kB ( 3%) ggc phase parsing : 0.91 (99%) usr 0.02 (67%) sys 0.93 (98%) wall 45729 kB (96%) ggc phase finalize : 0.01 ( 1%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 0 kB ( 0%) ggc garbage collection : 0.01 ( 1%) usr 0.00 ( 0%) sys 0.01 ( 1%) wall 0 kB ( 0%) ggc preprocessing : 0.01 ( 1%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 30 kB ( 0%) ggc parser struct body : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 1%) wall 22 kB ( 0%) ggc template instantiation : 0.89 (97%) usr 0.02 (67%) sys 0.91 (96%) wall 45262 kB (95%) ggc TOTAL : 0.92 0.03 0.95 47577 kB Extra diagnostic checks enabled; compiler may run slowly. Configure with --enable-checking=release to disable checks. As you can see, defining DUP makes it 10 times slower and using 5 times as much memory. Looking at your implementation I see only one relevant difference, you pass N/2 as a template argument instead of calculating it with sizeof... i.e. template<typename _ITup, size_t N, int _Odd> struct _Itup_dup; template<size_t... _Ind, size_t _Nm> struct _Itup_dup<_Index_tuple<_Ind...>, _Nm, 0> { using __type = _Index_tuple<_Ind..., _Nm + _Ind...>; }; template<size_t... _Ind, size_t _Nm> struct _Itup_dup<_Index_tuple<_Ind...>, _Nm, 1> { using __type = _Index_tuple<_Ind..., _Nm + _Ind..., 2 * _Nm>; }; // Builds an _Index_tuple<0, 1, 2, ..., _Num-1>. template<size_t _Num> struct _Build_index_tuple : typename _Itup_dup< typename _Build_index_tuple<_Num / 2>::__type, _Num / 2, _Num % 2> { }; and indeed this is even faster than what I just committed. I didn't realise sizeof... would have such an impact. My one can be optimised to be almost as fast by avoiding sizeof... in _Itup_cat.