[Bug libstdc++/66059] make_integer_sequence should use a log(N) implementation

redi at gcc dot gnu.org Tue, 17 Nov 2015 12:15:06 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66059


--- Comment #7 from Jonathan Wakely <redi at gcc dot gnu.org> ---
(In reply to Daniel Frey from comment #3)
> A better O(log N) library-only solution than the linked one is available at
> https://github.com/taocpp/sequences/blob/master/include/tao/seq/
> make_integer_sequence.hpp

The version I came up with is very close to Xeo's at stackoverflow. I tried
something more like yours and it used a LOT more memory.

Here's what I tested:

#include <stddef.h>

namespace std
{
  template<size_t... _Indexes> struct _Index_tuple { };

#if DUP
  template<typename _ITup, int _Odd> struct _Itup_dup;

  template<size_t... _Ind>
    struct _Itup_dup<_Index_tuple<_Ind...>, 0>
    {
      static constexpr size_t _Nm = sizeof...(_Ind);
      using __type = _Index_tuple<_Ind..., _Nm + _Ind...>;
    };

  template<size_t... _Ind>
    struct _Itup_dup<_Index_tuple<_Ind...>, 1>
    {
      static constexpr size_t _Nm = sizeof...(_Ind);
      using __type = _Index_tuple<_Ind..., _Nm + _Ind..., 2 * _Nm>;
    };

  // Builds an _Index_tuple<0, 1, 2, ..., _Num-1>.
  template<size_t _Num>
    struct _Build_index_tuple
    {
      using __type = typename _Itup_dup<
        typename _Build_index_tuple<_Num / 2>::__type, _Num % 2>::__type;
    };
#else
  template<typename _ITup, typename _Jtup> struct _Itup_cat;

  template<size_t... _Ind1, size_t... _Ind2>
    struct _Itup_cat<_Index_tuple<_Ind1...>, _Index_tuple<_Ind2...>>
    {
      using __type = _Index_tuple<_Ind1..., (_Ind2 + sizeof...(_Ind1))...>;
    };

  template<size_t _Num>
    struct _Build_index_tuple
    : _Itup_cat<typename _Build_index_tuple<_Num / 2>::__type,
                typename _Build_index_tuple<_Num - _Num / 2>::__type>
    { };
#endif

  template<>
    struct _Build_index_tuple<1> { typedef _Index_tuple<0> __type; };

  template<>
    struct _Build_index_tuple<0> { typedef _Index_tuple<> __type; };

}

template<typename T, typename U> struct is_same { static const bool value =
false; };
template<typename T> struct is_same<T, T> { static const bool value = true; };

int main()
{
    constexpr auto N = 1024;
    using T = std::_Build_index_tuple<N>::__type;
    static_assert(sizeof(T), "");
    static_assert( is_same<std::_Build_index_tuple<10>::__type,
std::_Index_tuple<0, 1, 2, 3, 4, 5, 6, 7, 8, 9>>::value, "");
    static_assert( is_same<std::_Build_index_tuple<9>::__type,
std::_Index_tuple<0, 1, 2, 3, 4, 5, 6, 7, 8>>::value, "");

}



tmp$ g++11 it.cc -ftime-report 

Execution times (seconds)
 phase setup             :   0.01 (11%) usr   0.00 ( 0%) sys   0.02 (20%) wall 
  1396 kB (15%) ggc
 phase parsing           :   0.07 (78%) usr   0.01 (100%) sys   0.08 (80%) wall
   7235 kB (80%) ggc
 phase opt and generate  :   0.01 (11%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
   421 kB ( 5%) ggc
 |name lookup            :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 (10%) wall 
   111 kB ( 1%) ggc
 |overload resolution    :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 (10%) wall 
    66 kB ( 1%) ggc
 garbage collection      :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 (10%) wall 
     0 kB ( 0%) ggc
 template instantiation  :   0.07 (78%) usr   0.01 (100%) sys   0.07 (70%) wall
   6763 kB (75%) ggc
 initialize rtl          :   0.01 (11%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
     8 kB ( 0%) ggc
 TOTAL                 :   0.09             0.01             0.10              
9066 kB
Extra diagnostic checks enabled; compiler may run slowly.
Configure with --enable-checking=release to disable checks.
tmp$ g++11 it.cc -ftime-report -DDUP

Execution times (seconds)
 phase setup             :   0.00 ( 0%) usr   0.01 (33%) sys   0.02 ( 2%) wall 
  1396 kB ( 3%) ggc
 phase parsing           :   0.91 (99%) usr   0.02 (67%) sys   0.93 (98%) wall 
 45729 kB (96%) ggc
 phase finalize          :   0.01 ( 1%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
     0 kB ( 0%) ggc
 garbage collection      :   0.01 ( 1%) usr   0.00 ( 0%) sys   0.01 ( 1%) wall 
     0 kB ( 0%) ggc
 preprocessing           :   0.01 ( 1%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
    30 kB ( 0%) ggc
 parser struct body      :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 1%) wall 
    22 kB ( 0%) ggc
 template instantiation  :   0.89 (97%) usr   0.02 (67%) sys   0.91 (96%) wall 
 45262 kB (95%) ggc
 TOTAL                 :   0.92             0.03             0.95             
47577 kB
Extra diagnostic checks enabled; compiler may run slowly.
Configure with --enable-checking=release to disable checks.

As you can see, defining DUP makes it 10 times slower and using 5 times as much
memory. Looking at your implementation I see only one relevant difference, you
pass N/2 as a template argument instead of calculating it with sizeof... i.e.

  template<typename _ITup, size_t N, int _Odd> struct _Itup_dup;

  template<size_t... _Ind, size_t _Nm>
    struct _Itup_dup<_Index_tuple<_Ind...>, _Nm, 0>
    {
      using __type = _Index_tuple<_Ind..., _Nm + _Ind...>;
    };

  template<size_t... _Ind, size_t _Nm>
    struct _Itup_dup<_Index_tuple<_Ind...>, _Nm, 1>
    {
      using __type = _Index_tuple<_Ind..., _Nm + _Ind..., 2 * _Nm>;
    };

  // Builds an _Index_tuple<0, 1, 2, ..., _Num-1>.
  template<size_t _Num>
    struct _Build_index_tuple
    : typename _Itup_dup<
        typename _Build_index_tuple<_Num / 2>::__type, _Num / 2, _Num % 2>
    { };

and indeed this is even faster than what I just committed. I didn't realise
sizeof... would have such an impact.

My one can be optimised to be almost as fast by avoiding sizeof... in
_Itup_cat.

[Bug libstdc++/66059] make_integer_sequence should use a log(N) implementation

Reply via email to