https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70001

--- Comment #2 from Markus Trippelsdorf <trippels at gcc dot gnu.org> ---
markus@x4 tmp % cat fft-old.cpp
#include <array>
#include <complex>

using namespace std;

// Element type used by every array below.
typedef std::complex<double> cd;

// N is 2^LOG = 131072 elements.
const int LOG = 17;
const int N = (1 << LOG);

// Two global arrays of N complex values; main passes each to FFT.
array<cd, N> A;
array<cd, N> B;

// Reproducer body: declares an unused local array of LOG + 1 rows, each an
// array<cd, N>; the parameter `arr` is never read or written.
void FFT(array<cd, N> &arr) { array<array<cd, N>, LOG + 1> F; }

// Calls FFT once on each of the two global arrays A and B.
int main() {
  FFT(A);
  FFT(B);
}

markus@x4 tmp % perf stat clang++ -c -std=c++14 fft-old.cpp

 Performance counter stats for 'clang++ -c -std=c++14 fft-old.cpp':

       1244.864638      task-clock (msec)         #    0.992 CPUs utilized      
                15      context-switches          #    0.012 K/sec              
                 3      cpu-migrations            #    0.002 K/sec              
            14,555      page-faults               #    0.012 M/sec              
     3,665,601,629      cycles                    #    2.945 GHz                      (72.48%)
     1,076,285,871      stalled-cycles-frontend   #   29.36% frontend cycles idle     (77.86%)
       461,560,586      stalled-cycles-backend    #   12.59% backend  cycles idle     (70.45%)
     4,099,304,808      instructions              #    1.12  insns per cycle
                                                  #    0.26  stalled cycles per insn  (84.63%)
       994,875,396      branches                  #  799.184 M/sec                    (74.43%)
        17,383,559      branch-misses             #    1.75% of all branches          (57.77%)

       1.254379190 seconds time elapsed

markus@x4 tmp % perf stat g++ -c -std=c++14 fft-old.cpp

 Performance counter stats for 'g++ -c -std=c++14 fft-old.cpp':

      34408.032001      task-clock (msec)         #    0.999 CPUs utilized      
               104      context-switches          #    0.003 K/sec              
                 5      cpu-migrations            #    0.000 K/sec              
         1,699,084      page-faults               #    0.049 M/sec              
   106,988,050,234      cycles                    #    3.109 GHz                      (68.78%)
    18,142,016,241      stalled-cycles-frontend   #   16.96% frontend cycles idle     (64.97%)
    44,391,891,312      stalled-cycles-backend    #   41.49% backend  cycles idle     (67.37%)
    99,403,441,373      instructions              #    0.93  insns per cycle
                                                  #    0.45  stalled cycles per insn  (67.52%)
    21,050,529,933      branches                  #  611.791 M/sec                    (65.02%)
       455,155,359      branch-misses             #    2.16% of all branches          (68.65%)

      34.436263240 seconds time elapsed

Reply via email to