Am Wed, 13 Feb 2013 16:17:12 +0100
schrieb FG <h...@fgda.pl>:

> Good point about choosing the right type of floating point numbers.
> Conclusion: when there's enough space, always pick double over float.
> Tested with GDC in win64. floats: 16.0s / doubles: 14.1s / reals: 11.2s.
> I thought to myself: cool, I almost beat the 13.4s I got with C++, until I 
> changed the C++ code to also use doubles and... got a massive speedup: 7.1s!

Yeah we are living in the 32-bit past ;)

Still, be aware that we only write to 2 memory locations in
that program!
We have neither exceeded the L1 cache size with that nor have
we put any strain on the prefetcher and memory bandwidth.
With the modification below it is more clear why I said "use
float for storage". The result with LDC2 for me is:

D code serial with dimension 8192 ...
  using floats Total time: 4.235 [sec]
  using doubles Total time: 5.58 [sec] // ~+32% over float
  using reals Total time: 6.432 [sec]

So all the in-CPU performance gain from using doubles is more
than lost, when you run out of bandwidth.

---8<-----------------------------------

module main;

import std.datetime;
import std.metastrings;
import std.stdio;
import std.typetuple;
import std.random;
import core.stdc.stdlib;


enum DIM = 8 * 1024;

int juliaValue;

size_t* randomAcc;

static this()
{
        randomAcc = cast(size_t*) malloc((DIM * DIM + 200) * size_t.sizeof);
        foreach (i; 0 .. DIM * DIM)
                randomAcc[i] = i;
        randomAcc[0 .. DIM * DIM].randomShuffle();
        randomAcc[DIM * DIM .. DIM * DIM + 200] = randomAcc[0 .. 200];
}

static ~this() { free(randomAcc); }

template Julia(TReal)
{
        TReal* squares;

        static this() { squares = cast(TReal*) malloc(DIM * DIM * 
TReal.sizeof); }

        static ~this() { free(squares); }

        struct ComplexStruct
        {
                TReal r;
                TReal i;
        
                TReal squarePlusMag(const ComplexStruct another)
                {
                        TReal r1 = r*r - i*i + another.r;
                        TReal i1 = 2.0*i*r + another.i;
                        
                        r = r1;
                        i = i1;
                        
                        return (r1*r1 + i1*i1);
                }
        }

        int juliaFunction( int x, int y )
        {
                auto c = ComplexStruct(0.8, 0.156);
                auto a = ComplexStruct(x, y);
        
                foreach (i; 0 .. 200) {
                        size_t idx = randomAcc[DIM * x + y + i];
                        squares[idx] = a.squarePlusMag(c);
                        if (squares[idx] > 1000)
                                return 0;
                }
                return 1;
        }
        
        void kernel()
        {
                foreach (x; 0 .. DIM) {
                        foreach (y; 0 .. DIM) {
                                juliaValue = juliaFunction( x, y );
                        }
                }
        }
}

void main()
{
        writeln("D code serial with dimension " ~ toStringNow!DIM ~ " ...");
        StopWatch sw;
        foreach (Math; TypeTuple!(float, double, real))
        {
                sw.start();
                Julia!(Math).kernel();
                sw.stop();
                writefln("  using %ss Total time: %s [sec]",
                         Math.stringof, (sw.peek().msecs * 0.001));
                sw.reset();
        }
}
-- 
Marco

Reply via email to