Two discoveries were made from this benchmark. 1) There is no appreciable speed difference between delegates and functors. I re-ran the benchmark several times; sometimes one was faster, sometimes the other, and no clear advantage was discernible. The visible differences can be blamed on experimental error. Feel free to rerun it on a dedicated benchmarking machine. 2) The GC is slooooow: Method 3, which allocates a new Functor on every call, takes roughly 40 times as long per call as the other two methods. No surprise there.
The code:
gentoo-pc ~ $ cat test.d; gdc-build test.d -o test_c -O3 -frelease -march=nocona && ./test_c
module test;
import std.stdio;

// Wraps a delegate in a struct so that it can be invoked through opCall
// (the "functor" side of the delegate-vs-functor comparison).
struct Functor {
  void delegate() dg;
  // Forwards the call to the stored delegate.
  void opCall() { dg(); }
}

// Invokes `callable` `iters` times, then prints the total elapsed time and
// the average time per call under the label `name`.
// NOTE(review): sec() is not declared in this listing; presumably it comes
// from tools.time (imported further down) and returns a timestamp in
// seconds -- confirm against that module.
void bench(I, C)(string name, I iters, C callable) {
  auto start = sec(); // sorry
  // Resolved at compile time: types that expose opCall are invoked through
  // it explicitly, everything else is called directly.
  for (I l = 0; l < iters; ++l)
    static if (is(typeof(callable.opCall))) callable.opCall();
    else callable();
  auto taken = sec() - start;
  // taken / iters is the time per call; the factor 1000_000 rescales it to
  // the microseconds named in the output (assuming sec() yields seconds).
  writefln(name, ": ", taken, "s, ", ((taken / iters) * 1000_000), " µs per call" );
}

// Callable used for "Method 3": each call allocates a fresh Functor with
// `new`, points it at test(), and invokes it once.
struct _test3 {
  // Empty method; serves only as the delegate target.
  void test() { }
  void opCall() {
    auto dg = new Functor;
    dg.dg = &test;
    dg.opCall();
  }
}

import tools.time;

// Runs a warm-up loop, then times three ways of making a call:
//   Method 1: the empty delegate dg1 called directly,
//   Method 2: the same delegate called through a Functor (dg2),
//   Method 3: _test3, which allocates a new Functor on every call.
void main() {
  // dg1 is an empty delegate literal; dg2 is a Functor created with `new`
  // that wraps dg1.
  auto dg1 = (){ }, dg2 = new Functor;
  dg2.dg = dg1;
  // spin up processor
  writefln("Warm-up");
  for (int k = 0; k < 1024*1024*256; ++k) { dg1(); (*dg2)(); }
  writefln("Begin benchmark");
  // 4 * 2^30 = 2^32 iterations; the cast to long happens before the
  // multiplication by 4, so the product is computed as a long.
  const ITERS = cast(long) (1024*1024*1024) * 4;
  bench("Method 1", ITERS, dg1);
  bench("Method 2", ITERS, dg2);
  _test3 test3;
  // Done this way to allow inlining
  // Method 3 runs ITERS / 256 iterations rather than the full count.
  bench("Method 3", ITERS / 256, test3);
}
gdc -J. test.d tools/time.d tools/log.d tools/compat.d tools/base.d tools/smart_import.d tools/ctfe.d tools/tests.d tools/functional.d -o test_c -O3 -frelease -march=nocona
Warm-up
Begin benchmark
Method 1: 20.5247s, 0.00477877 µs per call
Method 2: 19.6544s, 0.00457615 µs per call
Method 3: 2.86392s, 0.170703 µs per call