I have done one more test, with a much simpler zip(). The code is now faster, but it seems there's no hope of getting acceptable performance. Maybe D needs a built-in zip, like Clay.
Bye,
bearophile

-----------------------

Timings, seconds, best of 3:
#1: 1.95
#2: 61.50
#3: 2.25
#4: 1.52
#5: 15.50

Note: removing the this() constructor makes program #5 about 2-3 seconds faster.

-----------------------

// program #5 (D2)
import std.c.stdio: printf;

struct zip {
    int[] arr1, arr2;
    size_t i;
    static struct Element { int _0, _1; }

    this(int[] a1, int[] a2)
        in { assert(a1.length == a2.length); }
        body {
            arr1 = a1;
            arr2 = a2;
        }

    bool empty() { return i >= arr1.length; }
    @property Element front() { return Element(arr1[i], arr2[i]); }
    void popFront() { i++; }
}

void main() {
    auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
    auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
    int tot = 0;
    foreach (i; 0 .. 50_000_000)
        foreach (xy; zip(a, b))
            tot += xy._0 + xy._1;
    printf("%d\n", tot);
}

----------------------

Just the loops of program #5:

LCE:    mov 024h[ESP],EBX
        lea EBX,054h[ESP]
        xor EDX,EDX
        mov [EBX],EDX
        mov EAX,014h[ESP]
        lea ESI,054h[ESP]
        mov 4[EBX],EDX
        lea EDI,034h[ESP]
        mov 8[EBX],EDX
        mov 0Ch[EBX],EDX
        mov 010h[EBX],EDX
        mov EDX,018h[ESP]
        mov 058h[ESP],EDX
        mov EDX,020h[ESP]
        mov 054h[ESP],EAX
        mov EAX,01Ch[ESP]
        mov 05Ch[ESP],EAX
        mov 060h[ESP],EDX
        movsd
        movsd
        movsd
        movsd
        movsd
        mov ECX,044h[ESP]
        cmp ECX,034h[ESP]
        jae L167
L11D:   mov EBX,044h[ESP]
        mov EDX,038h[ESP]
        mov ESI,[EBX*4][EDX]
        mov EAX,034h[ESP]
        mov EDX,040h[ESP]
        mov ECX,[EBX*4][EDX]
        mov 078h[ESP],ECX
        mov EAX,03Ch[ESP]
        mov EDX,078h[ESP]
        mov 074h[ESP],ESI
        mov EAX,074h[ESP]
        mov EBX,074h[ESP]
        mov 070h[ESP],EDX
        add EBX,070h[ESP]
        add EBP,EBX
        inc dword ptr 044h[ESP]
        mov ESI,044h[ESP]
        mov 06Ch[ESP],EAX
        cmp ESI,034h[ESP]
        jb L11D
L167:   mov EBX,024h[ESP]
        inc EBX
        cmp EBX,02FAF080h
        jb LCE

----------------------