http://d.puremagic.com/issues/show_bug.cgi?id=4393
--- Comment #1 from bearophile_h...@eml.cc 2011-04-28 18:55:10 PDT --- Some code I have written for arrays of floats: float dot(float[] a, float[] b) { enum size_t UNROLL_MASK = 0b111; assert(a.length == b.length, "dot(): the two array lengths differ."); typeof(a[0]) tot = void; auto a_ptr = a.ptr; auto b_ptr = b.ptr; assert((cast(size_t)a_ptr & cast(size_t)0b1111) == 0, "dot(): the first array is not aligned to 16 bytes."); assert((cast(size_t)b_ptr & cast(size_t)0b1111) == 0, "dot(): the second array is not aligned to 16 bytes."); size_t len = (a.length & (~UNROLL_MASK)) * a[0].sizeof; if (len) { asm { mov EAX, a_ptr; mov ECX, b_ptr; mov EDI, len; xorps XMM0, XMM0; xorps XMM3, XMM3; xor EDX, EDX; align 8; LOOP_START: movaps XMM1, [EAX + EDX]; mulps XMM1, [ECX + EDX]; movaps XMM2, [EAX + EDX + 16]; mulps XMM2, [ECX + EDX + 16]; add EDX, 32; addps XMM0, XMM1; cmp EDI, EDX; addps XMM3, XMM2; jne LOOP_START; addps XMM0, XMM3; // XMM0[0] = XMM0[0] + XMM0[1] + XMM0[2] + XMM0[3] movhlps XMM1, XMM0; addps XMM0, XMM1; pshufd XMM1, XMM0, 0b01_01_01_01; addss XMM0, XMM1; movss tot, XMM0; } } else tot = 0.0; size_t left = a.length & UNROLL_MASK; for (size_t i = left; i > 0; i--) tot += a[$ - i] * b[$ - i]; return tot; } And for arrays of doubles: import std.c.stdio; void main() { double[] A = [0.7644108908809033, 0.96458177717869509, 0.51166055832523683, 0.098840360055908461, 0.40813780586233483, 0.32285857447088551, 0.59137950751990331, 0.59518178287473289]; double[] B = [0.28061374331187983, 0.85036446787626108, 0.52498124274748326, 0.84170745998075014, 0.55819169392258683, 0.62586351111477134, 0.43021720539707864, 0.652708603473523]; // >>> sum(a*b for a,b in zip(A, B)) // 2.4593435789602185 if (A.length % 4 != 0) throw new Error(""); double tot1 = void, tot2 = void; auto a_ptr = cast(double*)A.ptr; auto b_ptr = cast(double*)B.ptr; size_t len = A.length * double.sizeof; asm { mov EAX, a_ptr; mov ECX, b_ptr; mov EDI, len; xorps XMM0, XMM0; xorps XMM3, XMM3; xor EDX, EDX; align 8; LOOP_START: movapd XMM1, [EAX + EDX]; mulpd XMM1, [ECX + EDX]; movapd XMM2, [EAX + EDX + 16]; mulpd XMM2, [ECX + EDX + 16]; add EDX, 32; addpd XMM0, XMM1; cmp EDI, EDX; addpd XMM3, XMM2; jne LOOP_START; addpd XMM0, XMM3; // XMM0 += XMM3 movhpd tot1, XMM0; movlpd tot2, XMM0; } printf("%.15lf\n", tot1 + tot2); } See bug 5880 too. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------