Dmitry Olshansky:
> This could probably be a starting point (BTW, in D you can write
> templates very easily):
> 
> T dot_product(size_t N, T)(T[] a, T[] b){
>      static if (N == 1){
>          return a[0] * b[0];
>      }else{
>          return a[0] * b[0] + dot_product!(N-1)(a[1..$],b[1..$]);
>      }
> }
> 
> void main()
> {
>      int[] a = [1, 2, 3];
>      int[] b = [5, 6, 7];
>      assert( dot_product!3(a,b) == 38 );//sanity check
> }
> And I haven't checked the disassembly.

I have added another version:

import std.typetuple: TypeTuple;

/**
 * Dot product of the first N elements of two slices, unrolled at compile
 * time through recursive template instantiation.
 *
 * Params:
 *   N = number of leading elements to combine (compile-time constant)
 *   a = first operand; must hold at least N elements
 *   b = second operand; must hold at least N elements
 *
 * Returns: a[0]*b[0] + ... + a[N-1]*b[N-1], or zero when N == 0.
 */
T dot_product(size_t N, T)(T[] a, T[] b) {
     static if (N == 0) {
         // Empty sum. Without this case N-1 would wrap around (size_t is
         // unsigned) and the recursive instantiation would never terminate.
         return cast(T) 0;
     } else static if (N == 1) {
         return a[0] * b[0];
     } else {
         // Peel off the first pair and recurse on the remaining N-1 elements.
         return a[0] * b[0] + dot_product!(N-1)(a[1..$], b[1..$]);
     }
}

/**
 * Compile-time tuple of the integers 0, 1, ..., stop-1.
 *
 * Yields the empty tuple when stop is zero or negative. Built recursively:
 * Iota!(stop) is Iota!(stop-1) with stop-1 appended.
 */
template Iota(int stop) {
    static if (stop > 0) {
        // Recursive case: everything below stop-1, followed by stop-1 itself.
        alias TypeTuple!(Iota!(stop - 1), stop - 1) Iota;
    } else {
        // Base case: nothing to enumerate.
        alias TypeTuple!() Iota;
    }
}

/**
 * Dot product of two fixed-size arrays of length N.
 *
 * Params:
 *   a = first operand (passed by value)
 *   b = second operand (passed by value)
 *
 * Returns: the sum of a[i] * b[i] for i in 0 .. N, starting from zero.
 */
T dotProduct(T, int N)(T[N] a, T[N] b) {
    T acc = 0;
    // Iterating over the Iota!(N) tuple makes each index a compile-time
    // constant, one loop body per index.
    foreach (idx; Iota!(N)) {
        acc += a[idx] * b[idx];
    }
    return acc;
}

/// Sanity check: both implementations must agree on a small known example.
void main() {
    double[3] lhs = [1.0, 2.0, 3.0];
    double[3] rhs = [5.0, 6.0, 7.0];

    // 1*5 + 2*6 + 3*7 == 38
    assert(dot_product!3(lhs, rhs) == 38.0);  // recursive-template version
    assert(dotProduct(lhs, rhs) == 38.0);     // tuple-foreach version
}


The asm (dmd 2.047 -O -release -inline):

; Compiler output for the dot_product instantiation with N = 3, T = double
; (per the mangled name). No call instruction appears: both recursive
; instantiations have been inlined into straight-line code, and there are
; no compare/branch instructions (no bounds checks in this build).
_D6test6b22__T11dot_productVi3TdZ11dot_productFAdAdZd   comdat
                sub     ESP,038h
                ; EDX and ECX are dereferenced below, so they hold the two data
                ; pointers of the slice arguments.
                mov     EDX,048h[ESP]
                mov     EAX,044h[ESP]
                push    EBX
                mov     ECX,044h[ESP]
                ; EBX/ESI/EAX are only decremented and spilled -- presumably the
                ; slice lengths being shortened for a[1..$] / b[1..$]. None of
                ; these values feeds the floating-point computation below.
                mov     EBX,048h[ESP]
                push    ESI
                dec     EBX
                mov     ESI,044h[ESP]
                mov     8[ESP],EBX
                dec     ESI
                mov     EAX,044h[ESP]
                mov     010h[ESP],ESI
                mov     EAX,8[ESP]
                mov     EBX,010h[ESP]
                dec     EBX
                dec     EAX
                ; First product: load the element at EDX, multiply by the element
                ; at ECX, and advance each pointer by 8 bytes (one double).
                fld     qword ptr [EDX]
                lea     EDX,8[EDX]
                fmul    qword ptr [ECX]
                lea     ECX,8[ECX]
                ; The advanced pointers are spilled but never reloaded in this listing.
                mov     0Ch[ESP],EDX
                mov     014h[ESP],ECX
                ; Second product, again stepping both pointers forward.
                fld     qword ptr [EDX]
                lea     EDX,8[EDX]
                fmul    qword ptr [ECX]
                lea     ECX,8[ECX]
                ; Third product.
                fld     qword ptr [EDX]
                fmul    qword ptr [ECX]
                ; Sum: second + third first, then first + that; result stays in ST(0).
                faddp   ST(1),ST
                faddp   ST(1),ST
                pop     ESI
                pop     EBX
                add     ESP,038h
                ; Return and discard 16 bytes of stack arguments.
                ret     010h

; Compiler output for the dot_product instantiation with N = 2, T = double
; (per the mangled name). The inner N = 1 instantiation is inlined: there is
; no call instruction in the body.
_D6test6b22__T11dot_productVk2TdZ11dot_productFAdAdZd   comdat
                sub     ESP,018h
                ; EDX and ECX are dereferenced below, so they hold the two data
                ; pointers of the slice arguments.
                mov     EDX,028h[ESP]
                mov     EAX,024h[ESP]
                push    EBX
                mov     ECX,024h[ESP]
                ; EBX/ESI are loaded and decremented -- presumably the slice
                ; lengths for a[1..$] / b[1..$] -- but never read afterwards.
                ; Both loads into EAX are likewise unused.
                mov     EBX,028h[ESP]
                push    ESI
                mov     ESI,024h[ESP]
                mov     EAX,024h[ESP]
                dec     ESI
                dec     EBX
                ; First product; both pointers advance by 8 bytes (one double).
                fld     qword ptr [EDX]
                lea     EDX,8[EDX]
                fmul    qword ptr [ECX]
                lea     ECX,8[ECX]
                ; Second product.
                fld     qword ptr [EDX]
                fmul    qword ptr [ECX]
                ; first + second; result stays in ST(0).
                faddp   ST(1),ST
                pop     ESI
                pop     EBX
                add     ESP,018h
                ; Return and discard 16 bytes of stack arguments.
                ret     010h

; Compiler output for the dot_product instantiation with N = 1, T = double
; (per the mangled name): a single multiply, result left in ST(0).
_D6test6b22__T11dot_productVk1TdZ11dot_productFAdAdZd   comdat
                ; Data pointer of one slice argument (dereferenced by fld below).
                mov     EDX,010h[ESP]
                ; Both loads into EAX are never used -- presumably the slice lengths.
                mov     EAX,0Ch[ESP]
                mov     EAX,4[ESP]
                fld     qword ptr [EDX]
                ; Data pointer of the other slice argument.
                mov     EDX,8[ESP]
                fmul    qword ptr [EDX]
                ; Return and discard 16 bytes of stack arguments.
                ret     010h

; Compiler output for the dotProduct instantiation with T = double, N = 3
; (per the mangled name). The foreach is fully unrolled: there are no
; branches, and every element is read directly from the stack, where both
; arrays were passed by value (ret 030h discards 48 bytes).
_D6test6b21__T10dotProductTdVk3Z10dotProductFG3dG3dZd   comdat
                sub     ESP,048h
                ; Element 0 of each array: product, then added to the 0.0 from
                ; fldz (the "result = 0" initial value) and spilled to [ESP].
                fld     qword ptr 064h[ESP]
                fmul    qword ptr 04Ch[ESP]
                fldz
                faddp   ST(1),ST
                fstp    qword ptr [ESP]
                ; Load elements 1 and 2 of one array; the fxch pairs interleave
                ; the two multiplies by the matching elements of the other array.
                fld     qword ptr 06Ch[ESP]
                fld     qword ptr 074h[ESP]
                fxch    ST1
                fmul    qword ptr 054h[ESP]
                fxch    ST1
                fmul    qword ptr 05Ch[ESP]
                ; Add products 1 and 2, then add the spilled partial sum.
                faddp   ST(1),ST
                fadd    qword ptr [ESP]
                ; Store to the stack temporary and reload so the result is in ST(0).
                fstp    qword ptr [ESP]
                fld     qword ptr [ESP]
                add     ESP,048h
                ret     030h


Bye,
bearophile

Reply via email to