This works for me:

julia> function mindists_sq(pos, dists_min, Acp)
           # For every column i of `pos`, store in dists_min[i] the smallest squared
           # Euclidean distance between that column and any column of `Acp`.
           # `dists_min` is overwritten in place and is also the return value.
           # If `Acp` has no columns, every written entry stays at Inf.
           # NOTE(review): assumes Acp has at least size(pos, 1) rows and dists_min
           # has at least size(pos, 2) entries; nothing here checks either.
           
           for i in 1:size(pos, 2)
               dists_min[i] = Inf  # any finite distance replaces this
               for j in 1:size(Acp, 2)
                   # Accumulate the squared distance in a scalar, one row at a time.
                   t = 0.0
                   for k=1:size(pos,1)
                       t += (pos[k, i]-Acp[k, j])^2
                   end
                   # Keep the smallest squared distance seen so far for column i.
                   if t < dists_min[i]
                       dists_min[i] = t
                   end
               end
           end
           return dists_min
       end
mindists_sq (generic function with 1 method)

julia> function test()
           # Timing harness: builds random 3x64 and 3x1200 inputs plus a 64-element
           # output buffer, then times a single call to mindists_sq.
           const pos = rand(3, 64)
           const Acp = rand(3, 1200)
           const dists_min = zeros(64)  # one slot per column of pos
           const tmp = zeros(typeof(Acp[1]), 1)  # not passed to the call below
           @time mindists_sq(pos, dists_min, Acp)
       end
test (generic function with 1 method)

julia> test();
elapsed time: 0.001279041 seconds (0 bytes allocated)

Is this how you unrolled the innermost loop too?

On Wed, 2014-10-22 at 17:38, accouju...@gmail.com wrote:
> As a follow-up, I just tried with Julia master: in the first case I got an
> execution time of around 0.7 s. The second case is virtually the same.
>
> Julia Version 0.4.0-dev+1177
> Commit 16c3222* (2014-10-22 12:49 UTC)
> Platform Info:
>   System: Linux (x86_64-unknown-linux-gnu)
>   CPU: Intel(R) Core(TM)2 Duo CPU     P8700  @ 2.53GHz
>   WORD_SIZE: 64
>   BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Penryn)
>   LAPACK: libopenblas
>   LIBM: libopenlibm
>   LLVM: libLLVM-3.3
>
> And as a reference, the corresponding Python code is this one:
>
> def mindists_sq(B, Acp):  # NOTE(review): array shapes are not shown here; presumably NumPy arrays -- confirm
>     return ((Acp - B[:, None])**2).sum(2).min(1)  # squared differences, summed over axis 2, then minimum over axis 1
>
> And it gets executed in 4-8 ms.
>
>
> To Julia's credit, I can get down to 4-8 ms with it, but I need to emulate
> Python's behavior with broadcasts and large preallocated temporaries:
>
> function mindists_sq2(pos, dists_min, Acp, tmp, ntmp)  # broadcast-based variant; tmp and ntmp are caller-supplied scratch arrays
>     for j in 1:size(pos, 2)
>         broadcast!(.-, tmp, Acp, sub(pos, :, j))  # tmp = Acp minus column j of pos, written in place
>             for i in 1:length(tmp[:])  # NOTE(review): tmp[:] presumably builds a copy just to take its length
>             tmp[i] *= tmp[i]  # square each difference in place
>             end
>         ntmp[:] = sum(tmp, 1)  # sum of squares along dimension 1, copied into ntmp
>         dists_min[j] = minimum(ntmp)  # smallest squared distance for column j
>     end
>     return dists_min
> end
>
>
> function test(pos, Acp)  # timing harness: allocates the output and scratch buffers, then times one call
>     const dists_min = zeros(typeof(Acp[1]), length(pos))  # NOTE(review): mindists_sq2 only writes indices 1:size(pos, 2)
>     const tmp = similar(Acp)  # scratch array with the same shape as Acp
>     const ntmp = similar(Acp[1,:,:])  # scratch array shaped like the first-row slice of Acp
>     @time mindists_sq2(pos, dists_min, Acp, tmp, ntmp)
> end
> test(A, B)
>
> elapsed time: 0.004768065 seconds (3624960 bytes allocated)
>
> but it's a bit more verbose and convoluted.

Reply via email to