Hi,

I have started running kernel benchmark calculations using Mir NDSlice, and I'm getting times that are much slower than expected. To check that I'm not making an obvious mistake, I've included samples of the code I am using below. Note that the `calculateKernelMatrix` function assumes that the data under the slice object is column-major; if it is row-major, the column selections will be slow, which could account for the issues I'm seeing. Thanks

Dot product functor
```
/// Kernel functor computing the plain dot product of two vectors.
struct DotProduct(T)
{
  public:
  /// Parameter exists only to give all kernel functors a uniform
  /// single-argument constructor shape; the value is ignored.
  this(T _nothing)
  {}
  /// Returns sum_k x[k] * y[k]; x and y are assumed equal-length 1-D slices.
  T opCall(U...)(Slice!(T*, U) x, Slice!(T*, U) y) const
  {
    T acc = 0;
    foreach(k; 0..x.length)
    {
      acc += x[k] * y[k];
    }
    return acc;
  }
}
```

Kernel Matrix function:
```
/// Builds the symmetric n x n kernel (Gram) matrix for the given kernel
/// functor. Each observation is one COLUMN of `data` (data[0..$, j]), so
/// performance assumes the underlying storage is column-major.
auto calculateKernelMatrix(alias K, T, U...)(K!(T) kernel, Slice!(T*, U) data)
{
  size_t n = data.length!1;     // number of observations = number of columns
  auto mat = slice!(T)(n, n);

  // Work unit size of 1: column j costs O(n - j) kernel evaluations, so the
  // default chunking would hand the low-j (expensive) indices to a few tasks
  // and leave the rest idle. Per-index scheduling balances the triangular
  // workload across workers.
  foreach(j; taskPool.parallel(iota(n), 1))
  {
    auto arrj = data[0..$, j];  // hoisted: reused for every i in the column
    foreach(size_t i; j..n)
    {
      // Compute once and write both symmetric entries, instead of reading
      // mat[i, j] back through the slice indexing machinery.
      auto kij = kernel(data[0..$, i], arrj);
      mat[i, j] = kij;
      mat[j, i] = kij;
    }
  }
  return mat;
}
```

Benchmark Function
```
/// Times calculateKernelMatrix for each problem size in `n`, averaging a
/// fixed number of repetitions per size.
///
/// Params:
///   kernel  = kernel functor instance (e.g. DotProduct!T)
///   n       = list of observation counts to benchmark
///   verbose = print per-size average and raw timings
/// Returns: tuple(n, times) where times[i] is the mean seconds for n[i].
auto bench(alias K, T)(K!(T) kernel, long[] n, bool verbose = true)
{
  enum trials = 3;              // repetitions per problem size
  auto times = new double[n.length];
  auto sw = StopWatch(AutoStart.no);
  foreach(i; 0..n.length)
  {
    double[trials] _times;
    // 784 features per observation, n[i] observations (columns).
    auto data = UniformVariable!T(0, 1).randomSlice(784L, n[i]);
    foreach(ref t; _times[])
    {
      sw.start();
      auto mat = calculateKernelMatrix!(K, T)(kernel, data);
      sw.stop();
      t = sw.peek.total!"nsecs"/1_000_000_000.0;  // ns -> seconds
      sw.reset();
    }
    // Divide by the actual trial count rather than a separately
    // hard-coded 3.0, so changing `trials` cannot skew the mean.
    times[i] = sum(_times[])/_times.length;
    if(verbose)
    {
      writeln("Average time for n = ", n[i], ", ", times[i], " seconds.");
      writeln("Detailed times: ", _times, "\n");
    }
  }
  return tuple(n, times);
}
```

Reply via email to