Hi,
I have started running kernel benchmark calculations using Mir
NDSlice, and I'm getting times that are much slower than
expected. To check that I'm not making an obvious mistake, below
are samples of the code I am using. The columns are selected in
the `calculateKernelMatrix` function on the assumption that the
data under the slice object is column major; if it is row major,
the calculation will be slow, which could account for the issues
I'm seeing. Thanks.
Dot product functor
```
/// Callable kernel object that accumulates the element-wise product of two slices.
struct DotProduct(T)
{
public:
    /// Constructor taking a placeholder argument; the value is not used.
    this(T _nothing)
    {}

    /// Returns the sum of `x[k] * y[k]` for every `k` in `[0, x.length)`.
    ///
    /// Only the length of `x` is consulted, so `y` must provide at least
    /// as many elements as `x`.
    T opCall(U...)(Slice!(T*, U) x, Slice!(T*, U) y) const
    {
        T total = 0;
        foreach(k; 0 .. x.length)
        {
            total += x[k] * y[k];
        }
        return total;
    }
}
```
Kernel Matrix function:
```
/// Builds the n-by-n matrix of kernel evaluations between every pair of
/// columns of `data`, where n is `data.length!1`.
///
/// Params:
///   kernel = callable invoked as `kernel(columnA, columnB)`
///   data   = slice whose columns (second dimension) are the items compared
///
/// Returns: a newly allocated n-by-n slice in which entries [r, c] and
/// [c, r] hold the same kernel value.
auto calculateKernelMatrix(alias K, T, U...)(K!(T) kernel, Slice!(T*, U) data)
{
    size_t nCols = data.length!1;
    auto result = slice!(T)(nCols, nCols);
    // The outer iterations are handed to the task pool; each one fills the
    // entries at or below the diagonal in its own column plus their mirrors.
    foreach(col; taskPool.parallel(iota(nCols)))
    {
        auto fixedColumn = data[0..$, col];
        foreach(size_t row; col..nCols)
        {
            // Evaluate once, then store at both mirrored positions.
            auto value = kernel(data[0..$, row], fixedColumn);
            result[row, col] = value;
            result[col, row] = value;
        }
    }
    return result;
}
```
Benchmark Function
```
/// Times `calculateKernelMatrix` on randomly generated data for each
/// requested column count.
///
/// For every entry of `n`, a 784-by-n[i] slice is generated with
/// `UniformVariable!T(0, 1)`, the kernel matrix is computed `repeats`
/// times on that same data, and the mean wall-clock time in seconds is
/// recorded.
///
/// Params:
///   kernel  = kernel object forwarded to `calculateKernelMatrix`
///   n       = column counts to benchmark
///   verbose = when true, prints the mean and the individual timings
///
/// Returns: a tuple of `n` and the array of mean times in seconds.
auto bench(alias K, T)(K!(T) kernel, long[] n, bool verbose = true)
{
    // Number of timed repetitions per problem size; it also sizes the
    // per-size timing buffer so the average cannot drift out of sync.
    enum size_t repeats = 3;

    auto times = new double[n.length];
    auto sw = StopWatch(AutoStart.no);
    foreach(i; 0..n.length)
    {
        double[repeats] _times;
        auto data = UniformVariable!T(0, 1).randomSlice(784L, n[i]);
        foreach(ref t; _times[])
        {
            sw.start();
            auto mat = calculateKernelMatrix!(K, T)(kernel, data);
            sw.stop();
            // Convert elapsed nanoseconds to seconds.
            t = sw.peek.total!"nsecs"/1000_000_000.0;
            // Reset so the next repetition is timed from zero.
            sw.reset();
        }
        times[i] = sum(_times[])/_times.length;
        if(verbose)
        {
            // The literal is kept on one line so the output reads
            // "<time> seconds." without an embedded line break.
            writeln("Average time for n = ", n[i], ", ", times[i], " seconds.");
            writeln("Detailed times: ", _times, "\n");
        }
    }
    return tuple(n, times);
}
```