Here is my implementatation of SIMD find. Function returns index
of ubyte in static 16 byte array with unique values.
------
immutable size_t ArraySize = 16;
int simdIndexOf(ubyte niddle, ref const ubyte[ArraySize] haystack)
{
ubyte16 arr;
arr.array = haystack[];
ubyte16 niddles;
niddles.array[] = niddle;
ubyte16 result;
result = __simd_sto(XMM.PCMPEQB, arr, niddles);
alias Mask = ulong;
static assert(2 * Mask.sizeof == result.sizeof);
immutable BitsInByte = 8;
if (Mask mask = *cast(Mask*)(result.array.ptr))
{
return bsf(mask) / BitsInByte;
}
else if (Mask mask = *cast(Mask*)(result.array.ptr +
Mask.sizeof))
{
return bsf(mask) / BitsInByte + cast(int)Mask.sizeof;
}
else
{
return -1;
}
}
------
Is it optimal and how do you implement this stuf?
Full exaple with comparation of algorithms (SIMD, naive, binary
search):
http://dpaste.dzfl.pl/f3b8989841e3
Benchmark result on dpaste.dzfl.pl:
SIMD: TickDuration(157000)
Binary: TickDuration(472000)
Naive: TickDuration(437000)
At home with defult dub config "dub run --build=release":
SIMD: TickDuration(241566)
Binary: TickDuration(450515)
Naive: TickDuration(450371)