Here's my benchmarking file anyway, it may prove useful. Alan.
On Sun, Nov 16, 2008 at 2:37 AM, Alan McGovern <[EMAIL PROTECTED]> wrote: > Hey, > > On Sat, Nov 15, 2008 at 3:50 PM, Rodrigo Kumpera <[EMAIL PROTECTED]> wrote: >> Hi Alan, >> -Getters and setter are a hint of ill vectorized code. > > In this particular scenario, I'm not sure how i can get rid of the use > of getters/setters unless I use even more unsafe code. I don't know > whether it's feasible or not, but it'd be great to be able to use this > API without having to use unsafe code. At the moment, I don't think > it's really possible to use this API without getters and setters. > >> The last part of your unsafe code should use temps for the intermediate >> results. > Do you mean that I should copy the vector 'e', which i got from > XOR'ing my values, into another Vector4ui using the store operation? > Then I should do my bitshifting/storing into uint[] from that one? > > >> -For the safe case we still miss proper integration with arrays, in the form >> of methods to >> extract and store vectors from them. > > I was thinking that the API could expose something like: > Vector4ui.Create (uint[] array, int offset, ref Vector4ui result) > which could be changed into: > result = *((Vector4ui*)&array [offset]); > > Though I'm sure you have ideas already on this ;) A similar method for > storing the result into a uint[] would be great too. > >> >> Your code looks a bit strange, the Vector4ui constructor indexes in >> particular. Have you checked that >> the output of the 3 methods are the same? > Yes, there is a bug in my implementation there, I left out a bracket > when setting the value of buff[t+3]. There should be an additional > bracket around (e.W << 1) | (e.W >> 31). Other than that, the > implementation is correct. I've pasted the correct implementation of > the unsafe and safe SIMD versions below. Just for reference purposes. > >> >> I'll work on the Mono.Simd issues next week, getting setters to be >> accelerated, some methods >> to better integrate with arrays and other things like element extractors. > > Great stuff. Give me a shout when you've done that and I'll try to > improve the above implementation. Though if you have time to spare > while writing the SIMD code, you could take a look at it yourself ;) > > Thanks, > Alan. > > Reference implementations (non buggy ;) ): > public static void FillBuffSafe(uint[] buff) > { > for (int t = 16; t < buff.Length; t += 4) > { > Vector4ui e = new Vector4ui(buff[t - 3], buff[t - 2], > buff[t - 1], buff[t - 0]) ^ > new Vector4ui(buff[t - 8], buff[t - 7], > buff[t - 6], buff[t - 5]) ^ > new Vector4ui(buff[t - 14], buff[t - > 13], buff[t - 12], buff[t - 11]) ^ > new Vector4ui(buff[t - 16], buff[t - > 15], buff[t - 14], buff[t - 13]); > e.W ^= buff[t]; > > buff[t] = (e.X << 1) | (e.X >> 31); > buff[t + 1] = (e.Y << 1) | (e.Y >> 31); > buff[t + 2] = (e.Z << 1) | (e.Z >> 31); > buff[t + 3] = ((e.W << 1) | (e.W >> 31)) ^ ((e.X << 2) > | (e.X >> 30)); > } > } > > > public unsafe static void FillBuffUnsafe(uint[] buffb) > { > fixed (uint* buff = buffb) > { > for (int t = 16; t < buffb.Length; t += 4) > { > Vector4ui e = *((Vector4ui*)&buff[t - 3]) ^ > *((Vector4ui*)&buff[t - 8]) ^ > *((Vector4ui*)&buff[t - 14]) ^ > *((Vector4ui*)&buff[t - 16]); > e.W ^= buff[t]; > > buff[t] = (e.X << 1) | (e.X >> 31); > buff[t + 1] = (e.Y << 1) | (e.Y >> 31); > buff[t + 2] = (e.Z << 1) | (e.Z >> 31); > buff[t + 3] = ((e.W << 1) | (e.W >> 31)) ^ ((e.X > << 2) | (e.X >> 30)); > } > } > } > >> >> Rodrigo >> >> On Sat, Nov 15, 2008 at 12:13 AM, Alan McGovern <[EMAIL PROTECTED]> >> wrote: >>> >>> I found a bit of code in the SHA1 implementation which i thought was >>> ideal for SIMD optimisations. However, unless i resort to unsafe code, >>> it's actually substantially slower! I've attached three >>> implementations of the method here. The original, the safe SIMD and >>> the unsafe SIMD. The runtimes are as follows: >>> >>> Original: 600ms >>> Unsafe Simd: 450ms >>> Safe Simd: 1700ms >>> >>> Also, the method is always called with a uint[] of length 80. >>> >>> Is this just the wrong place to be using simd? It seemed ideal because >>> i need 75% less XOR's. If anyone has an ideas on whether SIMD could >>> actually be useful for this case or not, let me know. >>> >>> Thanks, >>> Alan. >>> >>> >>> The original code is: >>> >>> private static void FillBuff(uint[] buff) >>> { >>> uint val; >>> for (int i = 16; i < 80; i += 8) >>> { >>> val = buff[i - 3] ^ buff[i - 8] ^ buff[i - 14] ^ buff[i - >>> 16]; >>> buff[i] = (val << 1) | (val >> 31); >>> >>> val = buff[i - 2] ^ buff[i - 7] ^ buff[i - 13] ^ buff[i - >>> 15]; >>> buff[i + 1] = (val << 1) | (val >> 31); >>> >>> val = buff[i - 1] ^ buff[i - 6] ^ buff[i - 12] ^ buff[i - >>> 14]; >>> buff[i + 2] = (val << 1) | (val >> 31); >>> >>> val = buff[i + 0] ^ buff[i - 5] ^ buff[i - 11] ^ buff[i - >>> 13]; >>> buff[i + 3] = (val << 1) | (val >> 31); >>> >>> val = buff[i + 1] ^ buff[i - 4] ^ buff[i - 10] ^ buff[i - >>> 12]; >>> buff[i + 4] = (val << 1) | (val >> 31); >>> >>> val = buff[i + 2] ^ buff[i - 3] ^ buff[i - 9] ^ buff[i - >>> 11]; >>> buff[i + 5] = (val << 1) | (val >> 31); >>> >>> val = buff[i + 3] ^ buff[i - 2] ^ buff[i - 8] ^ buff[i - >>> 10]; >>> buff[i + 6] = (val << 1) | (val >> 31); >>> >>> val = buff[i + 4] ^ buff[i - 1] ^ buff[i - 7] ^ buff[i - >>> 9]; >>> buff[i + 7] = (val << 1) | (val >> 31); >>> } >>> } >>> >>> The unsafe SIMD code is: >>> public unsafe static void FillBuff(uint[] buffb) >>> { >>> fixed (uint* buff = buffb) { >>> Vector4ui e; >>> for (int t = 16; t < buffb.Length; t += 4) >>> { >>> e = *((Vector4ui*)&(buff [t-16])) ^ >>> *((Vector4ui*)&(buff [t-14])) ^ >>> *((Vector4ui*)&(buff [t- 8])) ^ >>> *((Vector4ui*)&(buff [t- 3])); >>> e.W ^= buff[t]; >>> >>> buff[t] = (e.X << 1) | (e.X >> 31); >>> buff[t + 1] = (e.Y << 1) | (e.Y >> 31); >>> buff[t + 2] = (e.Z << 1) | (e.Z >> 31); >>> buff[t + 3] = (e.W << 1) | (e.W >> 31) ^ ((e.X << 2) | (e.X >> >>> 30)); >>> } >>> } >>> } >>> >>> The safe simd code is: >>> public static void FillBuff(uint[] buff) >>> { >>> Vector4ui e; >>> for (int t = 16; t < buff.Length; t += 4) >>> { >>> e = new Vector4ui (buff [t-16],buff [t-15],buff >>> [t-14],buff [t-13]) ^ >>> new Vector4ui (buff [t-14],buff [t-13],buff >>> [t-12],buff [t-11]) ^ >>> new Vector4ui (buff [t-8], buff [t-7], buff >>> [t-6], buff [t-5]) ^ >>> new Vector4ui (buff [t-3], buff [t-2], buff >>> [t-1], buff [t-0]); >>> >>> e.W ^= buff[t]; >>> buff[t] = (e.X << 1) | (e.X >> 31); >>> buff[t + 1] = (e.Y << 1) | (e.Y >> 31); >>> buff[t + 2] = (e.Z << 1) | (e.Z >> 31); >>> buff[t + 3] = (e.W << 1) | (e.W >> 31) ^ ((e.X << 2) | >>> (e.X >> 30)); >>> } >>> } >>> _______________________________________________ >>> Mono-devel-list mailing list >>> Mono-devel-list@lists.ximian.com >>> http://lists.ximian.com/mailman/listinfo/mono-devel-list >> >> >
using Mono.Simd; namespace System.Security.Cryptography { internal class SHA1Internal { static void Main() { uint[] b = new uint[80]; for (int i = 0; i < b.Length; i++) b[i] = (uint)(i*i*i); while (true) { long start = Environment.TickCount; for (int i = 0; i < 1000000; i++) FillBuffUnsafe (b); Console.WriteLine("Unsafe: {0}ms", Environment.TickCount - start); start = Environment.TickCount; for (int i = 0; i < 1000000; i++) FillBuffUnsafe2 (b); Console.WriteLine("Unsafe2: {0}ms", Environment.TickCount - start); start = Environment.TickCount; for (int i = 0; i < 1000000; i++) FillBuffUnsafe2 (b); Console.WriteLine("Safe: {0}ms", Environment.TickCount - start); start = Environment.TickCount; for (int i = 0; i < 1000000; i++) FillBuffOriginal (b); Console.WriteLine("Orig: {0}ms", Environment.TickCount - start); } } private static void FillBuffOriginal(uint[] buff) { uint val; for (int i = 16; i < 80; i += 8) { val = buff[i - 3] ^ buff[i - 8] ^ buff[i - 14] ^ buff[i - 16]; buff[i] = (val << 1) | (val >> 31); val = buff[i - 2] ^ buff[i - 7] ^ buff[i - 13] ^ buff[i - 15]; buff[i + 1] = (val << 1) | (val >> 31); val = buff[i - 1] ^ buff[i - 6] ^ buff[i - 12] ^ buff[i - 14]; buff[i + 2] = (val << 1) | (val >> 31); val = buff[i + 0] ^ buff[i - 5] ^ buff[i - 11] ^ buff[i - 13]; buff[i + 3] = (val << 1) | (val >> 31); val = buff[i + 1] ^ buff[i - 4] ^ buff[i - 10] ^ buff[i - 12]; buff[i + 4] = (val << 1) | (val >> 31); val = buff[i + 2] ^ buff[i - 3] ^ buff[i - 9] ^ buff[i - 11]; buff[i + 5] = (val << 1) | (val >> 31); val = buff[i + 3] ^ buff[i - 2] ^ buff[i - 8] ^ buff[i - 10]; buff[i + 6] = (val << 1) | (val >> 31); val = buff[i + 4] ^ buff[i - 1] ^ buff[i - 7] ^ buff[i - 9]; buff[i + 7] = (val << 1) | (val >> 31); } } public static void FillBuffSafe(uint[] buff) { for (int t = 16; t < buff.Length; t += 4) { Vector4ui e = new Vector4ui (buff [t-16],buff [t-15],buff[t-14],buff [t-13]) ^ new Vector4ui (buff [t-14],buff [t-13],buff[t-12],buff [t-11]) ^ new Vector4ui (buff [t-8], buff [t-7], buff[t-6], buff [t-5]) ^ new Vector4ui (buff [t-3], buff [t-2], buff[t-1], buff [t-0]); e.W ^= buff[t]; buff[t + 0] = (e.X << 1) | (e.X >> 31); buff[t + 1] = (e.Y << 1) | (e.Y >> 31); buff[t + 2] = (e.Z << 1) | (e.Z >> 31); buff[t + 3] = ((e.W << 1) | (e.W >> 31)) ^ ((e.X << 2) | (e.X >> 30)); } } public unsafe static void FillBuffUnsafe(uint[] buff) { fixed (uint* ptr = buff) { for (int t = 16; t < 80; t += 4) { Vector4ui e = *((Vector4ui*)&ptr[t - 16]) ^ *((Vector4ui*)&ptr[t - 14]) ^ *((Vector4ui*)&ptr[t - 8]) ^ *((Vector4ui*)&ptr[t - 3]); e.W ^= buff[t]; buff[t] = (e.X << 1) | (e.X >> 31); buff[t + 1] = (e.Y << 1) | (e.Y >> 31); buff[t + 2] = (e.Z << 1) | (e.Z >> 31); buff[t + 3] = (e.W << 1) | (e.W >> 31); buff[t + 3] ^= ((e.X << 2) | (e.X >> 30)); } } } public unsafe static void FillBuffUnsafe2(uint[] buffb) { fixed (uint* buff = buffb) { for (int t = 16; t < buffb.Length; t += 4) { uint temp = buff[t]; Vector4ui e = *((Vector4ui*)&buff[t - 16]) ^ *((Vector4ui*)&buff[t - 14]) ^ *((Vector4ui*)&buff[t - 8]) ^ *((Vector4ui*)&buff[t - 3]); Vector4ui.StoreAligned((Vector4ui*)&buff[t], e); buff[t] = (buff[t] << 1) | (buff[t] >> 31); buff[t + 1] = (buff[t + 1] << 1) | (buff[t + 1] >> 31); buff[t + 2] = (buff[t + 2] << 1) | (buff[t + 2] >> 31); buff[t + 3] ^= temp; buff[t + 3] = ((buff[t + 3] << 1) | (buff[t + 3] >> 31)) ^ ((buff[t] << 1) | (buff[t] >> 31)); } } } } }
_______________________________________________ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list