Hi, I have converted exp_ps (from http://gruntthepeon.free.fr/ssemath/) to Mono.

using System;
using Mono.Simd;
/// <summary>
/// Helpers for Mono.Simd vectors plus a port of the cephes-derived
/// single-precision <c>exp_ps</c> routine.
/// </summary>
public static class Myext
{
    /// <summary>Shifts each of the four 32-bit lanes left by <paramref name="amount"/> bits (scalar loop).</summary>
    public static unsafe Vector4i LogicalLeftShift(this Vector4i v1, int amount)
    {
        Vector4i res = new Vector4i();
        int* a = (int*)&v1;
        int* b = (int*)&res;
        for (int i = 0; i < 4; ++i)
        {
            // Go through uint so the shift is logical rather than arithmetic.
            *b++ = (int)((uint)(*a++) << amount);
        }
        return res;
    }

    /// <summary>Shifts each of the four unsigned 32-bit lanes left by <paramref name="amount"/> bits (scalar loop).</summary>
    public static unsafe Vector4ui LogicalLeftShift(this Vector4ui v1, int amount)
    {
        Vector4ui res = new Vector4ui();
        uint* a = (uint*)&v1;
        uint* b = (uint*)&res;
        for (int i = 0; i < 4; ++i)
        {
            *b++ = ((uint)(*a++) << amount);
        }
        return res;
    }

    /// <summary>Converts each signed integer lane to float by value (not a bit reinterpretation).</summary>
    public static unsafe Vector4f Cast2Vector4f(this Vector4i v1)
    {
        Vector4f res = new Vector4f();
        int* a = (int*)&v1;
        float* b = (float*)&res;
        for (int i = 0; i < 4; ++i)
        {
            *b++ = ((float)(*a++));
        }
        return res;
    }

    /// <summary>Converts each unsigned integer lane to float by value (not a bit reinterpretation).</summary>
    public static unsafe Vector4f Cast2Vector4f(this Vector4ui v1)
    {
        Vector4f res = new Vector4f();
        uint* a = (uint*)&v1;
        float* b = (float*)&res;
        for (int i = 0; i < 4; ++i)
        {
            *b++ = ((float)(*a++));
        }
        return res;
    }

    /// <summary>Converts each float lane to a signed integer using the C# float-to-int cast (truncation toward zero).</summary>
    public static unsafe Vector4i Cast2Vector4i(this Vector4f v1)
    {
        Vector4i res = new Vector4i();
        float* a = (float*)&v1;
        int* b = (int*)&res;
        for (int i = 0; i < 4; ++i)
        {
            *b++ = ((int)(*a++));
        }
        return res;
    }

    /// <summary>Converts each float lane to an unsigned integer using the C# float-to-uint cast.</summary>
    public static unsafe Vector4ui Cast2Vector4ui(this Vector4f v1)
    {
        Vector4ui res = new Vector4ui();
        float* a = (float*)&v1;
        uint* b = (uint*)&res;
        for (int i = 0; i < 4; ++i)
        {
            *b++ = ((uint)(*a++));
        }
        return res;
    }

    static Vector4f v4sf_0p5 = new Vector4f(0.5f);
    // Only needed by an unsigned variant of the 2^n construction; currently unused.
    static Vector4ui v4sui_0x7f = new Vector4ui(0x7f);
    // IEEE-754 single-precision exponent bias.
    static Vector4i v4si_0x7f = new Vector4i(0x7f);
    static Vector4f v4sf_one = Vector4f.One;

    // Input clamp applied before the range reduction.
    static Vector4f v4sf_exp_hi = new Vector4f(88.3762626647949f);
    static Vector4f v4sf_exp_lo = new Vector4f(-88.3762626647949f);

    // log2(e), and ln(2) split into a coarse part (C1) and a small correction (C2).
    static Vector4f v4sf_cephes_LOG2EF = new Vector4f(1.44269504088896341f);
    static Vector4f v4sf_cephes_exp_C1 = new Vector4f(0.693359375f);
    static Vector4f v4sf_cephes_exp_C2 = new Vector4f(-2.12194440e-4f);

    // Polynomial coefficients, highest order first.
    static Vector4f v4sf_cephes_exp_p0 = new Vector4f(1.9875691500E-4f);
    static Vector4f v4sf_cephes_exp_p1 = new Vector4f(1.3981999507E-3f);
    static Vector4f v4sf_cephes_exp_p2 = new Vector4f(8.3334519073E-3f);
    static Vector4f v4sf_cephes_exp_p3 = new Vector4f(4.1665795894E-2f);
    static Vector4f v4sf_cephes_exp_p4 = new Vector4f(1.6666665459E-1f);
    static Vector4f v4sf_cephes_exp_p5 = new Vector4f(5.0000001201E-1f);

    /// <summary>
    /// Computes an approximation of e^x for each of the four lanes of <paramref name="x"/>.
    /// </summary>
    /// <param name="x">Input lanes; each is clamped to [-88.3762626647949, 88.3762626647949] first.</param>
    /// <returns>A vector whose lanes approximate exp of the corresponding input lane.</returns>
    public static Vector4f ExpSSE(Vector4f x)
    {
        x = VectorOperations.Min(x, v4sf_exp_hi);
        x = VectorOperations.Max(x, v4sf_exp_lo);

        /* express exp(x) as exp(g + n*log(2)) */
        Vector4f fx = x * v4sf_cephes_LOG2EF;
        fx = fx + v4sf_0p5;

        /* floor(fx): truncate toward zero, then subtract 1 where truncation rounded up */
        Vector4i emm0 = Cast2Vector4i(fx);
        Vector4f tmp = Cast2Vector4f(emm0);

        // The comparison must be strict (fx < tmp). With "<=" a lane that is
        // already an exact integer would also be decremented, making n one too
        // small and pushing the reduced argument outside the range the
        // polynomial was fitted for.
        // The compare is expected to yield an all-ones bit pattern per true
        // lane, so AND-ing with 1.0f leaves 1.0f exactly in those lanes.
        Vector4f mask = VectorOperations.CompareLessThan(fx, tmp);
        mask = mask & v4sf_one;
        fx = tmp - mask;

        // g = x - n*ln(2), subtracting ln(2) in two pieces (C1 then C2).
        tmp = fx * v4sf_cephes_exp_C1;
        Vector4f z = fx * v4sf_cephes_exp_C2;
        x = x - tmp;
        x = x - z;

        z = x * x;

        // Horner evaluation: exp(g) ~= 1 + g + g^2 * P(g).
        Vector4f y = v4sf_cephes_exp_p0;
        y = y * x;
        y = y + v4sf_cephes_exp_p1;
        y = y * x;
        y = y + v4sf_cephes_exp_p2;
        y = y * x;
        y = y + v4sf_cephes_exp_p3;
        y = y * x;
        y = y + v4sf_cephes_exp_p4;
        y = y * x;
        y = y + v4sf_cephes_exp_p5;
        y = y * z;
        y = y + x;
        y = y + v4sf_one;

        /* build 2^n: place (n + 127) into the float exponent field */
        emm0 = Cast2Vector4i(fx);
        emm0 = emm0 + v4si_0x7f;
        emm0 = LogicalLeftShift(emm0, 23);
        // Explicit vector conversion as in the original port; it is used here
        // as a bit reinterpretation of the integer lanes.
        Vector4f pow2n = (Vector4f)emm0;

        y = y * pow2n;
        return y;
    }
}

/// <summary>Micro-benchmark comparing Math.Exp against Myext.ExpSSE.</summary>
public class SampleRuntimeDetection
{
    public static void Main()
    {
        Vector4f x = new Vector4f(1f, -2f, 0.5f, 0);
        Vector4f z = new Vector4f(1f, -2f, 0.5f, 0);
        double uz = 0;

        // Stopwatch is monotonic and higher resolution than DateTime.Now differences.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < 40000000; i++)
        {
            uz = Math.Exp(1);
        }
        timer.Stop();
        Console.WriteLine("Math.exp:{0}", timer.Elapsed);
        Console.WriteLine(uz);

        // A quarter of the iterations: each call evaluates four lanes.
        timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < 10000000; i++)
        {
            z = Myext.ExpSSE(x);
        }
        timer.Stop();
        Console.WriteLine("expSSE:{0}", timer.Elapsed);
        Console.WriteLine(z);
    }
}

gmcs -unsafe -r:Mono.Simd.dll exp.cs && mono exp.exe
exp.cs(63,22): warning CS0414: The private field `Myext.v4sui_0x7f' is assigned but its value is never used
Compilation succeeded - 1 warning(s)
SSE1, SSE2, SSE3, SSSE3
Math.exp:00:00:05.1405921
2.71828182845905
expSSE:00:00:01.9999872
<2.718282, 0.1353353, 1.648721, 1>

I only convert exp function. expSSE is 2.6 times faster than Math.exp. But on origin c version, sse_mathfun_test.exe show sse verion four times faster than none sse version. And I also test Math.exp on .net 3.5 sp1, the speed is as fastas my expSSE on mono. I dont know what problem cause mono sse exp slow.

Thanks,
Jet
--
View this message in context: http://www.nabble.com/sse_mathfun-convert-tp25696934p25696934.html
Sent from the Mono - Dev mailing list archive at Nabble.com.
_______________________________________________
Mono-devel-list mailing list
Mono-devel-list@lists.ximian.com
http://lists.ximian.com/mailman/listinfo/mono-devel-list