Author: post
Date: 2010-06-25 17:16:14 +0200 (Fri, 25 Jun 2010)
New Revision: 3444

Modified:
   trunk/plugins/dcp/dcp-sse2.c
   trunk/plugins/dcp/dcp-sse4.c
Log:
Use a faster, approximate square root for contrast: _mm_rcp_ps(_mm_rsqrt_ps(x))
instead of _mm_sqrt_ps(x). Minor speedup on Core 2, much faster on Athlon and
older Intel CPUs.

Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c        2010-06-25 15:00:46 UTC (rev 3443)
+++ trunk/plugins/dcp/dcp-sse2.c        2010-06-25 15:16:14 UTC (rev 3444)
@@ -789,9 +789,9 @@
                                r = _mm_max_ps(r, min_val);
                                g = _mm_max_ps(g, min_val);
                                b = _mm_max_ps(b, min_val);
-                               r = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(r), contr_base)), contr_base);
-                               g = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(g), contr_base)), contr_base);
-                               b = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(b), contr_base)), contr_base);
+                               r = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(r)), contr_base)), contr_base);
+                               g = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(g)), contr_base)), contr_base);
+                               b = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(b)), contr_base)), contr_base);
                                r = _mm_max_ps(r, min_val);
                                g = _mm_max_ps(g, min_val);
                                b = _mm_max_ps(b, min_val);

Modified: trunk/plugins/dcp/dcp-sse4.c
===================================================================
--- trunk/plugins/dcp/dcp-sse4.c        2010-06-25 15:00:46 UTC (rev 3443)
+++ trunk/plugins/dcp/dcp-sse4.c        2010-06-25 15:16:14 UTC (rev 3444)
@@ -668,9 +668,9 @@
                                r = _mm_max_ps(r, min_val);
                                g = _mm_max_ps(g, min_val);
                                b = _mm_max_ps(b, min_val);
-                               r = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(r), contr_base)), contr_base);
-                               g = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(g), contr_base)), contr_base);
-                               b = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_sqrt_ps(b), contr_base)), contr_base);
+                               r = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(r)), contr_base)), contr_base);
+                               g = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(g)), contr_base)), contr_base);
+                               b = _mm_add_ps(_mm_mul_ps(contrast, 
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(b)), contr_base)), contr_base);
                                r = _mm_max_ps(r, min_val);
                                g = _mm_max_ps(g, min_val);
                                b = _mm_max_ps(b, min_val);


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to