Author: post
Date: 2009-10-14 16:26:41 +0200 (Wed, 14 Oct 2009)
New Revision: 2716

Modified:
   trunk/plugins/dcp/dcp.c
Log:
DCP: Took out HSV2RGB SSE code and put it into its own function for more 
flexibility. Made SSE functions static to facilitate inlining.

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c     2009-10-14 12:53:43 UTC (rev 2715)
+++ trunk/plugins/dcp/dcp.c     2009-10-14 14:26:41 UTC (rev 2716)
@@ -482,7 +482,7 @@
 static gfloat _two_ps[4] __attribute__ ((aligned (16))) = {2.0f, 2.0f, 2.0f, 
2.0f};
 static gfloat _six_ps[4] __attribute__ ((aligned (16))) = {6.0f-1e-15, 
6.0f-1e-15, 6.0f-1e-15, 6.0f-1e-15};
 
-inline void
+static inline void
 RGBtoHSV_SSE(__m128 *c0, __m128 *c1, __m128 *c2)
 {
 
@@ -551,11 +551,93 @@
        mask = _mm_cmplt_ps(h, zero_ps);
        h = _mm_add_ps(h, _mm_and_ps(mask, six_ps));
 
-
        *c0 = h;
        *c1 = s;
        *c2 = v;
 }
+
+
+static inline void
+HSVtoRGB_SSE(__m128 *c0, __m128 *c1, __m128 *c2) /* in: c0=h, c1=s, c2=v; out (in place): c0=r, c1=g, c2=b */
+{
+       __m128 h = *c0;
+       __m128 s = *c1;
+       __m128 v = *c2;
+       __m128 r, g, b;
+       
+       /* Get the fractional part of h (the int conversion truncates):
+       * h_fraction = h - (float)(int)h */
+       __m128 ones_ps = _mm_load_ps(_ones_ps);
+       __m128 h_fraction = _mm_sub_ps(h,_mm_cvtepi32_ps(_mm_cvttps_epi32(h)));
+
+       /* p = v * (1.0f - s)  */
+       __m128 p = _mm_mul_ps(v,  _mm_sub_ps(ones_ps, s));
+       /* q = (v * (1.0f - s * f)) */
+       __m128 q = _mm_mul_ps(v, _mm_sub_ps(ones_ps, _mm_mul_ps(s, 
h_fraction)));
+       /* t = (v * (1.0f - s * (1.0f - f))) */
+       __m128 t = _mm_mul_ps(v, _mm_sub_ps(ones_ps, _mm_mul_ps(s, 
_mm_sub_ps(ones_ps, h_fraction))));
+
+       /* h < 1 (case 0); out_mask accumulates the lanes that already have a result */
+       /* case 0: *r = v; *g = t; *b = p; break; */
+       __m128 h_threshold = _mm_add_ps(ones_ps, ones_ps); /* 2.0, the threshold for case 1 */
+       __m128 out_mask = _mm_cmplt_ps(h, ones_ps);
+       r = _mm_and_ps(v, out_mask);
+       g = _mm_and_ps(t, out_mask);
+       b = _mm_and_ps(p, out_mask);
+
+       /* h < 2 (case 1) */
+       /* case 1: *r = q; *g = v; *b = p; break; */
+       __m128 m = _mm_cmplt_ps(h, h_threshold);
+       h_threshold = _mm_add_ps(h_threshold, ones_ps); /* advance threshold for the next case */
+       m = _mm_andnot_ps(out_mask, m); /* keep only lanes not handled by an earlier case */
+       r = _mm_or_ps(r, _mm_and_ps(q, m));
+       g = _mm_or_ps(g, _mm_and_ps(v, m));
+       b = _mm_or_ps(b, _mm_and_ps(p, m));
+       out_mask = _mm_or_ps(out_mask, m);
+
+       /* h < 3 (case 2) */
+       /* case 2: *r = p; *g = v; *b = t; break; */
+       m = _mm_cmplt_ps(h, h_threshold);
+       h_threshold = _mm_add_ps(h_threshold, ones_ps);
+       m = _mm_andnot_ps(out_mask, m);
+       r = _mm_or_ps(r, _mm_and_ps(p, m));
+       g = _mm_or_ps(g, _mm_and_ps(v, m));
+       b = _mm_or_ps(b, _mm_and_ps(t, m));
+       out_mask = _mm_or_ps(out_mask, m);
+
+       /* h < 4 (case 3) */
+       /* case 3: *r = p; *g = q; *b = v; break; */
+       m = _mm_cmplt_ps(h, h_threshold);
+       h_threshold = _mm_add_ps(h_threshold, ones_ps);
+       m = _mm_andnot_ps(out_mask, m);
+       r = _mm_or_ps(r, _mm_and_ps(p, m));
+       g = _mm_or_ps(g, _mm_and_ps(q, m));
+       b = _mm_or_ps(b, _mm_and_ps(v, m));
+       out_mask = _mm_or_ps(out_mask, m);
+
+       /* h < 5 (case 4) */
+       /* case 4: *r = t; *g = p; *b = v; break; */
+       m = _mm_cmplt_ps(h, h_threshold);
+       m = _mm_andnot_ps(out_mask, m);
+       r = _mm_or_ps(r, _mm_and_ps(t, m));
+       g = _mm_or_ps(g, _mm_and_ps(p, m));
+       b = _mm_or_ps(b, _mm_and_ps(v, m));
+       out_mask = _mm_or_ps(out_mask, m);
+
+
+       /* Remainder (case 5): every lane not selected by cases 0-4 */
+       /* case 5: *r = v; *g = p; *b = q; break; */
+       __m128 all_ones = _mm_cmpeq_ps(h,h); /* all bits set in each lane where h is not NaN */
+       m = _mm_xor_ps(out_mask, all_ones); /* bitwise complement of out_mask in those lanes */
+       r = _mm_or_ps(r, _mm_and_ps(v, m));
+       g = _mm_or_ps(g, _mm_and_ps(p, m));
+       b = _mm_or_ps(b, _mm_and_ps(q, m));
+       
+       *c0 = r;
+       *c1 = g;
+       *c2 = b;
+}
+
 #endif
 
 inline void
@@ -1115,7 +1197,7 @@
 
 #if defined (__SSE2__)
 
-inline __m128
+static inline __m128
 sse_matrix3_mul(float* mul, __m128 a, __m128 b, __m128 c)
 {
 
@@ -1272,9 +1354,8 @@
                        if (dcp->looktable) {
                                huesat_map_SSE2(dcp->looktable, 
&dcp->looktable_precalc, &h, &s, &v);
                        }
-
-                       /* Back to RGB */
-                       /* ensure that hue is within range */
+                       
+                       /* Ensure that hue is within range */   
                        h_mask_gt = _mm_cmpgt_ps(h, six_ps);
                        h_mask_lt = _mm_cmplt_ps(h, zero_ps);
                        six_masked_gt = _mm_and_ps(six_ps, h_mask_gt);
@@ -1284,86 +1365,18 @@
 
                        /* s always slightly > 0 */
                        s = _mm_max_ps(s, min_val);
+                       
+                       HSVtoRGB_SSE(&h, &s, &v);
+                       r = h; g = s; b = v;
 
-
-                       /* Convert get the fraction of h
-                        * h_fraction = h - (float)(int)h */
-                       __m128 ones_ps = _mm_load_ps(_ones_ps);
-                       __m128 h_fraction = 
_mm_sub_ps(h,_mm_cvtepi32_ps(_mm_cvttps_epi32(h)));
-
-                       /* p = v * (1.0f - s)  */
-                       __m128 p = _mm_mul_ps(v,  _mm_sub_ps(ones_ps, s));
-                       /* q = (v * (1.0f - s * f)) */
-                       __m128 q = _mm_mul_ps(v, _mm_sub_ps(ones_ps, 
_mm_mul_ps(s, h_fraction)));
-                       /* t = (v * (1.0f - s * (1.0f - f))) */
-                       __m128 t = _mm_mul_ps(v, _mm_sub_ps(ones_ps, 
_mm_mul_ps(s, _mm_sub_ps(ones_ps, h_fraction))));
-
-                       /* h < 1  (case 0)*/
-                       /* case 0: *r = v; *g = t; *b = p; break; */
-                       __m128 h_threshold = _mm_add_ps(ones_ps, ones_ps);
-                       __m128 out_mask = _mm_cmplt_ps(h, ones_ps);
-                       r = _mm_and_ps(v, out_mask);
-                       g = _mm_and_ps(t, out_mask);
-                       b = _mm_and_ps(p, out_mask);
-
-                       /* h < 2 (case 1) */
-                       /* case 1: *r = q; *g = v; *b = p; break; */
-                       __m128 m = _mm_cmplt_ps(h, h_threshold);
-                       h_threshold = _mm_add_ps(h_threshold, ones_ps);
-                       m = _mm_andnot_ps(out_mask, m);
-                       r = _mm_or_ps(r, _mm_and_ps(q, m));
-                       g = _mm_or_ps(g, _mm_and_ps(v, m));
-                       b = _mm_or_ps(b, _mm_and_ps(p, m));
-                       out_mask = _mm_or_ps(out_mask, m);
-
-                       /* h < 3 (case 2)*/
-                       /* case 2: *r = p; *g = v; *b = t; break; */
-                       m = _mm_cmplt_ps(h, h_threshold);
-                       h_threshold = _mm_add_ps(h_threshold, ones_ps);
-                       m = _mm_andnot_ps(out_mask, m);
-                       r = _mm_or_ps(r, _mm_and_ps(p, m));
-                       g = _mm_or_ps(g, _mm_and_ps(v, m));
-                       b = _mm_or_ps(b, _mm_and_ps(t, m));
-                       out_mask = _mm_or_ps(out_mask, m);
-
-                       /* h < 4 (case 3)*/
-                       /* case 3: *r = p; *g = q; *b = v; break; */
-                       m = _mm_cmplt_ps(h, h_threshold);
-                       h_threshold = _mm_add_ps(h_threshold, ones_ps);
-                       m = _mm_andnot_ps(out_mask, m);
-                       r = _mm_or_ps(r, _mm_and_ps(p, m));
-                       g = _mm_or_ps(g, _mm_and_ps(q, m));
-                       b = _mm_or_ps(b, _mm_and_ps(v, m));
-                       out_mask = _mm_or_ps(out_mask, m);
-
-                       /* h < 5 (case 4)*/
-                       /* case 4: *r = t; *g = p; *b = v; break; */
-                       m = _mm_cmplt_ps(h, h_threshold);
-                       m = _mm_andnot_ps(out_mask, m);
-                       r = _mm_or_ps(r, _mm_and_ps(t, m));
-                       g = _mm_or_ps(g, _mm_and_ps(p, m));
-                       b = _mm_or_ps(b, _mm_and_ps(v, m));
-                       out_mask = _mm_or_ps(out_mask, m);
-
-
-                       /* Remainder (case 5) */
-                       /* case 5: *r = v; *g = p; *b = q; break; */
-                       __m128 all_ones = _mm_cmpeq_ps(h,h);
-                       m = _mm_xor_ps(out_mask, all_ones);
-                       r = _mm_or_ps(r, _mm_and_ps(v, m));
-                       g = _mm_or_ps(g, _mm_and_ps(p, m));
-                       b = _mm_or_ps(b, _mm_and_ps(q, m));
-
-
                        __m128 rgb_mul = _mm_load_ps(_16_bit_ps);
                        r = _mm_mul_ps(r, rgb_mul);
                        g = _mm_mul_ps(g, rgb_mul);
                        b = _mm_mul_ps(b, rgb_mul);
-
+                       
                        __m128i r_i = _mm_cvtps_epi32(r);
                        __m128i g_i = _mm_cvtps_epi32(g);
                        __m128i b_i = _mm_cvtps_epi32(b);
-
                        __m128i sub_32 = 
_mm_load_si128((__m128i*)_15_bit_epi32);
                        __m128i signxor = 
_mm_load_si128((__m128i*)_16_bit_sign);
 


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to