Author: post
Date: 2009-11-24 17:23:18 +0100 (Tue, 24 Nov 2009)
New Revision: 2759

Modified:
   trunk/plugins/dcp/dcp.c
Log:
DCP: Added SSE2 intrinsics tone curve.

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c     2009-11-23 21:18:48 UTC (rev 2758)
+++ trunk/plugins/dcp/dcp.c     2009-11-24 16:23:18 UTC (rev 2759)
@@ -1082,7 +1082,84 @@
        *_s = s;
        *_v = v;
 }
+#define DW(A) _mm_castps_si128(A)
+#define PS(A) _mm_castsi128_ps(A)
 
+static gfloat _very_small_ps[4] __attribute__ ((aligned (16))) = {1e-15, 
1e-15, 1e-15, 1e-15};
+static gfloat _16_bit_ps[4] __attribute__ ((aligned (16))) = {65535.0, 
65535.0, 65535.0, 65535.0};
+
+void inline
+rgb_tone_sse2(__m128* _r, __m128* _g, __m128* _b, const gfloat * const 
tone_lut)
+{
+       int xfer[8] __attribute__ ((aligned (16)));
+
+       __m128 r = *_r;
+       __m128 g = *_g;
+       __m128 b = *_b;
+       
+       __m128 lg = _mm_max_ps(b, _mm_max_ps(r, g));
+       __m128 sm = _mm_min_ps(b, _mm_min_ps(r, g));
+       __m128i lookup_max = _mm_cvtps_epi32(_mm_mul_ps(lg,
+                                                                               
 _mm_load_ps(_16_bit_ps)));
+       __m128i lookup_min = _mm_cvtps_epi32(_mm_mul_ps(sm,
+                                                                               
 _mm_load_ps(_16_bit_ps)));
+
+       _mm_store_si128((__m128i*)&xfer[0], lookup_max);
+       _mm_store_si128((__m128i*)&xfer[4], lookup_min);
+       
+    /* Lookup */
+       __m128 LG = _mm_set_ps(tone_lut[xfer[3]], tone_lut[xfer[2]], 
tone_lut[xfer[1]], tone_lut[xfer[0]]);
+       __m128 SM = _mm_set_ps(tone_lut[xfer[7]], tone_lut[xfer[6]], 
tone_lut[xfer[5]], tone_lut[xfer[4]]);
+
+       __m128i ones = _mm_cmpeq_epi32(DW(r), DW(r));
+       __m128i is_r_lg = _mm_cmpeq_epi32(DW(r), DW(lg));
+       __m128i is_g_lg = _mm_cmpeq_epi32(DW(g), DW(lg));
+       __m128i is_b_lg = _mm_cmpeq_epi32(DW(b), DW(lg));
+       
+       __m128i is_r_sm = _mm_andnot_si128(is_r_lg, _mm_cmpeq_epi32(DW(r), 
DW(sm)));
+       __m128i is_g_sm = _mm_andnot_si128(is_g_lg, _mm_cmpeq_epi32(DW(g), 
DW(sm)));
+       __m128i is_b_sm = _mm_andnot_si128(is_b_lg, _mm_cmpeq_epi32(DW(b), 
DW(sm)));
+       
+       __m128i is_r_md = _mm_xor_si128(ones, _mm_or_si128(is_r_lg, is_r_sm));
+       __m128i is_g_md = _mm_xor_si128(ones, _mm_or_si128(is_g_lg, is_g_sm));
+       __m128i is_b_md = _mm_xor_si128(ones, _mm_or_si128(is_b_lg, is_b_sm));
+
+       __m128 md = PS(_mm_or_si128(_mm_or_si128(
+                                       _mm_and_si128(DW(r), is_r_md), 
+                                       _mm_and_si128(DW(g), is_g_md)),
+                                       _mm_and_si128(DW(b), is_b_md)));
+       
+       __m128 p = _mm_rcp_ps(_mm_sub_ps(lg, sm));
+       __m128 q = _mm_sub_ps(md, sm);
+       __m128 o = _mm_sub_ps(LG, SM);
+       __m128 MD = _mm_add_ps(SM, _mm_mul_ps(o, _mm_mul_ps(p, q)));
+
+       is_r_lg = _mm_cmpeq_epi32(DW(r), DW(lg));
+       is_g_lg = _mm_cmpeq_epi32(DW(g), DW(lg));
+       is_b_lg = _mm_cmpeq_epi32(DW(b), DW(lg));
+
+       r = PS(_mm_or_si128( _mm_or_si128(
+                       _mm_and_si128(DW(LG), is_r_lg),
+                       _mm_and_si128(DW(SM), is_r_sm)), 
+                       _mm_and_si128(DW(MD), is_r_md)));
+       
+       g = PS(_mm_or_si128( _mm_or_si128(
+                       _mm_and_si128(DW(LG), is_g_lg),
+                       _mm_and_si128(DW(SM), is_g_sm)), 
+                       _mm_and_si128(DW(MD), is_g_md)));
+       
+       b = PS(_mm_or_si128( _mm_or_si128(
+                       _mm_and_si128(DW(LG), is_b_lg),
+                       _mm_and_si128(DW(SM), is_b_sm)), 
+                       _mm_and_si128(DW(MD), is_b_md)));
+       *_r = r;
+       *_g = g;
+       *_b = b;
+}
+
+#undef DW
+#undef PS
+
 #endif // defined __SSE2__
 
 /* RefBaselineRGBTone() */
@@ -1181,8 +1258,6 @@
 }
 
 static gfloat _rgb_div_ps[4] __attribute__ ((aligned (16))) = {1.0/65535.0, 
1.0/65535.0, 1.0/65535.0, 1.0/65535.0};
-static gfloat _very_small_ps[4] __attribute__ ((aligned (16))) = {1e-15, 
1e-15, 1e-15, 1e-15};
-static gfloat _16_bit_ps[4] __attribute__ ((aligned (16))) = {65535.0, 
65535.0, 65535.0, 65535.0};
 static gint _15_bit_epi32[4] __attribute__ ((aligned (16))) = { 32768, 32768, 
32768, 32768};
 static guint _16_bit_sign[4] __attribute__ ((aligned (16))) = 
{0x80008000,0x80008000,0x80008000,0x80008000};
 
@@ -1192,7 +1267,6 @@
        RS_IMAGE16 *image = t->tmp;
        RSDcp *dcp = t->dcp;
        gint x, y;
-       gint i;
        __m128 h, s, v;
        __m128i p1,p2;
        __m128 p1f, p2f, p3f, p4f;
@@ -1200,7 +1274,6 @@
        __m128i zero = _mm_load_si128((__m128i*)_15_bit_epi32);
 
        int xfer[4] __attribute__ ((aligned (16)));
-       float xfer_ps[12] __attribute__ ((aligned (16)));
 
        const gfloat exposure_comp = pow(2.0, dcp->exposure);
        __m128 exp = _mm_set_ps(exposure_comp, exposure_comp, exposure_comp, 
exposure_comp);
@@ -1349,16 +1422,7 @@
                        /* Apply Tone Curve  in RGB space*/
                        if (dcp->tone_curve_lut) 
                        {
-                               _mm_store_ps(&xfer_ps[0], r);
-                               _mm_store_ps(&xfer_ps[4], g);
-                               _mm_store_ps(&xfer_ps[8], b);
-
-                               for( i = 0 ; i < 4 ; i++ )
-                                       rgb_tone(&xfer_ps[i], &xfer_ps[4+i], 
&xfer_ps[8+i],dcp->tone_curve_lut);
-                       
-                               r = _mm_load_ps(&xfer_ps[0]);
-                               g = _mm_load_ps(&xfer_ps[4]);
-                               b = _mm_load_ps(&xfer_ps[8]);
+                               rgb_tone_sse2( &r, &g, &b, dcp->tone_curve_lut);
                        }
 
                        /* Convert to 16 bit */


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to