Author: post
Date: 2010-02-04 21:54:52 +0100 (Thu, 04 Feb 2010)
New Revision: 3161

Modified:
   trunk/plugins/dcp/dcp-sse2.c
   trunk/plugins/dcp/dcp.c
Log:
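Reduce the curve table from 65536 to 256 entries (plus one duplicated end
entry) and interpolate linearly between adjacent entries instead of doing a
direct lookup. This gives a significant speedup and avoids posterizing the image.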

Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c        2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp-sse2.c        2010-02-04 20:54:52 UTC (rev 3161)
@@ -550,6 +550,7 @@
 static gfloat _rgb_div_ps[4] __attribute__ ((aligned (16))) = {1.0/65535.0, 
1.0/65535.0, 1.0/65535.0, 1.0/65535.0};
 static gint _15_bit_epi32[4] __attribute__ ((aligned (16))) = { 32768, 32768, 
32768, 32768};
 static guint _16_bit_sign[4] __attribute__ ((aligned (16))) = 
{0x80008000,0x80008000,0x80008000,0x80008000};
+static gfloat _twofiftysix_ps[4] __attribute__ ((aligned (16))) = 
{255.9999f,255.9999f,255.9999f,255.9999f};
 
 #define SETFLOAT4(N, A, B, C, D) float N[4] __attribute__ ((aligned (16))); \
 N[0] = D; N[1] = C; N[2] = B; N[3] = A;
@@ -761,20 +762,25 @@
 
                        if (!dcp->curve_is_flat)                        
                        {
-                               /* Convert v to lookup values */
-                               /* TODO: Use 8 bit fraction as interpolation, 
for interpolating
-                               * a more precise lookup using linear 
interpolation. Maybe use less than
-                               * 16 bits for lookup for speed, 10 bits with 
interpolation should be enough */
-                               __m128 v_mul = _mm_load_ps(_16_bit_ps);
-                               v = _mm_mul_ps(v, v_mul);
-                               __m128i lookup = _mm_cvtps_epi32(v);
-                               gfloat* v_p = (gfloat*)&v;
+                               /* Convert v to lookup values and interpolate */
+                               __m128 v_mul = _mm_mul_ps(v, 
_mm_load_ps(_twofiftysix_ps));
+                               __m128i lookup = _mm_cvtps_epi32(v_mul);
                                _mm_store_si128((__m128i*)&xfer[0], lookup);
 
-                               v_p[0] = dcp->curve_samples[xfer[0]];
-                               v_p[1] = dcp->curve_samples[xfer[1]];
-                               v_p[2] = dcp->curve_samples[xfer[2]];
-                               v_p[3] = dcp->curve_samples[xfer[3]];
+                               /* Calculate fractions */
+                               __m128 frac = _mm_sub_ps(v_mul, 
_mm_floor_positive_ps(v_mul));
+                               __m128 inv_frac = 
_mm_sub_ps(_mm_load_ps(_ones_ps), frac);
+                               
+                               /* Load two adjacent curve values and 
interpolate between them */
+                               __m128 p0p1 = 
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[0]]));
+                               __m128 p2p3 = 
_mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&dcp->curve_samples[xfer[2]]));
+                               p0p1 = _mm_loadh_pi(p0p1, 
(__m64*)&dcp->curve_samples[xfer[1]]);
+                               p2p3 = _mm_loadh_pi(p2p3, 
(__m64*)&dcp->curve_samples[xfer[3]]);
+                               
+                               /* Pack all lower values in v0, high in v1 and 
interpolate */
+                               __m128 v0 = _mm_shuffle_ps(p0p1, p2p3, 
_MM_SHUFFLE(2,0,2,0));
+                               __m128 v1 = _mm_shuffle_ps(p0p1, p2p3, 
_MM_SHUFFLE(3,1,3,1));
+                               v = _mm_add_ps(_mm_mul_ps(inv_frac, v0), 
_mm_mul_ps(frac, v1));
                        }
 
                        /* Apply looktable */

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c     2010-02-04 19:31:47 UTC (rev 3160)
+++ trunk/plugins/dcp/dcp.c     2010-02-04 20:54:52 UTC (rev 3161)
@@ -197,13 +197,13 @@
                                        g_object_unref(spline);
                                        /* Create extra entry */
                                        sampled[65536] = sampled[65535];
-                                       for (i = 0; i < 65536; i++)
+                                       for (i = 0; i < 256; i++)
                                        {
-                                               gfloat value = (gfloat)i * (1.0 
/ 65535.0f);
+                                               gfloat value = (gfloat)i * (1.0 
/ 255.0f);
                                                /* Gamma correct value */
                                                value = powf(value, 1.0f / 
2.2f);
                                                
-                                               /*Lookup curve corrected value 
*/
+                                               /* Lookup curve corrected value 
*/
                                                gfloat lookup = (int)(value * 
65535.0f);
                                                gfloat v0 = 
sampled[(int)lookup];
                                                gfloat v1 = 
sampled[(int)lookup+1];
@@ -216,6 +216,7 @@
                                                /* Store in table */
                                                dcp->curve_samples[i] = value;
                                        }
+                                       dcp->curve_samples[256] = 
dcp->curve_samples[255];
                                }
                        }
                        if (knots)
@@ -224,7 +225,7 @@
                else
                        dcp->curve_is_flat = TRUE;
 
-               for(i=0;i<65536;i++)
+               for(i=0;i<257;i++)
                        dcp->curve_samples[i] = MIN(1.0f, MAX(0.0f, 
dcp->curve_samples[i]));
 
                changed = TRUE;
@@ -267,7 +268,7 @@
 {
        RSDcpClass *klass = RS_DCP_GET_CLASS(dcp);
 
-       dcp->curve_samples = g_new(gfloat, 65536);
+       dcp->curve_samples = g_new(gfloat, 257);
        dcp->huesatmap_interpolated = NULL;
        dcp->use_profile = FALSE;
        dcp->curve_is_flat = TRUE;
@@ -921,7 +922,13 @@
 
                        /* Curve */
                        if (!dcp->curve_is_flat)
-                               v = dcp->curve_samples[_S(v)];
+                       {
+                               gfloat lookup = CLAMP(v * 256.0f, 0.0f, 
255.9999f);
+                               gfloat v0 = dcp->curve_samples[(gint)lookup];
+                               gfloat v1 = dcp->curve_samples[(gint)lookup + 
1];
+                               lookup -= floorf(lookup);
+                               v = v0 * (1.0f - lookup) + v1 * lookup;
+                       }
 
                        if (dcp->looktable)
                                huesat_map(dcp->looktable, &h, &s, &v);


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to